├── .gitignore ├── IMPROVING_LOG.md ├── LICENSE ├── README.md ├── configs ├── blendedmvs │ ├── Character.py │ ├── Fountain.py │ ├── Jade.py │ └── Statues.py ├── co3d │ └── donut_369_40208_78816.py ├── custom │ ├── Madoka.py │ ├── Otobai.py │ ├── default_forward_facing.py │ └── default_ubd_inward_facing.py ├── deepvoxels │ ├── armchair.py │ ├── cube.py │ ├── greek.py │ └── vase.py ├── default.py ├── lf │ ├── africa.py │ ├── basket.py │ ├── lf_default.py │ ├── ship.py │ ├── statue.py │ └── torch.py ├── llff │ ├── fern.py │ ├── fern_lg.py │ ├── flower.py │ ├── flower_lg.py │ ├── fortress.py │ ├── fortress_lg.py │ ├── horns.py │ ├── horns_lg.py │ ├── leaves.py │ ├── leaves_lg.py │ ├── llff_default.py │ ├── llff_default_lg.py │ ├── orchids.py │ ├── orchids_lg.py │ ├── room.py │ ├── room_lg.py │ ├── trex.py │ └── trex_lg.py ├── nerf │ ├── chair.py │ ├── drums.py │ ├── ficus.py │ ├── hotdog.py │ ├── lego.py │ ├── materials.py │ ├── mic.py │ ├── ship.py │ └── ship.tensorf.py ├── nerf_unbounded │ ├── bicycle.py │ ├── bonsai.py │ ├── counter.py │ ├── garden.py │ ├── kitchen.py │ ├── nerf_unbounded_default.py │ ├── room.py │ └── stump.py ├── nsvf │ ├── Bike.py │ ├── Lifestyle.py │ ├── Palace.py │ ├── Robot.py │ ├── Spaceship.py │ ├── Steamtrain.py │ ├── Toad.py │ └── Wineholder.py ├── tankstemple │ ├── Barn.py │ ├── Barn_lg.py │ ├── Caterpillar.py │ ├── Caterpillar_lg.py │ ├── Family.py │ ├── Family_lg.py │ ├── Ignatius.py │ ├── Ignatius_lg.py │ ├── Truck.py │ └── Truck_lg.py └── tankstemple_unbounded │ ├── M60.py │ ├── Playground.py │ ├── Train.py │ ├── Truck.py │ └── tt_default.py ├── figs ├── debug_cam_and_bbox.png └── debug_coarse_volume.png ├── lib ├── cuda │ ├── adam_upd.cpp │ ├── adam_upd_kernel.cu │ ├── render_utils.cpp │ ├── render_utils_kernel.cu │ ├── total_variation.cpp │ ├── total_variation_kernel.cu │ ├── ub360_utils.cpp │ └── ub360_utils_kernel.cu ├── dbvgo.py ├── dcvgo.py ├── dmpigo.py ├── dvgo.py ├── grid.py ├── load_blendedmvs.py ├── load_blender.py ├── load_co3d.py ├── load_data.py ├── load_deepvoxels.py ├── load_llff.py ├── load_nerfpp.py ├── load_nsvf.py ├── load_tankstemple.py ├── masked_adam.py └── utils.py ├── requirements.txt ├── run.py └── tools ├── colmap_utils ├── colmap_read_model.py ├── colmap_wrapper.py └── pose_utils.py ├── imgs2poses.py ├── vis_train.py └── vis_volume.py /.gitignore: -------------------------------------------------------------------------------- 1 | **/.ipynb_checkpoints 2 | **/__pycache__ 3 | *.npy 4 | *.npz 5 | *.dae 6 | data 7 | logs 8 | -------------------------------------------------------------------------------- /IMPROVING_LOG.md: -------------------------------------------------------------------------------- 1 | # Improving log 2 | 3 | ### Custom CUDA implementation for efficiency 4 | Some intermediate steps are reimplemented in CUDA (`lib/cuda/`), which improves training speed by 5 | **1.8\~3.5x**. Below we show the results for dense grids with `256^3` and `160^3` voxels. *Tesla V100*, *RTX 2080 Ti*, and *GTX 1080 Ti* GPUs are tested. The PSNRs of different versions on different machines drift by about 0.2 dB. The training speed of the final version is improved 2--3x over the original native PyTorch implementation. 6 | 7 | --- 8 | 9 | | **num_voxels=256^3** | lego | | mic | | ship | | 10 | |--------------|-------|-------|-------|-------|-------|-------| 11 | | **GPU=V100** | psnr | `mm:ss` | psnr | `mm:ss` | psnr | `mm:ss` | 12 | | native pytorch
[b076912](https://github.com/sunset1995/DirectVoxGO/tree/b076912) | 35.51 | `15:10` | 34.39 | `14:11` | 30.05 | `17:04` | 13 | | cuda re-impl. Adam optimizer
[d3783f4](https://github.com/sunset1995/DirectVoxGO/tree/d3783f4) | 35.47 | `08:54` (1.7x) | 34.34 | `06:41` (2.1x) | 30.05 | `10:23` (1.6x) | 14 | | cuda re-impl. rendering
[3de7a6d](https://github.com/sunset1995/DirectVoxGO/tree/3de7a6d) | 35.63 | `06:31` (2.3x) | 34.48 | `04:31` (3.1x) | 30.30 | `08:20` (2.0x) | 15 | | prevent atomic add in alpha2weight
[4f4ac99](https://github.com/sunset1995/DirectVoxGO/tree/4f4ac99) | 35.61 | `05:35` (2.7x) | 34.51 | `04:00` (3.5x) | 30.29 | `07:20` (2.3x) | 16 | | | 17 | | **GPU=2080Ti** | 18 | | native pytorch [b076912](https://github.com/sunset1995/DirectVoxGO/tree/b076912) | - | OOM | 34.44 | `18:01` | - | OOM | 19 | | cuda re-impl. [4f4ac99](https://github.com/sunset1995/DirectVoxGO/tree/4f4ac99) | 35.61 | `07:19` | 34.49 | `04:30` (4.0x) | 30.29 | `09:53` | 20 | | | 21 | | **GPU=1080Ti** | 22 | | native pytorch [b076912](https://github.com/sunset1995/DirectVoxGO/tree/b076912) | 35.76 | `37:22` | 34.47 | `31:18` | 30.09 | `45:28` | 23 | | cuda re-impl. [4f4ac99](https://github.com/sunset1995/DirectVoxGO/tree/4f4ac99) | 35.62 | `14:32` (2.6x) | 34.50 | `08:55` (3.5x) | 30.29 | `21:00` (2.2x) | 24 | 25 | ```python 26 | # The model&training config for the results above 27 | coarse_train = dict(N_iters=5000) 28 | fine_train = dict(pg_scale=[1000,2000,3000,4000,5000,6000]) 29 | fine_model_and_render = dict(num_voxels=256**3) 30 | ``` 31 | 32 | --- 33 | 34 | | **num_voxels=160^3** | lego | | mic | | ship | | 35 | |--------------|-------|-------|-------|-------|-------|-------| 36 | | **GPU=V100** | psnr | `mm:ss` | psnr | `mm:ss` | psnr | `mm:ss` | 37 | | native pytorch
[b076912](https://github.com/sunset1995/DirectVoxGO/tree/b076912) | 34.65 | `08:29` | 33.19 | `07:04` | 29.08 | `10:38` | 38 | | cuda re-impl. Adam optimizer
[d3783f4](https://github.com/sunset1995/DirectVoxGO/tree/d3783f4) | 34.66 | `06:01` (1.4x) | 33.14 | `04:38` (1.5x) | 29.04 | `08:06` (1.3x) | 39 | | cuda re-impl. rendering
[3de7a6d](https://github.com/sunset1995/DirectVoxGO/tree/3de7a6d) | 34.56 | `04:50` (1.8x) | 33.10 | `03:22` (2.1x) | 29.19 | `06:31` (1.6x) | 40 | | prevent atomic add in alpha2weight
[4f4ac99](https://github.com/sunset1995/DirectVoxGO/tree/4f4ac99) | 34.58 | `03:58` (2.1x) | 33.12 | `03:00` (2.4x) | 29.17 | `05:46` (1.8x) | 41 | | | 42 | | **GPU=2080Ti** | 43 | | native pytorch [b076912](https://github.com/sunset1995/DirectVoxGO/tree/b076912) | 34.68 | `11:27` | 33.18 | `09:19` | 29.13 | `14:35` | 44 | | cuda re-impl. [4f4ac99](https://github.com/sunset1995/DirectVoxGO/tree/4f4ac99) | 34.59 | `04:59` (2.3x) | 33.15 | `03:04` (3.0x) | 29.19 | `07:32` (1.9x) | 45 | | | 46 | | **GPU=1080Ti** | 47 | | native pytorch [b076912](https://github.com/sunset1995/DirectVoxGO/tree/b076912) | 34.66 | `22:01` | 33.19 | `17:14` | 29.10 | `29:57` | 48 | | cuda re-impl. [4f4ac99](https://github.com/sunset1995/DirectVoxGO/tree/4f4ac99) | 34.56 | `10:29` (2.1x) | 33.11 | `06:21` (2.7x) | 29.18 | `16:48` (1.8x) | 49 | 50 | ```python 51 | # The model & training config for the results above 52 | coarse_train = dict(N_iters=5000) 53 | fine_train = dict(pg_scale=[1000,2000,3000,4000]) 54 | fine_model_and_render = dict(num_voxels=160**3) 55 | ``` 56 | 57 | --- 58 | 59 | ### Extend to forward-facing scenes 60 | The model for forward-facing scenes is implemented in `lib/dmpigo.py`. The main modifications include: 61 | - Use NeRF's NDC warping 62 | - Use a Multiplane Image (MPI) representation 63 | - The initial probability of stopping at each plane is `1/(# of planes)` 64 | - Skip the coarse-stage training, as it doesn't help for forward-facing scenes 65 | - Adopt a total variation loss, without which the quality would degrade 66 | 67 | All config files are in `configs/llff/`. The base config for the small model is: 68 | ```python 69 | # See configs/llff/llff_default.py 70 | data = dict( 71 | dataset_type='llff', # use the llff dataloader 72 | ndc=True, # use NDC coordinates (forward-facing scenes only) 73 | width=1008, # enforce image width 74 | height=756, # enforce image height 75 | ) 76 | 77 | coarse_train = dict( 78 | N_iters=0, # we don't need the coarse-stage training 79 | ) 80 | 81 | fine_train = dict( 82 | N_iters=30000, 83 | N_rand=4096, # a larger batch doesn't seem to help 84 | pg_scale=[2000,4000,6000,8000], 85 | ray_sampler='flatten', 86 | tv_before=1e9, # enable total variation loss 87 | tv_dense_before=10000, # dense version of the total variation loss for the first 10k iterations 88 | weight_tv_density=1e-5, 89 | weight_tv_k0=1e-6, 90 | ) 91 | 92 | fine_model_and_render = dict( 93 | num_voxels=256**3, 94 | mpi_depth=128, # the number of planes in the Multiplane Image (used when ndc=True) 95 | rgbnet_dim=9, # a larger rgbnet_dim doesn't seem to help 96 | rgbnet_width=64, # a larger rgbnet_width doesn't seem to help 97 | world_bound_scale=1, # no need to slightly enlarge the NDC bound 98 | fast_color_thres=1e-3, # the initial probability of stopping at each plane is 1/mpi_depth, 99 | # so the original 1e-4 would be too conservative here 100 | ) 101 | ``` 102 | See `configs/llff/llff_default_lg.py` for the large-model modifications. Basically, we double `mpi_depth` and use a larger MLP. 103 | 104 | 105 | **Results**: 106 | - Our training times are measured on a single Tesla V100 GPU. 107 | - Training time (`mm:ss`) 108 | | Method | Avg. | Room | Fern | Leaves | Fortress | Orchids | Flower | T-Rex | Horns | 109 | |--|--|--|--|--|--|--|--|--|--| 110 | | NeRF | 30+ hr | 111 | | Ours small | 05:30 | 05:55 | 06:12 | 04:36 | 05:38 | 05:26 | 05:28 | 05:07 | 05:23 | 112 | | Ours large | 16:27 | 17:38 | 18:21 | 14:11 | 16:03 | 17:14 | 16:27 | 15:46 | 16:00 | 113 | - PSNR 114 | | Method | Avg.
| Room | Fern | Leaves | Fortress | Orchids | Flower | T-Rex | Horns | 115 | |--|--|--|--|--|--|--|--|--|--| 116 | | NeRF | **26.50** | **32.70** | **25.17** | 20.92 | **31.16** | **20.36** | 27.40 | **26.80** | 27.45 | 117 | | Ours small | 25.83 | 30.88 | 24.69 | 20.81 | 30.09 | 19.82 | 27.34 | 26.04 | 26.98 | 118 | | Ours large | 26.37 | 32.16 | 24.99 | **21.01** | 30.79 | 20.07 | **27.62** | 26.63 | **27.69** | 119 | - SSIM 120 | | Method | Avg. | Room | Fern | Leaves | Fortress | Orchids | Flower | T-Rex | Horns | 121 | |--|--|--|--|--|--|--|--|--|--| 122 | | NeRF | 0.811 | 0.948 | 0.792 | 0.690 | 0.881 | 0.641 | 0.827 | 0.880 | 0.828 | 123 | | Ours small | 0.826 | 0.940 | 0.810 | 0.735 | 0.871 | 0.663 | 0.849 | 0.891 | 0.850 | 124 | | Ours large | **0.840** | **0.951** | **0.821** | **0.745** | **0.890** | **0.673** | **0.856** | **0.909** | **0.877** | 125 | - LPIPS (VGG) 126 | | Method | Avg. | Room | Fern | Leaves | Fortress | Orchids | Flower | T-Rex | Horns | 127 | |--|--|--|--|--|--|--|--|--|--| 128 | | NeRF | 0.250 | 0.178 | 0.280 | 0.316 | 0.171 | 0.321 | 0.219 | 0.249 | 0.268 | 129 | | Ours small | 0.215 | 0.191 | 0.231 | 0.215 | 0.185 | 0.252 | 0.187 | 0.229 | 0.233 | 130 | | Ours large | **0.200** | **0.172** | **0.222** | **0.205** | **0.161** | **0.247** | **0.181** | **0.215** | **0.203** | 131 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DirectVoxGO 2 | 3 | Direct Voxel Grid Optimization (CVPR2022 Oral, [project page](https://sunset1995.github.io/dvgo/), [DVGO paper](https://arxiv.org/abs/2111.11215), [DVGO v2 paper](https://arxiv.org/abs/2206.05085)). 4 | 5 | https://user-images.githubusercontent.com/2712505/153380311-19d6c3a1-9130-489a-af16-ad36c78f10a9.mp4 6 | 7 | https://user-images.githubusercontent.com/2712505/153380197-991d1689-6418-499c-a192-d757f9a64b64.mp4 8 | 9 | ### Custom casual capturing 10 | A [short guide](https://sunset1995.github.io/dvgo/tutor_forward_facing.html) to capturing custom forward-facing scenes and rendering fly-through videos. 11 | 12 | Below are two RGB and depth fly-through videos from custom captured scenes. 13 | 14 | https://user-images.githubusercontent.com/2712505/174267754-619d4f81-dd04-4c50-ba7f-434774cb890e.mp4 15 | 16 | ### Features 17 | - Speed up NeRF by replacing the MLP with a voxel grid. 18 | - Simple scene representation: 19 | - *Volume densities*: dense voxel grid (3D). 20 | - *View-dependent colors*: dense feature grid (4D) + shallow MLP. 21 | - PyTorch CUDA extension built just-in-time for another 2--3x speedup. 22 | - O(N) realization of the distortion loss proposed by [mip-NeRF 360](https://jonbarron.info/mipnerf360/). 23 | - The loss improves our training time and quality. 24 | - We have released a self-contained pytorch package: [torch_efficient_distloss](https://github.com/sunset1995/torch_efficient_distloss). 25 | - Consider a batch of 8192 rays x 256 points. 26 | - GPU memory consumption: 6192MB => 96MB. 27 | - Run time for 100 iters: 20 sec => 0.2 sec.
28 | - Supported datasets: 29 | - *Bounded inward-facing*: [NeRF](https://drive.google.com/drive/folders/128yBriW1IG_3NJ5Rp7APSTZsJqdJdfc1), [NSVF](https://dl.fbaipublicfiles.com/nsvf/dataset/Synthetic_NSVF.zip), [BlendedMVS](https://dl.fbaipublicfiles.com/nsvf/dataset/BlendedMVS.zip), [T&T (masked)](https://dl.fbaipublicfiles.com/nsvf/dataset/TanksAndTemple.zip), [DeepVoxels](https://drive.google.com/open?id=1ScsRlnzy9Bd_n-xw83SP-0t548v63mPH). 30 | - *Unbounded inward-facing*: [T&T](https://drive.google.com/file/d/11KRfN91W1AxAW6lOFs4EeYDbeoQZCi87/view?usp=sharing), [LF](https://drive.google.com/file/d/1gsjDjkbTh4GAR9fFqlIDZ__qR9NYTURQ/view?usp=sharing), [mip-NeRF360](https://jonbarron.info/mipnerf360/). 31 | - *Forward-facing*: [LLFF](https://drive.google.com/drive/folders/14boI-o5hGO9srnWaaogTU5_ji7wkX2S7). 32 | 33 | 34 | ### Installation 35 | ``` 36 | git clone git@github.com:sunset1995/DirectVoxGO.git 37 | cd DirectVoxGO 38 | pip install -r requirements.txt 39 | ``` 40 | [PyTorch](https://pytorch.org/) and [torch_scatter](https://github.com/rusty1s/pytorch_scatter) installation is machine dependent; please install the correct versions for your machine. 41 | 42 |
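For example, on a CUDA 11.8 machine the two machine-dependent packages could be installed as below; treat the versions and wheel URLs as placeholders for whatever matches your setup:

```bash
# Hypothetical example -- swap the torch/CUDA versions for your machine.
pip install torch --index-url https://download.pytorch.org/whl/cu118
pip install torch-scatter -f https://data.pyg.org/whl/torch-2.0.0+cu118.html
```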
43 | Dependencies (click to expand) 44 | 45 | - `PyTorch`, `numpy`, `torch_scatter`: main computation. 46 | - `scipy`, `lpips`: SSIM and LPIPS evaluation. 47 | - `tqdm`: progress bar. 48 | - `mmcv`: config system. 49 | - `opencv-python`: image processing. 50 | - `imageio`, `imageio-ffmpeg`: image and video I/O. 51 | - `Ninja`: to build the newly implemented torch extension just-in-time. 52 | - `einops`: torch tensor shaping with a pretty API. 53 | - `torch_efficient_distloss`: O(N) realization of the distortion loss. 54 |
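As a quick illustration of the last dependency, the O(N) distortion loss is computed from per-point rendering weights and sample midpoints. A minimal sketch following the package's documented interface (the shapes and `interval` below are made-up example values):

```python
import torch
from torch_efficient_distloss import eff_distloss

B, N = 8192, 256                           # rays x sampled points per ray
w = torch.rand(B, N).cuda()
w = (w / w.sum(-1, keepdim=True)).requires_grad_()  # per-point weights; each ray sums to 1
s = torch.linspace(0, 1, N + 1).cuda()     # bin edges along the ray
m = ((s[1:] + s[:-1]) * 0.5).expand(B, N)  # midpoint of each sampling interval
interval = 1.0 / N                         # uniform sampling interval
loss = eff_distloss(w, m, interval)        # O(N) distortion loss, differentiable w.r.t. w
loss.backward()
```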
55 | 56 | 57 | ## Directory structure for the datasets 58 | 59 |
60 | (click to expand;) 61 | 62 | data 63 | ├── nerf_synthetic # Link: https://drive.google.com/drive/folders/128yBriW1IG_3NJ5Rp7APSTZsJqdJdfc1 64 | │ └── [chair|drums|ficus|hotdog|lego|materials|mic|ship] 65 | │ ├── [train|val|test] 66 | │ │ └── r_*.png 67 | │ └── transforms_[train|val|test].json 68 | │ 69 | ├── Synthetic_NSVF # Link: https://dl.fbaipublicfiles.com/nsvf/dataset/Synthetic_NSVF.zip 70 | │ └── [Bike|Lifestyle|Palace|Robot|Spaceship|Steamtrain|Toad|Wineholder] 71 | │ ├── intrinsics.txt 72 | │ ├── rgb 73 | │ │ └── [0_train|1_val|2_test]_*.png 74 | │ └── pose 75 | │ └── [0_train|1_val|2_test]_*.txt 76 | │ 77 | ├── BlendedMVS # Link: https://dl.fbaipublicfiles.com/nsvf/dataset/BlendedMVS.zip 78 | │ └── [Character|Fountain|Jade|Statues] 79 | │ ├── intrinsics.txt 80 | │ ├── rgb 81 | │ │ └── [0|1|2]_*.png 82 | │ └── pose 83 | │ └── [0|1|2]_*.txt 84 | │ 85 | ├── TanksAndTemple # Link: https://dl.fbaipublicfiles.com/nsvf/dataset/TanksAndTemple.zip 86 | │ └── [Barn|Caterpillar|Family|Ignatius|Truck] 87 | │ ├── intrinsics.txt 88 | │ ├── rgb 89 | │ │ └── [0|1|2]_*.png 90 | │ └── pose 91 | │ └── [0|1|2]_*.txt 92 | │ 93 | ├── deepvoxels # Link: https://drive.google.com/drive/folders/1ScsRlnzy9Bd_n-xw83SP-0t548v63mPH 94 | │ └── [train|validation|test] 95 | │ └── [armchair|cube|greek|vase] 96 | │ ├── intrinsics.txt 97 | │ ├── rgb/*.png 98 | │ └── pose/*.txt 99 | │ 100 | ├── nerf_llff_data # Link: https://drive.google.com/drive/folders/128yBriW1IG_3NJ5Rp7APSTZsJqdJdfc1 101 | │ └── [fern|flower|fortress|horns|leaves|orchids|room|trex] 102 | │ 103 | ├── tanks_and_temples # Link: https://drive.google.com/file/d/11KRfN91W1AxAW6lOFs4EeYDbeoQZCi87/view?usp=sharing 104 | │ └── [tat_intermediate_M60|tat_intermediate_Playground|tat_intermediate_Train|tat_training_Truck] 105 | │ └── [train|test] 106 | │ ├── intrinsics/*txt 107 | │ ├── pose/*txt 108 | │ └── rgb/*jpg 109 | │ 110 | ├── lf_data # Link: https://drive.google.com/file/d/1gsjDjkbTh4GAR9fFqlIDZ__qR9NYTURQ/view?usp=sharing 111 | │ └── [africa|basket|ship|statue|torch] 112 | │ └── [train|test] 113 | │ ├── intrinsics/*txt 114 | │ ├── pose/*txt 115 | │ └── rgb/*jpg 116 | │ 117 | ├── 360_v2 # Link: https://jonbarron.info/mipnerf360/ 118 | │ └── [bicycle|bonsai|counter|garden|kitchen|room|stump] 119 | │ ├── poses_bounds.npy 120 | │ └── [images_2|images_4] 121 | │ 122 | ├── nerf_llff_data # Link: https://drive.google.com/drive/folders/14boI-o5hGO9srnWaaogTU5_ji7wkX2S7 123 | │ └── [fern|flower|fortress|horns|leaves|orchids|room|trex] 124 | │ ├── poses_bounds.npy 125 | │ └── [images_2|images_4] 126 | │ 127 | └── co3d # Link: https://github.com/facebookresearch/co3d 128 | └── [donut|teddybear|umbrella|...] 129 | ├── frame_annotations.jgz 130 | ├── set_lists.json 131 | └── [129_14950_29917|189_20376_35616|...] 132 | ├── images 133 | │ └── frame*.jpg 134 | └── masks 135 | └── frame*.png 136 |
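The `data` folder (git-ignored, per `.gitignore` above) can simply be a set of symlinks to wherever the downloads actually live; a minimal sketch with placeholder paths:

```bash
mkdir -p data
ln -s /path/to/downloads/nerf_synthetic data/nerf_synthetic
ln -s /path/to/downloads/Synthetic_NSVF data/Synthetic_NSVF
ln -s /path/to/downloads/360_v2 data/360_v2
```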
137 | 138 | 139 | 140 | ## GO 141 | 142 | - Training 143 | ```bash 144 | $ python run.py --config configs/nerf/lego.py --render_test 145 | ``` 146 | Use `--i_print` and `--i_weights` to change the log interval. 147 | - Evaluation 148 | To only evaluate the testset `PSNR`, `SSIM`, and `LPIPS` of the trained `lego` without re-training, run: 149 | ```bash 150 | $ python run.py --config configs/nerf/lego.py --render_only --render_test \ 151 | --eval_ssim --eval_lpips_vgg 152 | ``` 153 | Use `--eval_lpips_alex` to evaluate LPIPS with pre-trained Alex net instead of VGG net. 154 | - Render video 155 | ```bash 156 | $ python run.py --config configs/nerf/lego.py --render_only --render_video 157 | ``` 158 | Use `--render_video_factor 4` for a fast preview. 159 | - Reproduction: all config files to reproduce our results. 160 |
161 | (click to expand) 162 | 163 | $ ls configs/* 164 | configs/blendedmvs: 165 | Character.py Fountain.py Jade.py Statues.py 166 | 167 | configs/nerf: 168 | chair.py drums.py ficus.py hotdog.py lego.py materials.py mic.py ship.py 169 | 170 | configs/nsvf: 171 | Bike.py Lifestyle.py Palace.py Robot.py Spaceship.py Steamtrain.py Toad.py Wineholder.py 172 | 173 | configs/tankstemple: 174 | Barn.py Caterpillar.py Family.py Ignatius.py Truck.py 175 | 176 | configs/deepvoxels: 177 | armchair.py cube.py greek.py vase.py 178 | 179 | configs/tankstemple_unbounded: 180 | M60.py Playground.py Train.py Truck.py 181 | 182 | configs/lf: 183 | africa.py basket.py ship.py statue.py torch.py 184 | 185 | configs/nerf_unbounded: 186 | bicycle.py bonsai.py counter.py garden.py kitchen.py room.py stump.py 187 | 188 | configs/llff: 189 | fern.py flower.py fortress.py horns.py leaves.py orchids.py room.py trex.py 190 |
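To sweep one dataset group end-to-end, a simple shell loop over its config files works; a sketch using the flags documented above:

```bash
for cfg in configs/nerf/*.py; do
    python run.py --config "$cfg" --render_test --eval_ssim --eval_lpips_vgg
done
```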
191 | 192 | ### Custom casually captured scenes 193 | Coming soon hopefully. 194 | 195 | ### Development and tuning guide 196 | #### Extension to new datasets 197 | Adjusting the data-related config fields to fit your camera coordinate system is recommended before implementing a new dataloader. 198 | We provide two visualization tools for debugging. 199 | 1. Inspect the cameras and the allocated BBox. 200 | - Export via `--export_bbox_and_cams_only {filename}.npz`: 201 | ```bash 202 | python run.py --config configs/nerf/mic.py --export_bbox_and_cams_only cam_mic.npz 203 | ``` 204 | - Visualize the result: 205 | ```bash 206 | python tools/vis_train.py cam_mic.npz 207 | ``` 208 | 2. Inspect the learned geometry after coarse optimization. 209 | - Export via `--export_coarse_only {filename}.npz` (assuming `coarse_last.tar` is available in the training log): 210 | ```bash 211 | python run.py --config configs/nerf/mic.py --export_coarse_only coarse_mic.npz 212 | ``` 213 | - Visualize the result: 214 | ```bash 215 | python tools/vis_volume.py coarse_mic.npz 0.001 --cam cam_mic.npz 216 | ``` 217 | 218 | | Inspecting the cameras & BBox | Inspecting the learned coarse volume | 219 | |:-:|:-:| 220 | |![](figs/debug_cam_and_bbox.png)|![](figs/debug_coarse_volume.png)| 221 | 222 | 223 | 224 | #### Speed and quality tradeoff 225 | We have reported some ablation experiments in our paper supplementary material. 226 | Setting `N_iters`, `N_rand`, `num_voxels`, `rgbnet_depth`, `rgbnet_width` to larger values or setting `stepsize` to smaller values typically leads to better quality but needs more computation; a hypothetical example config is sketched at the end of this README. 227 | The `weight_distortion` setting affects the training speed and quality as well. 228 | Only `stepsize` is tunable at test time; all the other fields should remain the same as in training. 229 | 230 | ## Advanced data structure 231 | - **Octree** — [Plenoxels: Radiance Fields without Neural Networks](https://alexyu.net/plenoxels/). 232 | - **Hash** — [Instant Neural Graphics Primitives with a Multiresolution Hash Encoding](https://nvlabs.github.io/instant-ngp/). 233 | - **Factorized components** — [TensoRF: Tensorial Radiance Fields](https://apchenstu.github.io/TensoRF/). 234 | 235 | You will need them to scale to higher grid resolutions, but we believe our simple dense grid can still be a good starting point if you have other challenging problems to deal with. 236 | 237 | ## Acknowledgement 238 | The code base originated from the awesome [nerf-pytorch](https://github.com/yenchenlin/nerf-pytorch) implementation, but it has become very different from that code base by now.
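To make the knobs from the speed and quality tradeoff guide concrete, a hypothetical "higher quality, slower" override of one scene config might look like the following; the file name and values are illustrative assumptions, not tuned or validated settings:

```python
_base_ = './nerf/lego.py'     # hypothetical: inherit an existing scene config

expname = 'dvgo_lego_hq'      # hypothetical experiment name

fine_train = dict(
    N_iters=40000,            # more optimization steps
    N_rand=8192,              # larger ray batch per step
)

fine_model_and_render = dict(
    num_voxels=256**3,        # finer voxel grid
    rgbnet_depth=4,           # deeper color MLP
    rgbnet_width=256,         # wider color MLP
    stepsize=0.25,            # denser ray sampling (the only knob also tunable at test time)
)
```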
239 | -------------------------------------------------------------------------------- /configs/blendedmvs/Character.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_Character' 4 | basedir = './logs/blended_mvs' 5 | 6 | data = dict( 7 | datadir='./data/BlendedMVS/Character/', 8 | dataset_type='blendedmvs', 9 | inverse_y=True, 10 | white_bkgd=True, 11 | ) 12 | 13 | -------------------------------------------------------------------------------- /configs/blendedmvs/Fountain.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_Fountain' 4 | basedir = './logs/blended_mvs' 5 | 6 | data = dict( 7 | datadir='./data/BlendedMVS/Fountain/', 8 | dataset_type='blendedmvs', 9 | inverse_y=True, 10 | white_bkgd=False, 11 | ) 12 | 13 | -------------------------------------------------------------------------------- /configs/blendedmvs/Jade.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_Jade' 4 | basedir = './logs/blended_mvs' 5 | 6 | data = dict( 7 | datadir='./data/BlendedMVS/Jade/', 8 | dataset_type='blendedmvs', 9 | inverse_y=True, 10 | white_bkgd=False, 11 | ) 12 | 13 | -------------------------------------------------------------------------------- /configs/blendedmvs/Statues.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_Statues' 4 | basedir = './logs/blended_mvs' 5 | 6 | data = dict( 7 | datadir='./data/BlendedMVS/Statues/', 8 | dataset_type='blendedmvs', 9 | inverse_y=True, 10 | white_bkgd=True, 11 | ) 12 | 13 | -------------------------------------------------------------------------------- /configs/co3d/donut_369_40208_78816.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_donut_369_40208_78816' 4 | basedir = './logs/co3d' 5 | 6 | data = dict( 7 | datadir='./data/co3d/', 8 | dataset_type='co3d', 9 | annot_path='./data/co3d/donut/frame_annotations.jgz', 10 | split_path='./data/co3d/donut/set_lists.json', 11 | sequence_name='369_40208_78816', 12 | flip_x=True, 13 | flip_y=True, 14 | inverse_y=True, 15 | white_bkgd=False, 16 | ) 17 | 18 | coarse_train = dict( 19 | ray_sampler='flatten', 20 | ) 21 | 22 | -------------------------------------------------------------------------------- /configs/custom/Madoka.py: -------------------------------------------------------------------------------- 1 | _base_ = './default_forward_facing.py' 2 | 3 | expname = 'Madoka' 4 | 5 | data = dict( 6 | datadir='./data/custom/Madoka/dense', 7 | factor=2, 8 | movie_render_kwargs={ 9 | 'scale_r': 1.0, 10 | 'scale_f': 0.8, 11 | 'zrate': 2.0, 12 | 'zdelta': 0.5, 13 | } 14 | ) 15 | 16 | -------------------------------------------------------------------------------- /configs/custom/Otobai.py: -------------------------------------------------------------------------------- 1 | _base_ = './default_forward_facing.py' 2 | 3 | expname = 'Otobai' 4 | 5 | data = dict( 6 | datadir='./data/custom/Otobai/dense', 7 | factor=2, 8 | movie_render_kwargs={ 9 | 'scale_r': 0.8, 10 | 'scale_f': 10.0, 11 | 'zrate': 6.0, 12 | 'zdelta': 0.5, 13 | } 14 | ) 15 | 16 | -------------------------------------------------------------------------------- /configs/custom/default_forward_facing.py: 
-------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | basedir = './logs/custom' 4 | 5 | data = dict( 6 | dataset_type='llff', 7 | load2gpu_on_the_fly=True, 8 | ndc=True, 9 | llffhold=0, 10 | rand_bkgd=True, 11 | movie_render_kwargs={ 12 | 'scale_r': 1.0, # circling radius 13 | 'scale_f': 1.0, # the distance to the look-at point of focus 14 | 'zdelta': 0.5, # amplitude of forward motion 15 | 'zrate': 1.0, # frequency of forward motion 16 | 'N_rots': 1, # number of rotations in 120 frames 17 | } 18 | ) 19 | 20 | coarse_train = dict( 21 | N_iters=0, 22 | ) 23 | 24 | fine_train = dict( 25 | N_iters=30000, 26 | N_rand=4096, 27 | weight_distortion=0.01, 28 | pg_scale=[2000,4000,6000,8000], 29 | decay_after_scale=0.1, 30 | ray_sampler='flatten', 31 | tv_before=1e9, 32 | tv_dense_before=10000, 33 | weight_tv_density=1e-5, 34 | weight_tv_k0=1e-6, 35 | ) 36 | 37 | _mpi_depth = 256 38 | _stepsize = 1.0 39 | 40 | fine_model_and_render = dict( 41 | num_voxels=384*384*_mpi_depth, 42 | mpi_depth=_mpi_depth, 43 | stepsize=_stepsize, 44 | rgbnet_dim=9, 45 | rgbnet_width=64, 46 | world_bound_scale=1, 47 | fast_color_thres=_stepsize/_mpi_depth/5, 48 | ) 49 | 50 | -------------------------------------------------------------------------------- /configs/custom/default_ubd_inward_facing.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | basedir = './logs/custom' 4 | 5 | data = dict( 6 | dataset_type='llff', 7 | spherify=True, 8 | llffhold=0, 9 | bd_factor=None, 10 | white_bkgd=True, 11 | rand_bkgd=True, 12 | unbounded_inward=True, 13 | load2gpu_on_the_fly=True, 14 | ) 15 | 16 | coarse_train = dict(N_iters=0) 17 | 18 | fine_train = dict( 19 | N_iters=40000, 20 | N_rand=4096, 21 | lrate_decay=80, 22 | ray_sampler='flatten', 23 | weight_nearclip=0.0, 24 | weight_distortion=0.01, 25 | pg_scale=[2000,4000,6000,8000,10000,12000,14000,16000], 26 | tv_before=20000, 27 | tv_dense_before=20000, 28 | weight_tv_density=1e-6, 29 | weight_tv_k0=1e-7, 30 | ) 31 | 32 | alpha_init = 1e-4 33 | stepsize = 0.5 34 | 35 | fine_model_and_render = dict( 36 | num_voxels=320**3, 37 | num_voxels_base=320**3, 38 | alpha_init=alpha_init, 39 | stepsize=stepsize, 40 | fast_color_thres={ 41 | '_delete_': True, 42 | 0 : alpha_init*stepsize/10, 43 | 1500: min(alpha_init, 1e-4)*stepsize/5, 44 | 2500: min(alpha_init, 1e-4)*stepsize/2, 45 | 3500: min(alpha_init, 1e-4)*stepsize/1.5, 46 | 4500: min(alpha_init, 1e-4)*stepsize, 47 | 5500: min(alpha_init, 1e-4), 48 | 6500: 1e-4, 49 | }, 50 | world_bound_scale=1, 51 | ) 52 | 53 | -------------------------------------------------------------------------------- /configs/deepvoxels/armchair.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_armchair' 4 | basedir = './logs/deepvoxels' 5 | 6 | data = dict( 7 | datadir='./data/deepvoxels/', 8 | dataset_type='deepvoxels', 9 | scene='armchair', 10 | white_bkgd=True, 11 | ) 12 | 13 | -------------------------------------------------------------------------------- /configs/deepvoxels/cube.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_cube' 4 | basedir = './logs/deepvoxels' 5 | 6 | data = dict( 7 | datadir='./data/deepvoxels/', 8 | dataset_type='deepvoxels', 9 | scene='cube', 10 | white_bkgd=True, 11 | ) 12 | 13 |
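All of these per-scene files rely on the mmcv-style config inheritance used throughout `configs/`: `_base_` pulls in a template config and the local dicts override individual keys, with everything else inherited. A sketch of a hypothetical new scene config (the experiment name and paths are assumptions):

```python
_base_ = '../default.py'      # inherit every field from the template config

expname = 'dvgo_myscene'      # hypothetical experiment name
basedir = './logs/my_dataset'

data = dict(                  # only overridden keys appear here; all other
    datadir='./data/my_dataset/myscene',  # data fields keep their default.py values
    dataset_type='blender',
    white_bkgd=True,
)
```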
-------------------------------------------------------------------------------- /configs/deepvoxels/greek.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_greek' 4 | basedir = './logs/deepvoxels' 5 | 6 | data = dict( 7 | datadir='./data/deepvoxels/', 8 | dataset_type='deepvoxels', 9 | scene='greek', 10 | white_bkgd=True, 11 | ) 12 | 13 | -------------------------------------------------------------------------------- /configs/deepvoxels/vase.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_vase' 4 | basedir = './logs/deepvoxels' 5 | 6 | data = dict( 7 | datadir='./data/deepvoxels/', 8 | dataset_type='deepvoxels', 9 | scene='vase', 10 | white_bkgd=True, 11 | ) 12 | 13 | -------------------------------------------------------------------------------- /configs/default.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | 3 | expname = None # experiment name 4 | basedir = './logs/' # where to store ckpts and logs 5 | 6 | ''' Template of data options 7 | ''' 8 | data = dict( 9 | datadir=None, # path to dataset root folder 10 | dataset_type=None, # blender | nsvf | blendedmvs | tankstemple | deepvoxels | co3d 11 | inverse_y=False, # intrinsics mode (to support blendedmvs, nsvf, tankstemple) 12 | flip_x=False, # to support co3d 13 | flip_y=False, # to support co3d 14 | annot_path='', # to support co3d 15 | split_path='', # to support co3d 16 | sequence_name='', # to support co3d 17 | load2gpu_on_the_fly=False, # do not load all images onto the GPU (to save GPU memory) 18 | testskip=1, # subsample the testset to preview results 19 | white_bkgd=False, # use white background (note that some datasets don't provide alpha and come with a blended bg color) 20 | rand_bkgd=False, # use random background during training 21 | half_res=False, # [TODO] 22 | bd_factor=.75, 23 | movie_render_kwargs=dict(), 24 | 25 | # Below are forward-facing llff specific settings. 26 | ndc=False, # use NDC coordinates (only supported for forward-facing scenes) 27 | spherify=False, # inward-facing 28 | factor=4, # [TODO] 29 | width=None, # enforce image width 30 | height=None, # enforce image height 31 | llffhold=8, # test split (hold out every llffhold-th view) 32 | load_depths=False, # load depth 33 | 34 | # Below are unbounded inward-facing specific settings.
35 | unbounded_inward=False, 36 | unbounded_inner_r=1.0, 37 | ) 38 | 39 | ''' Template of training options 40 | ''' 41 | coarse_train = dict( 42 | N_iters=5000, # number of optimization steps 43 | N_rand=8192, # batch size (number of random rays per optimization step) 44 | lrate_density=1e-1, # lr of the density voxel grid 45 | lrate_k0=1e-1, # lr of the color/feature voxel grid 46 | lrate_rgbnet=1e-3, # lr of the mlp that predicts view-dependent color 47 | lrate_decay=20, # lr decays by 0.1 after every lrate_decay*1000 steps 48 | pervoxel_lr=True, # view-count-based lr 49 | pervoxel_lr_downrate=1, # downsample images when computing the view-count-based lr 50 | ray_sampler='random', # ray sampling strategy 51 | weight_main=1.0, # weight of the photometric loss 52 | weight_entropy_last=0.01, # weight of the background entropy loss 53 | weight_nearclip=0, 54 | weight_distortion=0, 55 | weight_rgbper=0.1, # weight of the per-point rgb loss 56 | tv_every=1, # count total variation loss every tv_every step 57 | tv_after=0, # count total variation loss starting from the tv_after step 58 | tv_before=0, # count total variation before the given number of iterations 59 | tv_dense_before=0, # count total variation densely before the given number of iterations 60 | weight_tv_density=0.0, # weight of the total variation loss on the density voxel grid 61 | weight_tv_k0=0.0, # weight of the total variation loss on the color/feature voxel grid 62 | pg_scale=[], # checkpoints for progressive scaling 63 | decay_after_scale=1.0, # decay act_shift after scaling 64 | skip_zero_grad_fields=[], # variable names for which parameters with zero grad are skipped in each iteration 65 | maskout_lt_nviews=0, 66 | ) 67 | 68 | fine_train = deepcopy(coarse_train) 69 | fine_train.update(dict( 70 | N_iters=20000, 71 | pervoxel_lr=False, 72 | ray_sampler='in_maskcache', 73 | weight_entropy_last=0.001, 74 | weight_rgbper=0.01, 75 | pg_scale=[1000, 2000, 3000, 4000], 76 | skip_zero_grad_fields=['density', 'k0'], 77 | )) 78 | 79 | ''' Template of model and rendering options 80 | ''' 81 | coarse_model_and_render = dict( 82 | num_voxels=1024000, # expected number of voxels 83 | num_voxels_base=1024000, # to rescale delta distance 84 | density_type='DenseGrid', # DenseGrid, TensoRFGrid 85 | k0_type='DenseGrid', # DenseGrid, TensoRFGrid 86 | density_config=dict(), 87 | k0_config=dict(), 88 | mpi_depth=128, # the number of planes in the Multiplane Image (used when ndc=True) 89 | nearest=False, # nearest interpolation 90 | pre_act_density=False, # pre-activated trilinear interpolation 91 | in_act_density=False, # in-activated trilinear interpolation 92 | bbox_thres=1e-3, # threshold to determine known free space in the fine stage 93 | mask_cache_thres=1e-3, # threshold to determine a tightened BBox in the fine stage 94 | rgbnet_dim=0, # feature voxel grid dim 95 | rgbnet_full_implicit=False, # let the color MLP ignore the feature voxel grid 96 | rgbnet_direct=True, # set to False to treat the first 3 dims of the feature voxel grid as diffuse rgb 97 | rgbnet_depth=3, # depth of the color MLP (there are rgbnet_depth-1 intermediate features) 98 | rgbnet_width=128, # width of the color MLP 99 | alpha_init=1e-6, # set the alpha values everywhere at the beginning of training 100 | fast_color_thres=1e-7, # threshold on alpha for skipping sampled points in the fine stage 101 | maskout_near_cam_vox=True, # mask out grid points that lie between cameras and their near planes 102 | world_bound_scale=1, # rescale the BBox enclosing the scene 103 | stepsize=0.5, # sampling stepsize in volume rendering 104 | ) 105 | 106 |
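# Note on the pattern below: fine_model_and_render is derived from
# coarse_model_and_render via deepcopy + update (and fine_train from
# coarse_train above, in the same way), so each fine-stage template only
# spells out the fields it overrides; everything else keeps the coarse value.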
fine_model_and_render = deepcopy(coarse_model_and_render) 107 | fine_model_and_render.update(dict( 108 | num_voxels=160**3, 109 | num_voxels_base=160**3, 110 | rgbnet_dim=12, 111 | alpha_init=1e-2, 112 | fast_color_thres=1e-4, 113 | maskout_near_cam_vox=False, 114 | world_bound_scale=1.05, 115 | )) 116 | 117 | del deepcopy 118 | -------------------------------------------------------------------------------- /configs/lf/africa.py: -------------------------------------------------------------------------------- 1 | _base_ = './lf_default.py' 2 | 3 | expname = 'dvgo_Africa_unbounded' 4 | 5 | data = dict( 6 | datadir='./data/lf_data/africa', 7 | ) 8 | 9 | -------------------------------------------------------------------------------- /configs/lf/basket.py: -------------------------------------------------------------------------------- 1 | _base_ = './lf_default.py' 2 | 3 | expname = 'dvgo_Basket_unbounded' 4 | 5 | data = dict( 6 | datadir='./data/lf_data/basket', 7 | ) 8 | 9 | -------------------------------------------------------------------------------- /configs/lf/lf_default.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | basedir = './logs/lf' 4 | 5 | data = dict( 6 | dataset_type='nerfpp', 7 | inverse_y=True, 8 | white_bkgd=False, 9 | rand_bkgd=True, 10 | unbounded_inward=True, 11 | ) 12 | 13 | coarse_train = dict(N_iters=0) 14 | 15 | fine_train = dict( 16 | N_iters=25000, 17 | N_rand=4096, 18 | ray_sampler='flatten', 19 | weight_distortion=1e-2, 20 | pg_scale=[1000,2000,3000,4000,5000,6000], 21 | decay_after_scale=1.0, 22 | tv_before=1e9, 23 | tv_dense_before=10000, 24 | weight_tv_density=1e-6, 25 | weight_tv_k0=1e-7, 26 | ) 27 | 28 | alpha_init = 1e-4 29 | stepsize = 0.5 30 | 31 | fine_model_and_render = dict( 32 | num_voxels=256**3, 33 | num_voxels_base=256**3, 34 | alpha_init=alpha_init, 35 | stepsize=stepsize, 36 | fast_color_thres={ 37 | '_delete_': True, 38 | 0 : alpha_init*stepsize/10, 39 | 1500: min(alpha_init, 1e-4)*stepsize/5, 40 | 2500: min(alpha_init, 1e-4)*stepsize/2, 41 | 3500: min(alpha_init, 1e-4)*stepsize/1.5, 42 | 4500: min(alpha_init, 1e-4)*stepsize, 43 | 5500: min(alpha_init, 1e-4), 44 | 6500: 1e-4, 45 | }, 46 | world_bound_scale=1, 47 | ) 48 | 49 | -------------------------------------------------------------------------------- /configs/lf/ship.py: -------------------------------------------------------------------------------- 1 | _base_ = './lf_default.py' 2 | 3 | expname = 'dvgo_Ship_unbounded' 4 | 5 | data = dict( 6 | datadir='./data/lf_data/ship', 7 | ) 8 | 9 | -------------------------------------------------------------------------------- /configs/lf/statue.py: -------------------------------------------------------------------------------- 1 | _base_ = './lf_default.py' 2 | 3 | expname = 'dvgo_Statue_unbounded' 4 | 5 | data = dict( 6 | datadir='./data/lf_data/statue', 7 | ) 8 | 9 | -------------------------------------------------------------------------------- /configs/lf/torch.py: -------------------------------------------------------------------------------- 1 | _base_ = './lf_default.py' 2 | 3 | expname = 'dvgo_Torch_unbounded' 4 | 5 | data = dict( 6 | datadir='./data/lf_data/torch', 7 | ) 8 | 9 | -------------------------------------------------------------------------------- /configs/llff/fern.py: -------------------------------------------------------------------------------- 1 | _base_ = './llff_default.py' 2 | 3 | expname = 'fern' 4 | 5 | data = dict( 6 | 
datadir='./data/nerf_llff_data/fern', 7 | ) 8 | 9 | -------------------------------------------------------------------------------- /configs/llff/fern_lg.py: -------------------------------------------------------------------------------- 1 | _base_ = './llff_default_lg.py' 2 | 3 | expname = 'fern_lg' 4 | 5 | data = dict( 6 | datadir='./data/nerf_llff_data/fern', 7 | ) 8 | 9 | -------------------------------------------------------------------------------- /configs/llff/flower.py: -------------------------------------------------------------------------------- 1 | _base_ = './llff_default.py' 2 | 3 | expname = 'flower' 4 | 5 | data = dict( 6 | datadir='./data/nerf_llff_data/flower', 7 | ) 8 | 9 | -------------------------------------------------------------------------------- /configs/llff/flower_lg.py: -------------------------------------------------------------------------------- 1 | _base_ = './llff_default_lg.py' 2 | 3 | expname = 'flower_lg' 4 | 5 | data = dict( 6 | datadir='./data/nerf_llff_data/flower', 7 | ) 8 | 9 | -------------------------------------------------------------------------------- /configs/llff/fortress.py: -------------------------------------------------------------------------------- 1 | _base_ = './llff_default.py' 2 | 3 | expname = 'fortress' 4 | 5 | data = dict( 6 | datadir='./data/nerf_llff_data/fortress', 7 | ) 8 | 9 | -------------------------------------------------------------------------------- /configs/llff/fortress_lg.py: -------------------------------------------------------------------------------- 1 | _base_ = './llff_default_lg.py' 2 | 3 | expname = 'fortress_lg' 4 | 5 | data = dict( 6 | datadir='./data/nerf_llff_data/fortress', 7 | ) 8 | 9 | -------------------------------------------------------------------------------- /configs/llff/horns.py: -------------------------------------------------------------------------------- 1 | _base_ = './llff_default.py' 2 | 3 | expname = 'horns' 4 | 5 | data = dict( 6 | datadir='./data/nerf_llff_data/horns', 7 | ) 8 | 9 | -------------------------------------------------------------------------------- /configs/llff/horns_lg.py: -------------------------------------------------------------------------------- 1 | _base_ = './llff_default_lg.py' 2 | 3 | expname = 'horns_lg' 4 | 5 | data = dict( 6 | datadir='./data/nerf_llff_data/horns', 7 | ) 8 | 9 | -------------------------------------------------------------------------------- /configs/llff/leaves.py: -------------------------------------------------------------------------------- 1 | _base_ = './llff_default.py' 2 | 3 | expname = 'leaves' 4 | 5 | data = dict( 6 | datadir='./data/nerf_llff_data/leaves', 7 | ) 8 | 9 | -------------------------------------------------------------------------------- /configs/llff/leaves_lg.py: -------------------------------------------------------------------------------- 1 | _base_ = './llff_default_lg.py' 2 | 3 | expname = 'leaves_lg' 4 | 5 | data = dict( 6 | datadir='./data/nerf_llff_data/leaves', 7 | ) 8 | 9 | -------------------------------------------------------------------------------- /configs/llff/llff_default.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | basedir = './logs/llff' 4 | 5 | data = dict( 6 | dataset_type='llff', 7 | ndc=True, 8 | width=1008, 9 | height=756, 10 | ) 11 | 12 | coarse_train = dict( 13 | N_iters=0, 14 | ) 15 | 16 | fine_train = dict( 17 | N_iters=30000, 18 | N_rand=4096, 19 | weight_distortion=0.01, 20 | 
pg_scale=[2000,4000,6000,8000], 21 | ray_sampler='flatten', 22 | tv_before=1e9, 23 | tv_dense_before=10000, 24 | weight_tv_density=1e-5, 25 | weight_tv_k0=1e-6, 26 | ) 27 | 28 | fine_model_and_render = dict( 29 | num_voxels=256**3, 30 | mpi_depth=128, 31 | rgbnet_dim=9, 32 | rgbnet_width=64, 33 | world_bound_scale=1, 34 | fast_color_thres=1e-3, 35 | ) 36 | 37 | -------------------------------------------------------------------------------- /configs/llff/llff_default_lg.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | basedir = './logs/llff' 4 | 5 | data = dict( 6 | dataset_type='llff', 7 | ndc=True, 8 | width=1008, 9 | height=756, 10 | rand_bkgd=True, 11 | ) 12 | 13 | coarse_train = dict( 14 | N_iters=0, 15 | ) 16 | 17 | fine_train = dict( 18 | N_iters=30000, 19 | N_rand=4096, 20 | weight_distortion=0.01, 21 | pg_scale=[2000,4000,6000,8000], 22 | decay_after_scale=0.1, 23 | ray_sampler='flatten', 24 | tv_before=1e9, 25 | tv_dense_before=10000, 26 | weight_tv_density=1e-5, 27 | weight_tv_k0=1e-6, 28 | ) 29 | 30 | _mpi_depth = 256 31 | _stepsize = 1.0 32 | 33 | fine_model_and_render = dict( 34 | num_voxels=384*384*_mpi_depth, 35 | mpi_depth=_mpi_depth, 36 | stepsize=_stepsize, 37 | rgbnet_dim=9, 38 | rgbnet_width=64, 39 | world_bound_scale=1, 40 | fast_color_thres=_stepsize/_mpi_depth/5, 41 | ) 42 | 43 | -------------------------------------------------------------------------------- /configs/llff/orchids.py: -------------------------------------------------------------------------------- 1 | _base_ = './llff_default.py' 2 | 3 | expname = 'orchids' 4 | 5 | data = dict( 6 | datadir='./data/nerf_llff_data/orchids', 7 | ) 8 | 9 | -------------------------------------------------------------------------------- /configs/llff/orchids_lg.py: -------------------------------------------------------------------------------- 1 | _base_ = './llff_default_lg.py' 2 | 3 | expname = 'orchids_lg' 4 | 5 | data = dict( 6 | datadir='./data/nerf_llff_data/orchids', 7 | ) 8 | 9 | -------------------------------------------------------------------------------- /configs/llff/room.py: -------------------------------------------------------------------------------- 1 | _base_ = './llff_default.py' 2 | 3 | expname = 'room' 4 | 5 | data = dict( 6 | datadir='./data/nerf_llff_data/room', 7 | ) 8 | 9 | -------------------------------------------------------------------------------- /configs/llff/room_lg.py: -------------------------------------------------------------------------------- 1 | _base_ = './llff_default_lg.py' 2 | 3 | expname = 'room_lg' 4 | 5 | data = dict( 6 | datadir='./data/nerf_llff_data/room', 7 | ) 8 | 9 | -------------------------------------------------------------------------------- /configs/llff/trex.py: -------------------------------------------------------------------------------- 1 | _base_ = './llff_default.py' 2 | 3 | expname = 'trex' 4 | 5 | data = dict( 6 | datadir='./data/nerf_llff_data/trex', 7 | ) 8 | 9 | -------------------------------------------------------------------------------- /configs/llff/trex_lg.py: -------------------------------------------------------------------------------- 1 | _base_ = './llff_default_lg.py' 2 | 3 | expname = 'trex_lg' 4 | 5 | data = dict( 6 | datadir='./data/nerf_llff_data/trex', 7 | ) 8 | 9 | -------------------------------------------------------------------------------- /configs/nerf/chair.py: -------------------------------------------------------------------------------- 1 | 
_base_ = '../default.py' 2 | 3 | expname = 'dvgo_chair' 4 | basedir = './logs/nerf_synthetic' 5 | 6 | data = dict( 7 | datadir='./data/nerf_synthetic/chair', 8 | dataset_type='blender', 9 | white_bkgd=True, 10 | ) 11 | 12 | -------------------------------------------------------------------------------- /configs/nerf/drums.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_drums' 4 | basedir = './logs/nerf_synthetic' 5 | 6 | data = dict( 7 | datadir='./data/nerf_synthetic/drums', 8 | dataset_type='blender', 9 | white_bkgd=True, 10 | ) 11 | 12 | -------------------------------------------------------------------------------- /configs/nerf/ficus.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_ficus' 4 | basedir = './logs/nerf_synthetic' 5 | 6 | data = dict( 7 | datadir='./data/nerf_synthetic/ficus', 8 | dataset_type='blender', 9 | white_bkgd=True, 10 | ) 11 | 12 | -------------------------------------------------------------------------------- /configs/nerf/hotdog.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_hotdog' 4 | basedir = './logs/nerf_synthetic' 5 | 6 | data = dict( 7 | datadir='./data/nerf_synthetic/hotdog', 8 | dataset_type='blender', 9 | white_bkgd=True, 10 | ) 11 | 12 | -------------------------------------------------------------------------------- /configs/nerf/lego.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_lego' 4 | basedir = './logs/nerf_synthetic' 5 | 6 | data = dict( 7 | datadir='./data/nerf_synthetic/lego', 8 | dataset_type='blender', 9 | white_bkgd=True, 10 | ) 11 | 12 | -------------------------------------------------------------------------------- /configs/nerf/materials.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_materials' 4 | basedir = './logs/nerf_synthetic' 5 | 6 | data = dict( 7 | datadir='./data/nerf_synthetic/materials', 8 | dataset_type='blender', 9 | white_bkgd=True, 10 | ) 11 | 12 | -------------------------------------------------------------------------------- /configs/nerf/mic.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_mic' 4 | basedir = './logs/nerf_synthetic' 5 | 6 | data = dict( 7 | datadir='./data/nerf_synthetic/mic', 8 | dataset_type='blender', 9 | white_bkgd=True, 10 | ) 11 | 12 | -------------------------------------------------------------------------------- /configs/nerf/ship.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_ship' 4 | basedir = './logs/nerf_synthetic' 5 | 6 | data = dict( 7 | datadir='./data/nerf_synthetic/ship', 8 | dataset_type='blender', 9 | white_bkgd=True, 10 | ) 11 | 12 | -------------------------------------------------------------------------------- /configs/nerf/ship.tensorf.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_ship_tensorf' 4 | basedir = './logs/nerf_synthetic' 5 | 6 | data = dict( 7 | datadir='./data/nerf_synthetic/ship', 8 | dataset_type='blender', 9 | white_bkgd=True, 10 | ) 11 | 12 | 
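# The overrides below swap both voxel grids from the default 'DenseGrid' to the
# factorized 'TensoRFGrid' variant with n_comp low-rank components per grid,
# and lower the two grid learning rates accordingly.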
fine_train = dict( 13 | lrate_density=0.02, 14 | lrate_k0=0.02, 15 | pg_scale=[1000,2000,3000,4000,5000,6000], 16 | ) 17 | 18 | fine_model_and_render = dict( 19 | num_voxels=384**3, 20 | density_type='TensoRFGrid', 21 | density_config=dict(n_comp=8), 22 | k0_type='TensoRFGrid', 23 | k0_config=dict(n_comp=24), 24 | ) 25 | 26 | -------------------------------------------------------------------------------- /configs/nerf_unbounded/bicycle.py: -------------------------------------------------------------------------------- 1 | _base_ = './nerf_unbounded_default.py' 2 | 3 | expname = 'dvgo_bicycle_unbounded' 4 | 5 | data = dict( 6 | datadir='./data/360_v2/bicycle', 7 | factor=4, # 1237x822 8 | movie_render_kwargs=dict( 9 | shift_x=0.0, # positive right 10 | shift_y=0, # negative down 11 | shift_z=0, 12 | scale_r=1.0, 13 | pitch_deg=-10, # negative look downward 14 | ), 15 | ) 16 | 17 | -------------------------------------------------------------------------------- /configs/nerf_unbounded/bonsai.py: -------------------------------------------------------------------------------- 1 | _base_ = './nerf_unbounded_default.py' 2 | 3 | expname = 'dvgo_bonsai_unbounded' 4 | 5 | data = dict( 6 | datadir='./data/360_v2/bonsai', 7 | factor=2, # 1559x1039 8 | movie_render_kwargs=dict( 9 | shift_x=0.0, # positive right 10 | shift_y=0, # negative down 11 | shift_z=0, 12 | scale_r=1.0, 13 | pitch_deg=-30, # negative look downward 14 | ), 15 | ) 16 | 17 | -------------------------------------------------------------------------------- /configs/nerf_unbounded/counter.py: -------------------------------------------------------------------------------- 1 | _base_ = './nerf_unbounded_default.py' 2 | 3 | expname = 'dvgo_counter_unbounded' 4 | 5 | data = dict( 6 | datadir='./data/360_v2/counter', 7 | factor=2, # 1558x1038 8 | movie_render_kwargs=dict( 9 | shift_x=0.0, # positive right 10 | shift_y=-0.2, # negative down 11 | shift_z=0, 12 | scale_r=0.9, 13 | pitch_deg=-30, # negative look downward 14 | ), 15 | ) 16 | 17 | -------------------------------------------------------------------------------- /configs/nerf_unbounded/garden.py: -------------------------------------------------------------------------------- 1 | _base_ = './nerf_unbounded_default.py' 2 | 3 | expname = 'dvgo_garden_unbounded' 4 | 5 | data = dict( 6 | datadir='./data/360_v2/garden', 7 | factor=4, # 1297x840 8 | movie_render_kwargs=dict( 9 | shift_x=0.0, # positive right 10 | shift_y=-0.0, # negative down 11 | shift_z=0, 12 | scale_r=0.9, 13 | pitch_deg=-30, 14 | ), 15 | ) 16 | 17 | -------------------------------------------------------------------------------- /configs/nerf_unbounded/kitchen.py: -------------------------------------------------------------------------------- 1 | _base_ = './nerf_unbounded_default.py' 2 | 3 | expname = 'dvgo_kitchen_unbounded' 4 | 5 | data = dict( 6 | datadir='./data/360_v2/kitchen', 7 | factor=2, # 1558x1039 8 | movie_render_kwargs=dict( 9 | shift_y=-0.0, 10 | scale_r=0.9, 11 | pitch_deg=-40, 12 | ), 13 | ) 14 | 15 | -------------------------------------------------------------------------------- /configs/nerf_unbounded/nerf_unbounded_default.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | basedir = './logs/nerf_unbounded' 4 | 5 | data = dict( 6 | dataset_type='llff', 7 | spherify=True, 8 | factor=4, 9 | llffhold=8, 10 | white_bkgd=True, 11 | rand_bkgd=True, 12 | unbounded_inward=True, 13 | load2gpu_on_the_fly=True, 14 | ) 15 | 16 | 
coarse_train = dict(N_iters=0) 17 | 18 | fine_train = dict( 19 | N_iters=40000, 20 | N_rand=4096, 21 | lrate_decay=80, 22 | ray_sampler='flatten', 23 | weight_nearclip=1.0, 24 | weight_distortion=0.01, 25 | pg_scale=[2000,4000,6000,8000,10000,12000,14000,16000], 26 | tv_before=20000, 27 | tv_dense_before=20000, 28 | weight_tv_density=1e-6, 29 | weight_tv_k0=1e-7, 30 | ) 31 | 32 | alpha_init = 1e-4 33 | stepsize = 0.5 34 | 35 | fine_model_and_render = dict( 36 | num_voxels=320**3, 37 | num_voxels_base=320**3, 38 | alpha_init=alpha_init, 39 | stepsize=stepsize, 40 | fast_color_thres={ 41 | '_delete_': True, 42 | 0 : alpha_init*stepsize/10, 43 | 1500: min(alpha_init, 1e-4)*stepsize/5, 44 | 2500: min(alpha_init, 1e-4)*stepsize/2, 45 | 3500: min(alpha_init, 1e-4)*stepsize/1.5, 46 | 4500: min(alpha_init, 1e-4)*stepsize, 47 | 5500: min(alpha_init, 1e-4), 48 | 6500: 1e-4, 49 | }, 50 | world_bound_scale=1, 51 | ) 52 | 53 | -------------------------------------------------------------------------------- /configs/nerf_unbounded/room.py: -------------------------------------------------------------------------------- 1 | _base_ = './nerf_unbounded_default.py' 2 | 3 | expname = 'dvgo_room_unbounded' 4 | 5 | data = dict( 6 | datadir='./data/360_v2/room', 7 | factor=2, # 1557x1038 8 | movie_render_kwargs=dict( 9 | shift_x=0.0, # positive right 10 | shift_y=-0.3, # negative down 11 | shift_z=0, 12 | scale_r=0.2, 13 | pitch_deg=-40, # negative look downward 14 | ), 15 | ) 16 | 17 | -------------------------------------------------------------------------------- /configs/nerf_unbounded/stump.py: -------------------------------------------------------------------------------- 1 | _base_ = './nerf_unbounded_default.py' 2 | 3 | expname = 'dvgo_stump_unbounded' 4 | 5 | data = dict( 6 | datadir='./data/360_v2/stump', 7 | factor=4, 8 | movie_render_kwargs=dict( 9 | shift_x=0.0, # positive right 10 | shift_y=-0.2, # negative down 11 | shift_z=0, 12 | scale_r=0.8, 13 | pitch_deg=-20, # negative look downward 14 | ), 15 | ) 16 | 17 | -------------------------------------------------------------------------------- /configs/nsvf/Bike.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_Bike' 4 | basedir = './logs/nsvf_synthetic' 5 | 6 | data = dict( 7 | datadir='./data/Synthetic_NSVF/Bike', 8 | dataset_type='nsvf', 9 | inverse_y=True, 10 | white_bkgd=True, 11 | ) 12 | 13 | -------------------------------------------------------------------------------- /configs/nsvf/Lifestyle.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_Lifestyle' 4 | basedir = './logs/nsvf_synthetic' 5 | 6 | data = dict( 7 | datadir='./data/Synthetic_NSVF/Lifestyle', 8 | dataset_type='nsvf', 9 | inverse_y=True, 10 | white_bkgd=True, 11 | ) 12 | 13 | -------------------------------------------------------------------------------- /configs/nsvf/Palace.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_Palace' 4 | basedir = './logs/nsvf_synthetic' 5 | 6 | data = dict( 7 | datadir='./data/Synthetic_NSVF/Palace', 8 | dataset_type='nsvf', 9 | inverse_y=True, 10 | white_bkgd=True, 11 | ) 12 | 13 | -------------------------------------------------------------------------------- /configs/nsvf/Robot.py: -------------------------------------------------------------------------------- 1 | _base_ = 
'../default.py' 2 | 3 | expname = 'dvgo_Robot' 4 | basedir = './logs/nsvf_synthetic' 5 | 6 | data = dict( 7 | datadir='./data/Synthetic_NSVF/Robot', 8 | dataset_type='nsvf', 9 | inverse_y=True, 10 | white_bkgd=True, 11 | ) 12 | 13 | -------------------------------------------------------------------------------- /configs/nsvf/Spaceship.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_Spaceship' 4 | basedir = './logs/nsvf_synthetic' 5 | 6 | data = dict( 7 | datadir='./data/Synthetic_NSVF/Spaceship', 8 | dataset_type='nsvf', 9 | inverse_y=True, 10 | white_bkgd=True, 11 | ) 12 | 13 | -------------------------------------------------------------------------------- /configs/nsvf/Steamtrain.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_Steamtrain' 4 | basedir = './logs/nsvf_synthetic' 5 | 6 | data = dict( 7 | datadir='./data/Synthetic_NSVF/Steamtrain', 8 | dataset_type='nsvf', 9 | inverse_y=True, 10 | white_bkgd=True, 11 | ) 12 | 13 | -------------------------------------------------------------------------------- /configs/nsvf/Toad.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_Toad' 4 | basedir = './logs/nsvf_synthetic' 5 | 6 | data = dict( 7 | datadir='./data/Synthetic_NSVF/Toad', 8 | dataset_type='nsvf', 9 | inverse_y=True, 10 | white_bkgd=True, 11 | ) 12 | 13 | -------------------------------------------------------------------------------- /configs/nsvf/Wineholder.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_Wineholder' 4 | basedir = './logs/nsvf_synthetic' 5 | 6 | data = dict( 7 | datadir='./data/Synthetic_NSVF/Wineholder', 8 | dataset_type='nsvf', 9 | inverse_y=True, 10 | white_bkgd=True, 11 | ) 12 | 13 | -------------------------------------------------------------------------------- /configs/tankstemple/Barn.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_Barn' 4 | basedir = './logs/tanks_and_temple' 5 | 6 | data = dict( 7 | datadir='./data/TanksAndTemple/Barn', 8 | dataset_type='tankstemple', 9 | inverse_y=True, 10 | load2gpu_on_the_fly=True, 11 | white_bkgd=True, 12 | ) 13 | 14 | coarse_train = dict( 15 | pervoxel_lr_downrate=2, 16 | ) 17 | 18 | -------------------------------------------------------------------------------- /configs/tankstemple/Barn_lg.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_Barn_lg' 4 | basedir = './logs/tanks_and_temple' 5 | 6 | data = dict( 7 | datadir='./data/TanksAndTemple/Barn', 8 | dataset_type='tankstemple', 9 | inverse_y=True, 10 | load2gpu_on_the_fly=True, 11 | white_bkgd=True, 12 | movie_render_kwargs={'flip_up_vec': True}, 13 | ) 14 | 15 | coarse_train = dict( 16 | pervoxel_lr_downrate=2, 17 | ) 18 | 19 | fine_train = dict(pg_scale=[1000,2000,3000,4000,5000,6000]) 20 | fine_model_and_render = dict(num_voxels=256**3) 21 | 22 | -------------------------------------------------------------------------------- /configs/tankstemple/Caterpillar.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_Caterpillar' 4 | basedir = 
'./logs/tanks_and_temple' 5 | 6 | data = dict( 7 | datadir='./data/TanksAndTemple/Caterpillar', 8 | dataset_type='tankstemple', 9 | inverse_y=True, 10 | load2gpu_on_the_fly=True, 11 | white_bkgd=True, 12 | ) 13 | 14 | coarse_train = dict( 15 | pervoxel_lr_downrate=2, 16 | ) 17 | 18 | -------------------------------------------------------------------------------- /configs/tankstemple/Caterpillar_lg.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_Caterpillar_lg' 4 | basedir = './logs/tanks_and_temple' 5 | 6 | data = dict( 7 | datadir='./data/TanksAndTemple/Caterpillar', 8 | dataset_type='tankstemple', 9 | inverse_y=True, 10 | load2gpu_on_the_fly=True, 11 | white_bkgd=True, 12 | ) 13 | 14 | coarse_train = dict( 15 | pervoxel_lr_downrate=2, 16 | ) 17 | 18 | fine_train = dict(pg_scale=[1000,2000,3000,4000,5000,6000]) 19 | fine_model_and_render = dict(num_voxels=256**3) 20 | 21 | -------------------------------------------------------------------------------- /configs/tankstemple/Family.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_Family' 4 | basedir = './logs/tanks_and_temple' 5 | 6 | data = dict( 7 | datadir='./data/TanksAndTemple/Family', 8 | dataset_type='tankstemple', 9 | inverse_y=True, 10 | load2gpu_on_the_fly=True, 11 | white_bkgd=True, 12 | ) 13 | 14 | coarse_train = dict( 15 | pervoxel_lr_downrate=2, 16 | ) 17 | 18 | -------------------------------------------------------------------------------- /configs/tankstemple/Family_lg.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_Family_lg' 4 | basedir = './logs/tanks_and_temple' 5 | 6 | data = dict( 7 | datadir='./data/TanksAndTemple/Family', 8 | dataset_type='tankstemple', 9 | inverse_y=True, 10 | load2gpu_on_the_fly=True, 11 | white_bkgd=True, 12 | movie_render_kwargs={'pitch_deg': 20}, 13 | ) 14 | 15 | coarse_train = dict( 16 | pervoxel_lr_downrate=2, 17 | ) 18 | 19 | fine_train = dict(pg_scale=[1000,2000,3000,4000,5000,6000]) 20 | fine_model_and_render = dict(num_voxels=256**3) 21 | 22 | -------------------------------------------------------------------------------- /configs/tankstemple/Ignatius.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_Ignatius' 4 | basedir = './logs/tanks_and_temple' 5 | 6 | data = dict( 7 | datadir='./data/TanksAndTemple/Ignatius', 8 | dataset_type='tankstemple', 9 | inverse_y=True, 10 | load2gpu_on_the_fly=True, 11 | white_bkgd=True, 12 | ) 13 | 14 | coarse_train = dict( 15 | pervoxel_lr_downrate=2, 16 | ) 17 | 18 | -------------------------------------------------------------------------------- /configs/tankstemple/Ignatius_lg.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_Ignatius_lg' 4 | basedir = './logs/tanks_and_temple' 5 | 6 | data = dict( 7 | datadir='./data/TanksAndTemple/Ignatius', 8 | dataset_type='tankstemple', 9 | inverse_y=True, 10 | load2gpu_on_the_fly=True, 11 | white_bkgd=True, 12 | ) 13 | 14 | coarse_train = dict( 15 | pervoxel_lr_downrate=2, 16 | ) 17 | 18 | fine_train = dict(pg_scale=[1000,2000,3000,4000,5000,6000]) 19 | fine_model_and_render = dict(num_voxels=256**3) 20 | 21 | 
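The `*_lg.py` configs above override only a handful of keys on top of `_base_`; every other setting is inherited from `../default.py`. A minimal sketch of the merge rule these configs rely on (mmcv-style recursive dict merge; `merge_cfg` and the sample values are illustrative, not the repo's actual loader):

    def merge_cfg(base, override):
        # dicts merge key-by-key; a '_delete_': True marker replaces the base
        # dict wholesale (see fast_color_thres in the unbounded defaults)
        out = dict(base)
        for k, v in override.items():
            if isinstance(v, dict) and isinstance(out.get(k), dict):
                v = dict(v)
                if v.pop('_delete_', False):
                    out[k] = v
                else:
                    out[k] = merge_cfg(out[k], v)
            else:
                out[k] = v
        return out

    # e.g. Ignatius_lg.py changes pg_scale and num_voxels but keeps every other
    # fine_train key from default.py (base values here are illustrative):
    base = dict(fine_train=dict(N_iters=20000, pg_scale=[1000, 2000, 3000, 4000]))
    child = dict(fine_train=dict(pg_scale=[1000, 2000, 3000, 4000, 5000, 6000]))
    assert merge_cfg(base, child)['fine_train']['N_iters'] == 20000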
-------------------------------------------------------------------------------- /configs/tankstemple/Truck.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_Truck' 4 | basedir = './logs/tanks_and_temple' 5 | 6 | data = dict( 7 | datadir='./data/TanksAndTemple/Truck', 8 | dataset_type='tankstemple', 9 | inverse_y=True, 10 | load2gpu_on_the_fly=True, 11 | white_bkgd=True, 12 | ) 13 | 14 | coarse_train = dict( 15 | pervoxel_lr_downrate=2, 16 | ) 17 | 18 | -------------------------------------------------------------------------------- /configs/tankstemple/Truck_lg.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_Truck_lg' 4 | basedir = './logs/tanks_and_temple' 5 | 6 | data = dict( 7 | datadir='./data/TanksAndTemple/Truck', 8 | dataset_type='tankstemple', 9 | inverse_y=True, 10 | load2gpu_on_the_fly=True, 11 | white_bkgd=True, 12 | movie_render_kwargs={'flip_up_vec': True, 'shift_y': -0.1}, 13 | ) 14 | 15 | coarse_train = dict( 16 | pervoxel_lr_downrate=2, 17 | ) 18 | 19 | fine_train = dict(pg_scale=[1000,2000,3000,4000,5000,6000]) 20 | fine_model_and_render = dict(num_voxels=256**3) 21 | 22 | -------------------------------------------------------------------------------- /configs/tankstemple_unbounded/M60.py: -------------------------------------------------------------------------------- 1 | _base_ = './tt_default.py' 2 | 3 | expname = 'dvgo_M60_unbounded' 4 | 5 | data = dict( 6 | datadir='./data/tanks_and_temples/tat_intermediate_M60', 7 | ) 8 | 9 | -------------------------------------------------------------------------------- /configs/tankstemple_unbounded/Playground.py: -------------------------------------------------------------------------------- 1 | _base_ = './tt_default.py' 2 | 3 | expname = 'dvgo_Playground_unbounded' 4 | 5 | data = dict( 6 | datadir='./data/tanks_and_temples/tat_intermediate_Playground', 7 | ) 8 | 9 | -------------------------------------------------------------------------------- /configs/tankstemple_unbounded/Train.py: -------------------------------------------------------------------------------- 1 | _base_ = './tt_default.py' 2 | 3 | expname = 'dvgo_Train_unbounded' 4 | 5 | data = dict( 6 | datadir='./data/tanks_and_temples/tat_intermediate_Train', 7 | ) 8 | 9 | -------------------------------------------------------------------------------- /configs/tankstemple_unbounded/Truck.py: -------------------------------------------------------------------------------- 1 | _base_ = './tt_default.py' 2 | 3 | expname = 'dvgo_Truck_unbounded' 4 | 5 | data = dict( 6 | datadir='./data/tanks_and_temples/tat_training_Truck', 7 | ) 8 | 9 | -------------------------------------------------------------------------------- /configs/tankstemple_unbounded/tt_default.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | basedir = './logs/tanks_and_temple_unbounded' 4 | 5 | data = dict( 6 | dataset_type='nerfpp', 7 | inverse_y=True, 8 | white_bkgd=True, 9 | rand_bkgd=True, 10 | unbounded_inward=True, 11 | load2gpu_on_the_fly=True, 12 | ) 13 | 14 | coarse_train = dict(N_iters=0) 15 | 16 | fine_train = dict( 17 | N_iters=30000, 18 | N_rand=4096, 19 | ray_sampler='flatten', 20 | weight_distortion=0.01, 21 | pg_scale=[1000,2000,3000,4000,5000,6000,7000], 22 | tv_before=1e9, 23 | tv_dense_before=10000, 24 | weight_tv_density=1e-6, 25 | 
weight_tv_k0=1e-7, 26 | ) 27 | 28 | alpha_init = 1e-4 29 | stepsize = 0.5 30 | 31 | fine_model_and_render = dict( 32 | num_voxels=320**3, 33 | num_voxels_base=320**3, 34 | alpha_init=alpha_init, 35 | stepsize=stepsize, 36 | fast_color_thres={ 37 | '_delete_': True, 38 | 0 : alpha_init*stepsize/10, 39 | 1500: min(alpha_init, 1e-4)*stepsize/5, 40 | 2500: min(alpha_init, 1e-4)*stepsize/2, 41 | 3500: min(alpha_init, 1e-4)*stepsize/1.5, 42 | 4500: min(alpha_init, 1e-4)*stepsize, 43 | 5500: min(alpha_init, 1e-4), 44 | 6500: 1e-4, 45 | }, 46 | world_bound_scale=1, 47 | contracted_norm='l2', 48 | ) 49 | 50 | -------------------------------------------------------------------------------- /figs/debug_cam_and_bbox.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunset1995/DirectVoxGO/341e1fc4e96efff146d42cd6f31b8199a3e536f7/figs/debug_cam_and_bbox.png -------------------------------------------------------------------------------- /figs/debug_coarse_volume.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunset1995/DirectVoxGO/341e1fc4e96efff146d42cd6f31b8199a3e536f7/figs/debug_coarse_volume.png -------------------------------------------------------------------------------- /lib/cuda/adam_upd.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/extension.h> 2 | 3 | #include <vector> 4 | 5 | // CUDA forward declarations 6 | 7 | void adam_upd_cuda( 8 | torch::Tensor param, 9 | torch::Tensor grad, 10 | torch::Tensor exp_avg, 11 | torch::Tensor exp_avg_sq, 12 | int step, float beta1, float beta2, float lr, float eps); 13 | 14 | void masked_adam_upd_cuda( 15 | torch::Tensor param, 16 | torch::Tensor grad, 17 | torch::Tensor exp_avg, 18 | torch::Tensor exp_avg_sq, 19 | int step, float beta1, float beta2, float lr, float eps); 20 | 21 | void adam_upd_with_perlr_cuda( 22 | torch::Tensor param, 23 | torch::Tensor grad, 24 | torch::Tensor exp_avg, 25 | torch::Tensor exp_avg_sq, 26 | torch::Tensor perlr, 27 | int step, float beta1, float beta2, float lr, float eps); 28 | 29 | 30 | // C++ interface 31 | 32 | #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor") 33 | #define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x " must be contiguous") 34 | #define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x) 35 | 36 | void adam_upd( 37 | torch::Tensor param, 38 | torch::Tensor grad, 39 | torch::Tensor exp_avg, 40 | torch::Tensor exp_avg_sq, 41 | int step, float beta1, float beta2, float lr, float eps) { 42 | CHECK_INPUT(param); 43 | CHECK_INPUT(grad); 44 | CHECK_INPUT(exp_avg); 45 | CHECK_INPUT(exp_avg_sq); 46 | adam_upd_cuda(param, grad, exp_avg, exp_avg_sq, 47 | step, beta1, beta2, lr, eps); 48 | } 49 | 50 | void masked_adam_upd( 51 | torch::Tensor param, 52 | torch::Tensor grad, 53 | torch::Tensor exp_avg, 54 | torch::Tensor exp_avg_sq, 55 | int step, float beta1, float beta2, float lr, float eps) { 56 | CHECK_INPUT(param); 57 | CHECK_INPUT(grad); 58 | CHECK_INPUT(exp_avg); 59 | CHECK_INPUT(exp_avg_sq); 60 | masked_adam_upd_cuda(param, grad, exp_avg, exp_avg_sq, 61 | step, beta1, beta2, lr, eps); 62 | } 63 | 64 | void adam_upd_with_perlr( 65 | torch::Tensor param, 66 | torch::Tensor grad, 67 | torch::Tensor exp_avg, 68 | torch::Tensor exp_avg_sq, 69 | torch::Tensor perlr, 70 | int step, float beta1, float beta2, float lr, float eps) { 71 | CHECK_INPUT(param); 72 | CHECK_INPUT(grad); 73 | CHECK_INPUT(exp_avg); 74 | CHECK_INPUT(exp_avg_sq); 75 | adam_upd_with_perlr_cuda(param, grad, exp_avg, exp_avg_sq, perlr, 76 | step, beta1, beta2, lr, eps); 77 | } 78 | 79 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 80 | m.def("adam_upd", &adam_upd, 81 | "Adam update"); 82 | m.def("masked_adam_upd", &masked_adam_upd, 83 | "Adam update ignoring zero grad"); 84 | m.def("adam_upd_with_perlr", &adam_upd_with_perlr, 85 | "Adam update ignoring zero grad with per-voxel lr"); 86 | } 87 | 88 |
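These bindings are compiled at import time and consumed by `lib/masked_adam.py`. A sketch of how the exposed functions can be driven from Python, assuming the same `torch.utils.cpp_extension.load` pattern used in `lib/grid.py` (the `fused_masked_adam_step` helper below is illustrative, not the repo's wrapper):

    import os
    import torch
    from torch.utils.cpp_extension import load

    parent_dir = os.path.dirname(os.path.abspath(__file__))
    adam_upd_cuda = load(
        name='adam_upd_cuda',
        sources=[os.path.join(parent_dir, path)
                 for path in ['cuda/adam_upd.cpp', 'cuda/adam_upd_kernel.cu']],
        verbose=True)

    @torch.no_grad()
    def fused_masked_adam_step(param, exp_avg, exp_avg_sq, step,
                               beta1=0.9, beta2=0.99, lr=1e-1, eps=1e-8):
        # one fused kernel launch; entries whose grad is exactly 0 (voxels no
        # sampled ray touched this iteration) keep params and moment buffers
        adam_upd_cuda.masked_adam_upd(
            param, param.grad, exp_avg, exp_avg_sq,
            step, beta1, beta2, lr, eps)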
-------------------------------------------------------------------------------- /lib/cuda/adam_upd_kernel.cu: -------------------------------------------------------------------------------- 1 | #include <torch/extension.h> 2 | 3 | #include <cuda.h> 4 | #include <cuda_runtime.h> 5 | 6 | #include <vector> 7 | 8 | template <typename scalar_t> 9 | __global__ void adam_upd_cuda_kernel( 10 | scalar_t* __restrict__ param, 11 | const scalar_t* __restrict__ grad, 12 | scalar_t* __restrict__ exp_avg, 13 | scalar_t* __restrict__ exp_avg_sq, 14 | const size_t N, 15 | const float step_size, const float beta1, const float beta2, const float eps) { 16 | 17 | const size_t index = blockIdx.x * blockDim.x + threadIdx.x; 18 | if(index<N) { 19 | exp_avg[index] = beta1 * exp_avg[index] + (1-beta1) * grad[index]; 20 | exp_avg_sq[index] = beta2 * exp_avg_sq[index] + (1-beta2) * grad[index] * grad[index]; 21 | param[index] -= step_size * exp_avg[index] / (sqrt(exp_avg_sq[index]) + eps); 22 | } 23 | } 24 | 25 | template <typename scalar_t> 26 | __global__ void masked_adam_upd_cuda_kernel( 27 | scalar_t* __restrict__ param, 28 | const scalar_t* __restrict__ grad, 29 | scalar_t* __restrict__ exp_avg, 30 | scalar_t* __restrict__ exp_avg_sq, 31 | const size_t N, 32 | const float step_size, const float beta1, const float beta2, const float eps) { 33 | 34 | const size_t index = blockIdx.x * blockDim.x + threadIdx.x; 35 | if(index<N && grad[index]!=0) { 36 | exp_avg[index] = beta1 * exp_avg[index] + (1-beta1) * grad[index]; 37 | exp_avg_sq[index] = beta2 * exp_avg_sq[index] + (1-beta2) * grad[index] * grad[index]; 38 | param[index] -= step_size * exp_avg[index] / (sqrt(exp_avg_sq[index]) + eps); 39 | } 40 | } 41 | 42 | template <typename scalar_t> 43 | __global__ void adam_upd_with_perlr_cuda_kernel( 44 | scalar_t* __restrict__ param, 45 | const scalar_t* __restrict__ grad, 46 | scalar_t* __restrict__ exp_avg, 47 | scalar_t* __restrict__ exp_avg_sq, 48 | scalar_t* __restrict__ perlr, 49 | const size_t N, 50 | const float step_size, const float beta1, const float beta2, const float eps) { 51 | 52 | const size_t index = blockIdx.x * blockDim.x + threadIdx.x; 53 | if(index<N) { 54 | exp_avg[index] = beta1 * exp_avg[index] + (1-beta1) * grad[index]; 55 | exp_avg_sq[index] = beta2 * exp_avg_sq[index] + (1-beta2) * grad[index] * grad[index]; 56 | param[index] -= step_size * perlr[index] * exp_avg[index] / (sqrt(exp_avg_sq[index]) + eps); 57 | } 58 | } 59 | 60 | void adam_upd_cuda( 61 | torch::Tensor param, 62 | torch::Tensor grad, 63 | torch::Tensor exp_avg, 64 | torch::Tensor exp_avg_sq, 65 | const int step, const float beta1, const float beta2, const float lr, const float eps) { 66 | 67 | const size_t N = param.numel(); 68 | 69 | const int threads = 256; 70 | const int blocks = (N + threads - 1) / threads; 71 | 72 | const float step_size = lr * sqrt(1 - pow(beta2, (float)step)) / (1 - pow(beta1, (float)step)); 73 | 74 | AT_DISPATCH_FLOATING_TYPES(param.type(), "adam_upd_cuda", ([&] { 75 | adam_upd_cuda_kernel<scalar_t><<<blocks, threads>>>( 76 | param.data<scalar_t>(), 77 | grad.data<scalar_t>(), 78 | exp_avg.data<scalar_t>(), 79 | exp_avg_sq.data<scalar_t>(), 80 | N, step_size, beta1, beta2, eps); 81 | })); 82 | } 83 | 84 | void masked_adam_upd_cuda( 85 | torch::Tensor param, 86 | torch::Tensor grad, 87 | torch::Tensor exp_avg, 88 | torch::Tensor exp_avg_sq, 89 | const int step, const float beta1, const float beta2, const float lr, const float eps) { 90 | 91 | const size_t N = param.numel(); 92 | 93 | const int threads = 256; 94 | const int blocks = (N + threads - 1) / threads; 95 | 96 | const float step_size = lr * sqrt(1 - pow(beta2, (float)step)) / (1 - pow(beta1, (float)step)); 97 | 98 | AT_DISPATCH_FLOATING_TYPES(param.type(), "masked_adam_upd_cuda", ([&] { 99 | masked_adam_upd_cuda_kernel<scalar_t><<<blocks, threads>>>( 100 | param.data<scalar_t>(), 101 | grad.data<scalar_t>(), 102 | exp_avg.data<scalar_t>(), 103 | exp_avg_sq.data<scalar_t>(), 104 | N, step_size, beta1, beta2, eps); 105 | })); 106 | } 107 | 108 | void adam_upd_with_perlr_cuda( 109 | torch::Tensor param, 110 | torch::Tensor grad, 111 | torch::Tensor exp_avg, 112 | torch::Tensor exp_avg_sq, 113 | torch::Tensor perlr, 114 | const int step, const float beta1, const float beta2, const float lr, const float eps) { 115 | 116 | const size_t N = param.numel(); 117 | 118 | const int threads = 256; 119 | const int blocks = (N + threads - 1) / threads; 120 | 121 | const float step_size = lr * sqrt(1 - pow(beta2, (float)step)) / (1 - pow(beta1, (float)step)); 122 | 123 | AT_DISPATCH_FLOATING_TYPES(param.type(), "adam_upd_with_perlr_cuda", ([&] { 124 | adam_upd_with_perlr_cuda_kernel<scalar_t><<<blocks, threads>>>( 125 | param.data<scalar_t>(), 126 | grad.data<scalar_t>(), 127 | exp_avg.data<scalar_t>(), 128 | exp_avg_sq.data<scalar_t>(), 129 | perlr.data<scalar_t>(), 130 | N, step_size, beta1, beta2, eps); 131 | })); 132 | } 133 | 134 |
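The three kernels differ only in their update mask and per-entry step scaling: the masked variant skips entries whose gradient is exactly zero, and the `perlr` variant multiplies the step by a per-voxel factor. A plain-PyTorch reference of the same update, with the bias correction folded into one scalar `step_size` as in the host functions above (a sketch for sanity-checking the CUDA path, not repo code):

    import torch

    def adam_upd_reference(param, grad, exp_avg, exp_avg_sq, step,
                           beta1=0.9, beta2=0.99, lr=1e-1, eps=1e-8,
                           perlr=None, masked=False):
        # bias correction folded into a single scalar, as on the host side
        step_size = lr * (1 - beta2 ** step) ** 0.5 / (1 - beta1 ** step)
        m = (grad != 0) if masked else torch.ones_like(grad, dtype=torch.bool)
        exp_avg[m] = beta1 * exp_avg[m] + (1 - beta1) * grad[m]
        exp_avg_sq[m] = beta2 * exp_avg_sq[m] + (1 - beta2) * grad[m] ** 2
        lr_m = step_size if perlr is None else step_size * perlr[m]
        param[m] -= lr_m * exp_avg[m] / (exp_avg_sq[m].sqrt() + eps)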
-------------------------------------------------------------------------------- /lib/cuda/render_utils.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/extension.h> 2 | 3 | #include <vector> 4 | 5 | // CUDA forward declarations 6 | 7 | std::vector<torch::Tensor> infer_t_minmax_cuda( 8 | torch::Tensor rays_o, torch::Tensor rays_d, torch::Tensor xyz_min, torch::Tensor xyz_max, 9 | const float near, const float far); 10 | 11 | torch::Tensor infer_n_samples_cuda(torch::Tensor rays_d, torch::Tensor t_min, torch::Tensor t_max, const float stepdist); 12 | 13 | std::vector<torch::Tensor> infer_ray_start_dir_cuda(torch::Tensor rays_o, torch::Tensor rays_d, torch::Tensor t_min); 14 | 15 | std::vector<torch::Tensor> sample_pts_on_rays_cuda( 16 | torch::Tensor rays_o, torch::Tensor rays_d, 17 | torch::Tensor xyz_min, torch::Tensor xyz_max, 18 | const float near, const float far, const float stepdist); 19 | 20 | std::vector<torch::Tensor> sample_ndc_pts_on_rays_cuda( 21 | torch::Tensor rays_o, torch::Tensor rays_d, 22 | torch::Tensor xyz_min, torch::Tensor xyz_max, 23 | const int N_samples); 24 | 25 | torch::Tensor sample_bg_pts_on_rays_cuda( 26 | torch::Tensor rays_o, torch::Tensor rays_d, torch::Tensor t_max, 27 | const float bg_preserve, const int N_samples); 28 | 29 | torch::Tensor maskcache_lookup_cuda(torch::Tensor world, torch::Tensor xyz, torch::Tensor xyz2ijk_scale, torch::Tensor xyz2ijk_shift); 30 | 31 | std::vector<torch::Tensor> raw2alpha_cuda(torch::Tensor density, const float shift, const float interval); 32 | std::vector<torch::Tensor> raw2alpha_nonuni_cuda(torch::Tensor density, const float shift, torch::Tensor interval); 33 | 34 | torch::Tensor raw2alpha_backward_cuda(torch::Tensor exp, torch::Tensor grad_back, const float interval); 35 | torch::Tensor raw2alpha_nonuni_backward_cuda(torch::Tensor exp, torch::Tensor grad_back, torch::Tensor interval); 36 | 37 | std::vector<torch::Tensor> alpha2weight_cuda(torch::Tensor alpha, torch::Tensor ray_id, const int n_rays); 38 | 39 | torch::Tensor alpha2weight_backward_cuda( 40 | torch::Tensor alpha, torch::Tensor weight, torch::Tensor T, torch::Tensor alphainv_last, 41 | torch::Tensor i_start, torch::Tensor i_end, const int n_rays, 42 | torch::Tensor grad_weights, torch::Tensor grad_last); 43 | 44 | // C++ interface 45 | 46 | #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor") 47 | #define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x " must be contiguous") 48 | #define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x) 49 | 50 | std::vector<torch::Tensor> infer_t_minmax( 51 | torch::Tensor rays_o, torch::Tensor rays_d, torch::Tensor xyz_min, torch::Tensor xyz_max, 52 | const float near, const float far) { 53 | CHECK_INPUT(rays_o); 54 | CHECK_INPUT(rays_d); 55 | CHECK_INPUT(xyz_min); 56 | CHECK_INPUT(xyz_max); 57 | return infer_t_minmax_cuda(rays_o, rays_d, xyz_min, xyz_max, near, far); 58 | } 59 | 60 | torch::Tensor infer_n_samples(torch::Tensor rays_d, torch::Tensor t_min, torch::Tensor t_max, const float stepdist) { 61 | CHECK_INPUT(rays_d); 62 | CHECK_INPUT(t_min); 63 | CHECK_INPUT(t_max); 64 | return infer_n_samples_cuda(rays_d, t_min, t_max, stepdist); 65 | } 66 | 67 | std::vector<torch::Tensor> infer_ray_start_dir(torch::Tensor rays_o, torch::Tensor rays_d, torch::Tensor t_min) { 68 | CHECK_INPUT(rays_o); 69 | CHECK_INPUT(rays_d); 70 | CHECK_INPUT(t_min); 71 | return infer_ray_start_dir_cuda(rays_o, rays_d, t_min); 72 | } 73 | 74 | std::vector<torch::Tensor> sample_pts_on_rays( 75 | torch::Tensor rays_o, torch::Tensor rays_d, 76 | torch::Tensor xyz_min, torch::Tensor xyz_max, 77 | const float near, const float far, const float stepdist) { 78 | CHECK_INPUT(rays_o); 79 | CHECK_INPUT(rays_d); 80 | CHECK_INPUT(xyz_min); 81 | CHECK_INPUT(xyz_max); 82 | assert(rays_o.dim()==2); 83 | assert(rays_o.size(1)==3); 84 | return sample_pts_on_rays_cuda(rays_o, rays_d, xyz_min, xyz_max, near, far, stepdist); 85 | } 86 | 87 | std::vector<torch::Tensor> sample_ndc_pts_on_rays( 88 | torch::Tensor rays_o, torch::Tensor rays_d, 89 | torch::Tensor xyz_min, torch::Tensor xyz_max, 90 | const int N_samples) { 91 | CHECK_INPUT(rays_o); 92 | CHECK_INPUT(rays_d); 93 | CHECK_INPUT(xyz_min); 94 | CHECK_INPUT(xyz_max); 95 | assert(rays_o.dim()==2); 96 | assert(rays_o.size(1)==3); 97 | return sample_ndc_pts_on_rays_cuda(rays_o, rays_d, xyz_min, xyz_max, N_samples); 98 | } 99 | 100 | torch::Tensor sample_bg_pts_on_rays( 101 | torch::Tensor rays_o, torch::Tensor rays_d, torch::Tensor t_max, 102 | const float bg_preserve, const int N_samples) { 103 | CHECK_INPUT(rays_o); 104 | CHECK_INPUT(rays_d); 105 | CHECK_INPUT(t_max); 106 | return sample_bg_pts_on_rays_cuda(rays_o, rays_d, t_max, bg_preserve, N_samples); 107 | } 108 | 109 | torch::Tensor maskcache_lookup(torch::Tensor world, torch::Tensor xyz, torch::Tensor xyz2ijk_scale, torch::Tensor xyz2ijk_shift) { 110 | CHECK_INPUT(world); 111 | CHECK_INPUT(xyz); 112 | CHECK_INPUT(xyz2ijk_scale); 113 | CHECK_INPUT(xyz2ijk_shift); 114 | assert(world.dim()==3); 115 | assert(xyz.dim()==2); 116 | assert(xyz.size(1)==3); 117 | return maskcache_lookup_cuda(world, xyz, xyz2ijk_scale, xyz2ijk_shift); 118 | } 119 | 120 | std::vector<torch::Tensor> raw2alpha(torch::Tensor density, const float shift, const float interval) { 121 | CHECK_INPUT(density); 122 | assert(density.dim()==1); 123 | return raw2alpha_cuda(density, shift, interval); 124 | } 125 | std::vector<torch::Tensor> raw2alpha_nonuni(torch::Tensor density, const float shift, torch::Tensor interval) { 126 | CHECK_INPUT(density); 127 | assert(density.dim()==1); 128 | return raw2alpha_nonuni_cuda(density, shift, interval); 129 | } 130 | 131 | torch::Tensor raw2alpha_backward(torch::Tensor exp, torch::Tensor grad_back, const float interval) { 132 | CHECK_INPUT(exp); 133 | CHECK_INPUT(grad_back); 134 | return raw2alpha_backward_cuda(exp, grad_back, interval); 135 | } 136 | torch::Tensor raw2alpha_nonuni_backward(torch::Tensor exp, torch::Tensor grad_back, torch::Tensor interval) { 137 | CHECK_INPUT(exp); 138 | CHECK_INPUT(grad_back); 139 | return raw2alpha_nonuni_backward_cuda(exp, grad_back, interval); 140 | } 141 | 142 | std::vector<torch::Tensor> alpha2weight(torch::Tensor alpha, torch::Tensor ray_id, const int n_rays) { 143 | CHECK_INPUT(alpha); 144 | CHECK_INPUT(ray_id); 145 | assert(alpha.dim()==1); 146 | assert(ray_id.dim()==1); 147 | assert(alpha.sizes()==ray_id.sizes()); 148 | return alpha2weight_cuda(alpha, ray_id, n_rays); 149 | } 150 | 151 | torch::Tensor alpha2weight_backward( 152 | torch::Tensor alpha, torch::Tensor weight, torch::Tensor T, torch::Tensor alphainv_last, 153 | torch::Tensor i_start, torch::Tensor i_end, const int n_rays, 154 | torch::Tensor grad_weights, torch::Tensor grad_last) { 155 | CHECK_INPUT(alpha); 156 | CHECK_INPUT(weight); 157 | CHECK_INPUT(T); 158 | CHECK_INPUT(alphainv_last); 159 | CHECK_INPUT(i_start); 160 | CHECK_INPUT(i_end); 161 | CHECK_INPUT(grad_weights); 162 | CHECK_INPUT(grad_last); 163 | return alpha2weight_backward_cuda( 164 | alpha, weight, T, alphainv_last, 165 | i_start, i_end, n_rays, 166 | grad_weights, grad_last); 167 | } 168 | 169 | 170 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 171 | m.def("infer_t_minmax", &infer_t_minmax, "Infer t_min and t_max of ray-bbox intersection"); 172 | m.def("infer_n_samples", &infer_n_samples, "Infer the number of points to sample on each ray"); 173 | m.def("infer_ray_start_dir", &infer_ray_start_dir, "Infer the starting point and shooting direction of each ray"); 174 | m.def("sample_pts_on_rays", &sample_pts_on_rays, "Sample points on rays"); 175 | m.def("sample_ndc_pts_on_rays", &sample_ndc_pts_on_rays, "Sample points on rays in NDC"); 176 | m.def("sample_bg_pts_on_rays", &sample_bg_pts_on_rays, "Sample points on bg"); 177 | m.def("maskcache_lookup", &maskcache_lookup, "Lookup to skip known free space."); 178 | m.def("raw2alpha", &raw2alpha, "Raw values [-inf, inf] to alpha [0, 1]."); 179 | m.def("raw2alpha_backward", &raw2alpha_backward, "Backward pass of the raw to alpha"); 180 | m.def("raw2alpha_nonuni", &raw2alpha_nonuni, "Raw values [-inf, inf] to alpha [0, 1]."); 181 | m.def("raw2alpha_nonuni_backward", &raw2alpha_nonuni_backward, "Backward pass of the raw to alpha"); 182 | m.def("alpha2weight", &alpha2weight, "Per-point alpha to accumulated blending weight"); 183 | m.def("alpha2weight_backward", &alpha2weight_backward, "Backward pass of alpha2weight"); 184 | } 185 | 186 | -------------------------------------------------------------------------------- /lib/cuda/total_variation.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/extension.h> 2 | 3 | #include <vector> 4 | 5 | // CUDA forward declarations 6 | 7 | void total_variation_add_grad_cuda(torch::Tensor param, torch::Tensor grad, float wx, float wy, float wz, bool dense_mode); 8 | 9 | 10 | // C++ interface 11 | 12 | #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor") 13 | #define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x " must be contiguous") 14 | #define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x) 15 | 16 | void total_variation_add_grad(torch::Tensor param, torch::Tensor grad, float wx, float wy, float wz, bool dense_mode) { 17 | CHECK_INPUT(param); 18 | CHECK_INPUT(grad); 19 | total_variation_add_grad_cuda(param, grad, wx, wy, wz, dense_mode); 20 | } 21 | 22 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 23 | m.def("total_variation_add_grad", &total_variation_add_grad, "Add total variation grad"); 24 | } 25 | 26 | -------------------------------------------------------------------------------- /lib/cuda/total_variation_kernel.cu: -------------------------------------------------------------------------------- 1 | #include <torch/extension.h> 2 | 3 | #include <cuda.h> 4 | #include <cuda_runtime.h> 5 | 6 | #include <vector> 7 | 8 | template <typename scalar_t, typename bound_t> 9 | __device__ __forceinline__ scalar_t clamp(const scalar_t v, const bound_t lo, const bound_t hi) { 10 | return min(max(v, lo), hi); 11 | } 12 | 13 | template <typename scalar_t, bool dense_mode> 14 | __global__ void total_variation_add_grad_cuda_kernel( 15 | const scalar_t* __restrict__ param, 16 | scalar_t* __restrict__ grad, 17 | float wx, float wy, float wz, 18 | const size_t sz_i, const size_t sz_j, const size_t sz_k, const size_t N) { 19 | 20 | const size_t index = blockIdx.x * blockDim.x + threadIdx.x; 21 | if(index<N && (dense_mode || grad[index]!=0)) { 22 | const size_t k = index % sz_k; 23 | const size_t j = index / sz_k % sz_j; 24 | const size_t i = index / sz_k / sz_j % sz_i; 25 | 26 | float grad_to_add = 0; 27 | grad_to_add += (k==0      ? 0 : wz * clamp(param[index]-param[index-1], -1.f, 1.f)); 28 | grad_to_add += (k==sz_k-1 ? 0 : wz * clamp(param[index]-param[index+1], -1.f, 1.f)); 29 | grad_to_add += (j==0      ? 0 : wy * clamp(param[index]-param[index-sz_k], -1.f, 1.f)); 30 | grad_to_add += (j==sz_j-1 ? 0 : wy * clamp(param[index]-param[index+sz_k], -1.f, 1.f)); 31 | grad_to_add += (i==0      ? 0 : wx * clamp(param[index]-param[index-sz_k*sz_j], -1.f, 1.f)); 32 | grad_to_add += (i==sz_i-1 ? 0 : wx * clamp(param[index]-param[index+sz_k*sz_j], -1.f, 1.f)); 33 | grad[index] += grad_to_add; 34 | } 35 | } 36 | 37 | void total_variation_add_grad_cuda(torch::Tensor param, torch::Tensor grad, float wx, float wy, float wz, bool dense_mode) { 38 | const size_t N = param.numel(); 39 | const size_t sz_i = param.size(2); 40 | const size_t sz_j = param.size(3); 41 | const size_t sz_k = param.size(4); 42 | const int threads = 256; 43 | const int blocks = (N + threads - 1) / threads; 44 | 45 | wx /= 6; 46 | wy /= 6; 47 | wz /= 6; 48 | 49 | if(dense_mode) { 50 | AT_DISPATCH_FLOATING_TYPES(param.type(), "total_variation_add_grad_cuda", ([&] { 51 | total_variation_add_grad_cuda_kernel<scalar_t,true><<<blocks, threads>>>( 52 | param.data<scalar_t>(), 53 | grad.data<scalar_t>(), 54 | wx, wy, wz, 55 | sz_i, sz_j, sz_k, N); 56 | })); 57 | } 58 | else { 59 | AT_DISPATCH_FLOATING_TYPES(param.type(), "total_variation_add_grad_cuda", ([&] { 60 | total_variation_add_grad_cuda_kernel<scalar_t,false><<<blocks, threads>>>( 61 | param.data<scalar_t>(), 62 | grad.data<scalar_t>(), 63 | wx, wy, wz, 64 | sz_i, sz_j, sz_k, N); 65 | })); 66 | } 67 | } 68 | 69 |
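The kernel above writes the gradient of a clamped (smooth-L1-style) total-variation penalty straight into `grad`, averaging the six axis-neighbor terms. An equivalent written with plain tensor ops; a reference sketch consistent with the kernel as shown, not repo code:

    import torch

    def tv_add_grad_reference(param, grad, wx, wy, wz, dense_mode=True):
        # param, grad: [1, C, X, Y, Z] as produced by DenseGrid
        g = torch.zeros_like(param)
        for w, dim in ((wx, 2), (wy, 3), (wz, 4)):
            n = param.size(dim) - 1
            d = (param.narrow(dim, 1, n) - param.narrow(dim, 0, n)).clamp(-1, 1)
            g.narrow(dim, 1, n).add_(d, alpha=w / 6)   # d|x_{i+1}-x_i| wrt x_{i+1}
            g.narrow(dim, 0, n).sub_(d, alpha=w / 6)   # d|x_{i+1}-x_i| wrt x_i
        if not dense_mode:
            g = g * (grad != 0)  # sparse mode: only voxels touched this step
        grad += g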
-------------------------------------------------------------------------------- /lib/cuda/ub360_utils.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/extension.h> 2 | 3 | #include <vector> 4 | 5 | // CUDA forward declarations 6 | 7 | torch::Tensor cumdist_thres_cuda(torch::Tensor dist, float thres); 8 | 9 | // C++ interface 10 | 11 | #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor") 12 | #define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x " must be contiguous") 13 | #define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x) 14 | 15 | torch::Tensor cumdist_thres(torch::Tensor dist, float thres) { 16 | CHECK_INPUT(dist); 17 | return cumdist_thres_cuda(dist, thres); 18 | } 19 | 20 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 21 | m.def("cumdist_thres", &cumdist_thres, "Generate mask for cumulative dist."); 22 | } 23 | 24 | -------------------------------------------------------------------------------- /lib/cuda/ub360_utils_kernel.cu: -------------------------------------------------------------------------------- 1 | #include <torch/extension.h> 2 | 3 | #include <cuda.h> 4 | #include <cuda_runtime.h> 5 | 6 | #include <vector> 7 | 8 | /* 9 | helper function to skip oversampled points, 10 | especially near the foreground scene bbox boundary 11 | */ 12 | template <typename scalar_t> 13 | __global__ void cumdist_thres_cuda_kernel( 14 | scalar_t* __restrict__ dist, 15 | const float thres, 16 | const int n_rays, 17 | const int n_pts, 18 | bool* __restrict__ mask) { 19 | const int i_ray = blockIdx.x * blockDim.x + threadIdx.x; 20 | if(i_ray<n_rays) { 21 | float cum_dist = 0; 22 | for(int i_pt=0; i_pt<n_pts; ++i_pt) { 23 | const int i = i_ray * n_pts + i_pt; 24 | cum_dist += dist[i]; 25 | // flag the point once the accumulated step distance passes thres, 26 | // then reset the accumulator and keep scanning along the ray 27 | const bool over = (cum_dist > thres); 28 | cum_dist *= float(!over); 29 | mask[i] = over; 30 | } 31 | } 32 | } 33 | 34 | torch::Tensor cumdist_thres_cuda(torch::Tensor dist, float thres) { 35 | const int n_rays = dist.size(0); 36 | const int n_pts = dist.size(1); 37 | const int threads = 256; 38 | const int blocks = (n_rays + threads - 1) / threads; 39 | auto mask = torch::zeros({n_rays, n_pts}, torch::dtype(torch::kBool).device(torch::kCUDA)); 40 | AT_DISPATCH_FLOATING_TYPES(dist.type(), "cumdist_thres_cuda", ([&] { 41 | cumdist_thres_cuda_kernel<scalar_t><<<blocks, threads>>>( 42 | dist.data<scalar_t>(), thres, 43 | n_rays, n_pts, 44 | mask.data<bool>()); 45 | })); 46 | return mask; 47 | } 48 | 49 | -------------------------------------------------------------------------------- /lib/dbvgo.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import functools 4 | import numpy as np 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | 10 | from torch_scatter import segment_coo 11 | 12 | from . 
import grid 13 | from .dvgo import Raw2Alpha, Alphas2Weights, render_utils_cuda 14 | from .dmpigo import create_full_step_id 15 | 16 | 17 | '''Model''' 18 | class DirectBiVoxGO(nn.Module): 19 | def __init__(self, xyz_min, xyz_max, 20 | num_voxels=0, num_voxels_base=0, 21 | alpha_init=None, 22 | mask_cache_world_size=None, 23 | fast_color_thres=0, bg_preserve=0.5, 24 | density_type='DenseGrid', k0_type='DenseGrid', 25 | density_config={}, k0_config={}, 26 | rgbnet_dim=0, bg_use_mlp=True, 27 | rgbnet_depth=3, rgbnet_width=128, 28 | viewbase_pe=4, 29 | **kwargs): 30 | super(DirectBiVoxGO, self).__init__() 31 | xyz_min = torch.Tensor(xyz_min) 32 | xyz_max = torch.Tensor(xyz_max) 33 | assert len(((xyz_max - xyz_min) * 100000).long().unique()), 'scene bbox must be a cube in DirectBiVoxGO' 34 | self.register_buffer('scene_center', (xyz_min + xyz_max) * 0.5) 35 | self.register_buffer('scene_radius', (xyz_max - xyz_min) * 0.5) 36 | self.register_buffer('xyz_min', torch.Tensor([-1,-1,-1])) 37 | self.register_buffer('xyz_max', torch.Tensor([1,1,1])) 38 | self.fast_color_thres = fast_color_thres 39 | self.bg_preserve = bg_preserve 40 | 41 | # determine based grid resolution 42 | self.num_voxels_base = num_voxels_base 43 | self.voxel_size_base = ((self.xyz_max - self.xyz_min).prod() / self.num_voxels_base).pow(1/3) 44 | 45 | # determine the density bias shift 46 | self.alpha_init = alpha_init 47 | self.register_buffer('act_shift', torch.FloatTensor([np.log(1/(1-alpha_init) - 1)])) 48 | print('dvgo: set density bias shift to', self.act_shift) 49 | 50 | # determine init grid resolution 51 | self._set_grid_resolution(num_voxels) 52 | 53 | # init density voxel grid 54 | self.density_type = density_type 55 | self.density_config = density_config 56 | self.density = nn.ModuleList([ 57 | grid.create_grid( 58 | density_type, channels=1, world_size=self.world_size, 59 | xyz_min=self.xyz_min, xyz_max=self.xyz_max, 60 | config=self.density_config) 61 | for _ in range(2) 62 | ]) 63 | 64 | # init color representation 65 | self.rgbnet_kwargs = { 66 | 'rgbnet_dim': rgbnet_dim, 67 | 'rgbnet_depth': rgbnet_depth, 'rgbnet_width': rgbnet_width, 68 | 'viewbase_pe': viewbase_pe, 69 | } 70 | self.k0_type = k0_type 71 | self.k0_config = k0_config 72 | if rgbnet_dim <= 0: 73 | # color voxel grid (coarse stage) 74 | self.k0_dim = 3 75 | self.k0 = nn.ModuleList([ 76 | grid.create_grid( 77 | k0_type, channels=self.k0_dim, world_size=self.world_size, 78 | xyz_min=self.xyz_min, xyz_max=self.xyz_max, 79 | config=self.k0_config) 80 | for _ in range(2) 81 | ]) 82 | self.rgbnet = None 83 | else: 84 | # feature voxel grid + shallow MLP (fine stage) 85 | self.k0_dim = rgbnet_dim 86 | self.k0 = nn.ModuleList([ 87 | grid.create_grid( 88 | k0_type, channels=self.k0_dim, world_size=self.world_size, 89 | xyz_min=self.xyz_min, xyz_max=self.xyz_max, 90 | config=self.k0_config) 91 | for _ in range(2) 92 | ]) 93 | self.register_buffer('viewfreq', torch.FloatTensor([(2**i) for i in range(viewbase_pe)])) 94 | dim0 = (3+3*viewbase_pe*2) 95 | dim0 += self.k0_dim 96 | self.rgbnet = nn.ModuleList([ 97 | nn.Sequential( 98 | nn.Linear(dim0, rgbnet_width), nn.ReLU(inplace=True), 99 | *[ 100 | nn.Sequential(nn.Linear(rgbnet_width, rgbnet_width), nn.ReLU(inplace=True)) 101 | for _ in range(rgbnet_depth-2) 102 | ], 103 | nn.Linear(rgbnet_width, 3), 104 | ) 105 | for _ in range(2) 106 | ]) 107 | nn.init.constant_(self.rgbnet[0][-1].bias, 0) 108 | nn.init.constant_(self.rgbnet[1][-1].bias, 0) 109 | if not bg_use_mlp: 110 | self.k0[1] = grid.create_grid( 111 | 
k0_type, channels=3, world_size=self.world_size, 112 | xyz_min=self.xyz_min, xyz_max=self.xyz_max, 113 | config=self.k0_config) 114 | self.rgbnet[1] = None 115 | print('dvgo: feature voxel grid', self.k0) 116 | print('dvgo: mlp', self.rgbnet) 117 | 118 | # Using the coarse geometry if provided (used to determine known free space and unknown space) 119 | # Re-implement as occupancy grid (2021/1/31) 120 | if mask_cache_world_size is None: 121 | mask_cache_world_size = self.world_size 122 | mask = torch.ones(list(mask_cache_world_size), dtype=torch.bool) 123 | self.mask_cache = nn.ModuleList([ 124 | grid.MaskGrid( 125 | path=None, mask=mask, 126 | xyz_min=self.xyz_min, xyz_max=self.xyz_max) 127 | for _ in range(2) 128 | ]) 129 | 130 | def _set_grid_resolution(self, num_voxels): 131 | # Determine grid resolution 132 | self.num_voxels = num_voxels 133 | self.voxel_size = ((self.xyz_max - self.xyz_min).prod() / num_voxels).pow(1/3) 134 | self.world_size = ((self.xyz_max - self.xyz_min) / self.voxel_size).long() 135 | self.voxel_size_ratio = self.voxel_size / self.voxel_size_base 136 | print('dvgo: voxel_size ', self.voxel_size) 137 | print('dvgo: world_size ', self.world_size) 138 | print('dvgo: voxel_size_base ', self.voxel_size_base) 139 | print('dvgo: voxel_size_ratio', self.voxel_size_ratio) 140 | 141 | def get_kwargs(self): 142 | return { 143 | 'xyz_min': self.xyz_min.cpu().numpy(), 144 | 'xyz_max': self.xyz_max.cpu().numpy(), 145 | 'num_voxels': self.num_voxels, 146 | 'num_voxels_base': self.num_voxels_base, 147 | 'alpha_init': self.alpha_init, 148 | 'voxel_size_ratio': self.voxel_size_ratio, 149 | 'mask_cache_world_size': list(self.mask_cache[0].mask.shape), 150 | 'fast_color_thres': self.fast_color_thres, 151 | 'density_type': self.density_type, 152 | 'k0_type': self.k0_type, 153 | 'density_config': self.density_config, 154 | 'k0_config': self.k0_config, 155 | **self.rgbnet_kwargs, 156 | } 157 | 158 | @torch.no_grad() 159 | def scale_volume_grid(self, num_voxels): 160 | print('dvgo: scale_volume_grid start') 161 | ori_world_size = self.world_size 162 | self._set_grid_resolution(num_voxels) 163 | print('dvgo: scale_volume_grid scale world_size from', ori_world_size.tolist(), 'to', self.world_size.tolist()) 164 | 165 | self.density[0].scale_volume_grid(self.world_size) 166 | self.density[1].scale_volume_grid(self.world_size) 167 | self.k0[0].scale_volume_grid(self.world_size) 168 | self.k0[1].scale_volume_grid(self.world_size) 169 | 170 | if np.prod(list(self.world_size)) <= 256**3: 171 | self_grid_xyz = torch.stack(torch.meshgrid( 172 | torch.linspace(self.xyz_min[0], self.xyz_max[0], self.world_size[0]), 173 | torch.linspace(self.xyz_min[1], self.xyz_max[1], self.world_size[1]), 174 | torch.linspace(self.xyz_min[2], self.xyz_max[2], self.world_size[2]), 175 | ), -1) 176 | self_alpha = [ 177 | F.max_pool3d(self.activate_density(self.density[0].get_dense_grid()), kernel_size=3, padding=1, stride=1)[0,0], 178 | F.max_pool3d(self.activate_density(self.density[1].get_dense_grid()), kernel_size=3, padding=1, stride=1)[0,0], 179 | ] 180 | self.mask_cache = nn.ModuleList([ 181 | grid.MaskGrid( 182 | path=None, mask=(self_alpha[i]>self.fast_color_thres), 183 | xyz_min=self.xyz_min, xyz_max=self.xyz_max) 184 | for i in range(2) 185 | ]) 186 | 187 | print('dvgo: scale_volume_grid finish') 188 | 189 | @torch.no_grad() 190 | def update_occupancy_cache(self): 191 | cache_grid_xyz = torch.stack(torch.meshgrid( 192 | torch.linspace(self.xyz_min[0], self.xyz_max[0], self.mask_cache[0].mask.shape[0]), 193 
| torch.linspace(self.xyz_min[1], self.xyz_max[1], self.mask_cache[0].mask.shape[1]), 194 | torch.linspace(self.xyz_min[2], self.xyz_max[2], self.mask_cache[0].mask.shape[2]), 195 | ), -1) 196 | for i in range(2): 197 | cache_grid_density = self.density[i](cache_grid_xyz)[None,None] 198 | cache_grid_alpha = self.activate_density(cache_grid_density) 199 | cache_grid_alpha = F.max_pool3d(cache_grid_alpha, kernel_size=3, padding=1, stride=1)[0,0] 200 | self.mask_cache[i].mask &= (cache_grid_alpha > self.fast_color_thres) 201 | 202 | def density_total_variation_add_grad(self, weight, dense_mode): 203 | w = weight * self.world_size.max() / 128 204 | self.density[0].total_variation_add_grad(w, w, w, dense_mode) 205 | self.density[1].total_variation_add_grad(w, w, w, dense_mode) 206 | 207 | def k0_total_variation_add_grad(self, weight, dense_mode): 208 | w = weight * self.world_size.max() / 128 209 | self.k0[0].total_variation_add_grad(w, w, w, dense_mode) 210 | self.k0[1].total_variation_add_grad(w, w, w, dense_mode) 211 | 212 | def activate_density(self, density, interval=None): 213 | interval = interval if interval is not None else self.voxel_size_ratio 214 | shape = density.shape 215 | return Raw2Alpha.apply(density.flatten(), self.act_shift, interval).reshape(shape) 216 | 217 | def sample_ray(self, ori_rays_o, ori_rays_d, stepsize, is_train=False, **render_kwargs): 218 | '''Sample query points on rays. 219 | All the output points are sorted from near to far. 220 | Input: 221 | rays_o, rayd_d: both in [N, 3] indicating ray configurations. 222 | near, far: the near and far distance of the rays. 223 | stepsize: the number of voxels of each sample step. 224 | Output: 225 | ray_pts: [M, 3] storing all the sampled points. 226 | ray_id: [M] the index of the ray of each point. 227 | step_id: [M] the i'th step on a ray of each point. 
228 | ''' 229 | rays_o = (ori_rays_o - self.scene_center) / self.scene_radius 230 | rays_d = ori_rays_d / ori_rays_d.norm(dim=-1, keepdim=True) 231 | # sample query points in inter scene 232 | near = 0 233 | far = 2 * np.sqrt(3) 234 | stepdist = stepsize * self.voxel_size 235 | ray_pts, mask_outbbox, ray_id, step_id, N_steps, t_min, t_max = render_utils_cuda.sample_pts_on_rays( 236 | rays_o, rays_d, self.xyz_min, self.xyz_max, near, far, stepdist) 237 | mask_inbbox = ~mask_outbbox 238 | ray_pts = ray_pts[mask_inbbox] 239 | ray_id = ray_id[mask_inbbox] 240 | step_id = step_id[mask_inbbox] 241 | # sample query points in outer scene 242 | N_outer = int(np.sqrt(3) / stepdist.item() * (1-self.bg_preserve)) + 1 243 | ray_pts_outer = render_utils_cuda.sample_bg_pts_on_rays( 244 | rays_o, rays_d, t_max, self.bg_preserve, N_outer) 245 | return ray_pts, ray_id, step_id, ray_pts_outer 246 | 247 | def _forward(self, ray_pts, viewdirs, interval, N, 248 | mask_grid, density_grid, k0_grid, rgbnet=None, 249 | ray_id=None, step_id=None, prev_alphainv_last=None): 250 | # preprocess for bg queries 251 | if ray_id is None: 252 | # ray_pts is [N, M, 3] in bg query 253 | assert len(ray_pts.shape) == 3 254 | ray_id, step_id = create_full_step_id(ray_pts.shape[:2]) 255 | ray_pts = ray_pts.reshape(-1, 3) 256 | 257 | # skip ray which is already occluded by fg 258 | if prev_alphainv_last is not None: 259 | mask = (prev_alphainv_last > self.fast_color_thres) 260 | ray_id = ray_id.view(N,-1)[mask].view(-1) 261 | step_id = step_id.view(N,-1)[mask].view(-1) 262 | ray_pts = ray_pts.view(N,-1,3)[mask].view(-1,3) 263 | 264 | # skip known free space 265 | mask = mask_grid(ray_pts) 266 | ray_pts = ray_pts[mask] 267 | ray_id = ray_id[mask] 268 | step_id = step_id[mask] 269 | 270 | # query for alpha w/ post-activation 271 | density = density_grid(ray_pts) 272 | alpha = self.activate_density(density, interval) 273 | if self.fast_color_thres > 0: 274 | mask = (alpha > self.fast_color_thres) 275 | ray_pts = ray_pts[mask] 276 | ray_id = ray_id[mask] 277 | step_id = step_id[mask] 278 | density = density[mask] 279 | alpha = alpha[mask] 280 | 281 | # compute accumulated transmittance 282 | weights, alphainv_last = Alphas2Weights.apply(alpha, ray_id, N) 283 | if self.fast_color_thres > 0: 284 | mask = (weights > self.fast_color_thres) 285 | weights = weights[mask] 286 | alpha = alpha[mask] 287 | ray_pts = ray_pts[mask] 288 | ray_id = ray_id[mask] 289 | step_id = step_id[mask] 290 | 291 | # query for color 292 | k0 = k0_grid(ray_pts) 293 | if rgbnet is None: 294 | # no view-depend effect 295 | rgb = torch.sigmoid(k0) 296 | else: 297 | # view-dependent color emission 298 | k0_view = k0 299 | viewdirs_emb = (viewdirs.unsqueeze(-1) * self.viewfreq).flatten(-2) 300 | viewdirs_emb = torch.cat([viewdirs, viewdirs_emb.sin(), viewdirs_emb.cos()], -1) 301 | viewdirs_emb = viewdirs_emb.flatten(0,-2)[ray_id] 302 | rgb_feat = torch.cat([k0_view, viewdirs_emb], -1) 303 | rgb_logit = rgbnet(rgb_feat) 304 | rgb = torch.sigmoid(rgb_logit) 305 | 306 | return dict( 307 | rgb=rgb, alpha=alpha, weights=weights, alphainv_last=alphainv_last, 308 | ray_id=ray_id, step_id=step_id) 309 | 310 | def forward(self, rays_o, rays_d, viewdirs, global_step=None, **render_kwargs): 311 | '''Volume rendering 312 | @rays_o: [N, 3] the starting point of the N shooting rays. 313 | @rays_d: [N, 3] the shooting direction of the N rays. 314 | @viewdirs: [N, 3] viewing direction to compute positional embedding for MLP. 
315 | ''' 316 | assert len(rays_o.shape)==2 and rays_o.shape[-1]==3, 'Only suuport point queries in [N, 3] format' 317 | 318 | ret_dict = {} 319 | N = len(rays_o) 320 | 321 | # sample points on rays 322 | ray_pts, ray_id, step_id, ray_pts_outer = self.sample_ray( 323 | ori_rays_o=rays_o, ori_rays_d=rays_d, is_train=global_step is not None, **render_kwargs) 324 | interval = render_kwargs['stepsize'] * self.voxel_size_ratio 325 | 326 | # query for foreground 327 | fg = self._forward( 328 | ray_pts=ray_pts, viewdirs=viewdirs, 329 | interval=interval, N=N, 330 | mask_grid=self.mask_cache[0], 331 | density_grid=self.density[0], 332 | k0_grid=self.k0[0], 333 | rgbnet=self.rgbnet[0], 334 | ray_id=ray_id, step_id=step_id) 335 | 336 | # query for background 337 | bg = self._forward( 338 | ray_pts=ray_pts_outer, viewdirs=viewdirs, 339 | interval=interval, N=N, 340 | mask_grid=self.mask_cache[1], 341 | density_grid=self.density[1], 342 | k0_grid=self.k0[1], 343 | rgbnet=self.rgbnet[1], 344 | prev_alphainv_last=fg['alphainv_last']) 345 | 346 | # Ray marching 347 | rgb_marched_fg = segment_coo( 348 | src=(fg['weights'].unsqueeze(-1) * fg['rgb']), 349 | index=fg['ray_id'], 350 | out=torch.zeros([N, 3]), 351 | reduce='sum') 352 | rgb_marched_bg = segment_coo( 353 | src=(bg['weights'].unsqueeze(-1) * bg['rgb']), 354 | index=bg['ray_id'], 355 | out=torch.zeros([N, 3]), 356 | reduce='sum') 357 | rgb_marched = rgb_marched_fg + \ 358 | fg['alphainv_last'].unsqueeze(-1) * rgb_marched_bg + \ 359 | (fg['alphainv_last'] * bg['alphainv_last']).unsqueeze(-1) * render_kwargs['bg'] 360 | ret_dict.update({ 361 | 'rgb_marched': rgb_marched, 362 | 'alphainv_last': torch.cat([fg['alphainv_last'], bg['alphainv_last']]), 363 | 'weights': torch.cat([fg['weights'], bg['weights']]), 364 | 'raw_alpha': torch.cat([fg['alpha'], bg['alpha']]), 365 | 'raw_rgb': torch.cat([fg['rgb'], bg['rgb']]), 366 | 'ray_id': torch.cat([fg['ray_id'], bg['ray_id']]), 367 | }) 368 | 369 | if render_kwargs.get('render_depth', False): 370 | # TODO: add bg 371 | with torch.no_grad(): 372 | depth_fg = segment_coo( 373 | src=(fg['weights'] * fg['step_id']), 374 | index=fg['ray_id'], 375 | out=torch.zeros([N]), 376 | reduce='sum') 377 | depth_bg = segment_coo( 378 | src=(bg['weights'] * bg['step_id']), 379 | index=bg['ray_id'], 380 | out=torch.zeros([N]), 381 | reduce='sum') 382 | depth_fg_last = segment_coo( 383 | src=fg['step_id'].float(), 384 | index=fg['ray_id'], 385 | out=torch.zeros([N]), 386 | reduce='max') 387 | depth_bg_last = segment_coo( 388 | src=bg['step_id'].float(), 389 | index=bg['ray_id'], 390 | out=depth_fg_last.clone(), 391 | reduce='max') 392 | depth = depth_fg + \ 393 | fg['alphainv_last'] * (1 + depth_fg_last + depth_bg) + \ 394 | fg['alphainv_last'] * bg['alphainv_last'] * (2 + depth_fg_last + depth_bg_last) 395 | ret_dict.update({'depth': depth}) 396 | 397 | return ret_dict 398 | 399 | -------------------------------------------------------------------------------- /lib/dmpigo.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import functools 4 | import numpy as np 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | from torch import Tensor 10 | from einops import rearrange 11 | from torch_scatter import scatter_add, segment_coo 12 | 13 | from . 
import grid 14 | from .dvgo import Raw2Alpha, Alphas2Weights, render_utils_cuda 15 | 16 | 17 | '''Model''' 18 | class DirectMPIGO(torch.nn.Module): 19 | def __init__(self, xyz_min, xyz_max, 20 | num_voxels=0, mpi_depth=0, 21 | mask_cache_path=None, mask_cache_thres=1e-3, mask_cache_world_size=None, 22 | fast_color_thres=0, 23 | density_type='DenseGrid', k0_type='DenseGrid', 24 | density_config={}, k0_config={}, 25 | rgbnet_dim=0, 26 | rgbnet_depth=3, rgbnet_width=128, 27 | viewbase_pe=0, 28 | **kwargs): 29 | super(DirectMPIGO, self).__init__() 30 | self.register_buffer('xyz_min', torch.Tensor(xyz_min)) 31 | self.register_buffer('xyz_max', torch.Tensor(xyz_max)) 32 | self.fast_color_thres = fast_color_thres 33 | 34 | # determine init grid resolution 35 | self._set_grid_resolution(num_voxels, mpi_depth) 36 | 37 | # init density voxel grid 38 | self.density_type = density_type 39 | self.density_config = density_config 40 | self.density = grid.create_grid( 41 | density_type, channels=1, world_size=self.world_size, 42 | xyz_min=self.xyz_min, xyz_max=self.xyz_max, 43 | config=self.density_config) 44 | 45 | # init density bias so that the initial contribution (the alpha values) 46 | # of each query points on a ray is equal 47 | self.act_shift = grid.DenseGrid( 48 | channels=1, world_size=[1,1,mpi_depth], 49 | xyz_min=xyz_min, xyz_max=xyz_max) 50 | self.act_shift.grid.requires_grad = False 51 | with torch.no_grad(): 52 | g = np.full([mpi_depth], 1./mpi_depth - 1e-6) 53 | p = [1-g[0]] 54 | for i in range(1, len(g)): 55 | p.append((1-g[:i+1].sum())/(1-g[:i].sum())) 56 | for i in range(len(p)): 57 | self.act_shift.grid[..., i].fill_(np.log(p[i] ** (-1/self.voxel_size_ratio) - 1)) 58 | 59 | # init color representation 60 | # feature voxel grid + shallow MLP (fine stage) 61 | self.rgbnet_kwargs = { 62 | 'rgbnet_dim': rgbnet_dim, 63 | 'rgbnet_depth': rgbnet_depth, 'rgbnet_width': rgbnet_width, 64 | 'viewbase_pe': viewbase_pe, 65 | } 66 | self.k0_type = k0_type 67 | self.k0_config = k0_config 68 | if rgbnet_dim <= 0: 69 | # color voxel grid (coarse stage) 70 | self.k0_dim = 3 71 | self.k0 = grid.create_grid( 72 | k0_type, channels=self.k0_dim, world_size=self.world_size, 73 | xyz_min=self.xyz_min, xyz_max=self.xyz_max, 74 | config=self.k0_config) 75 | self.rgbnet = None 76 | else: 77 | self.k0_dim = rgbnet_dim 78 | self.k0 = grid.create_grid( 79 | k0_type, channels=self.k0_dim, world_size=self.world_size, 80 | xyz_min=self.xyz_min, xyz_max=self.xyz_max, 81 | config=self.k0_config) 82 | self.register_buffer('viewfreq', torch.FloatTensor([(2**i) for i in range(viewbase_pe)])) 83 | dim0 = (3+3*viewbase_pe*2) + self.k0_dim 84 | self.rgbnet = nn.Sequential( 85 | nn.Linear(dim0, rgbnet_width), nn.ReLU(inplace=True), 86 | *[ 87 | nn.Sequential(nn.Linear(rgbnet_width, rgbnet_width), nn.ReLU(inplace=True)) 88 | for _ in range(rgbnet_depth-2) 89 | ], 90 | nn.Linear(rgbnet_width, 3), 91 | ) 92 | nn.init.constant_(self.rgbnet[-1].bias, 0) 93 | 94 | print('dmpigo: densitye grid', self.density) 95 | print('dmpigo: feature grid', self.k0) 96 | print('dmpigo: mlp', self.rgbnet) 97 | 98 | # Using the coarse geometry if provided (used to determine known free space and unknown space) 99 | # Re-implement as occupancy grid (2021/1/31) 100 | self.mask_cache_path = mask_cache_path 101 | self.mask_cache_thres = mask_cache_thres 102 | if mask_cache_world_size is None: 103 | mask_cache_world_size = self.world_size 104 | if mask_cache_path is not None and mask_cache_path: 105 | mask_cache = grid.MaskGrid( 106 | 
path=mask_cache_path, 107 | mask_cache_thres=mask_cache_thres).to(self.xyz_min.device) 108 | self_grid_xyz = torch.stack(torch.meshgrid( 109 | torch.linspace(self.xyz_min[0], self.xyz_max[0], mask_cache_world_size[0]), 110 | torch.linspace(self.xyz_min[1], self.xyz_max[1], mask_cache_world_size[1]), 111 | torch.linspace(self.xyz_min[2], self.xyz_max[2], mask_cache_world_size[2]), 112 | ), -1) 113 | mask = mask_cache(self_grid_xyz) 114 | else: 115 | mask = torch.ones(list(mask_cache_world_size), dtype=torch.bool) 116 | self.mask_cache = grid.MaskGrid( 117 | path=None, mask=mask, 118 | xyz_min=self.xyz_min, xyz_max=self.xyz_max) 119 | 120 | def _set_grid_resolution(self, num_voxels, mpi_depth): 121 | # Determine grid resolution 122 | self.num_voxels = num_voxels 123 | self.mpi_depth = mpi_depth 124 | r = (num_voxels / self.mpi_depth / (self.xyz_max - self.xyz_min)[:2].prod()).sqrt() 125 | self.world_size = torch.zeros(3, dtype=torch.long) 126 | self.world_size[:2] = (self.xyz_max - self.xyz_min)[:2] * r 127 | self.world_size[2] = self.mpi_depth 128 | self.voxel_size_ratio = 256. / mpi_depth 129 | print('dmpigo: world_size ', self.world_size) 130 | print('dmpigo: voxel_size_ratio', self.voxel_size_ratio) 131 | 132 | def get_kwargs(self): 133 | return { 134 | 'xyz_min': self.xyz_min.cpu().numpy(), 135 | 'xyz_max': self.xyz_max.cpu().numpy(), 136 | 'num_voxels': self.num_voxels, 137 | 'mpi_depth': self.mpi_depth, 138 | 'voxel_size_ratio': self.voxel_size_ratio, 139 | 'mask_cache_path': self.mask_cache_path, 140 | 'mask_cache_thres': self.mask_cache_thres, 141 | 'mask_cache_world_size': list(self.mask_cache.mask.shape), 142 | 'fast_color_thres': self.fast_color_thres, 143 | 'density_type': self.density_type, 144 | 'k0_type': self.k0_type, 145 | 'density_config': self.density_config, 146 | 'k0_config': self.k0_config, 147 | **self.rgbnet_kwargs, 148 | } 149 | 150 | @torch.no_grad() 151 | def scale_volume_grid(self, num_voxels, mpi_depth): 152 | print('dmpigo: scale_volume_grid start') 153 | ori_world_size = self.world_size 154 | self._set_grid_resolution(num_voxels, mpi_depth) 155 | print('dmpigo: scale_volume_grid scale world_size from', ori_world_size.tolist(), 'to', self.world_size.tolist()) 156 | 157 | self.density.scale_volume_grid(self.world_size) 158 | self.k0.scale_volume_grid(self.world_size) 159 | 160 | if np.prod(self.world_size.tolist()) <= 256**3: 161 | self_grid_xyz = torch.stack(torch.meshgrid( 162 | torch.linspace(self.xyz_min[0], self.xyz_max[0], self.world_size[0]), 163 | torch.linspace(self.xyz_min[1], self.xyz_max[1], self.world_size[1]), 164 | torch.linspace(self.xyz_min[2], self.xyz_max[2], self.world_size[2]), 165 | ), -1) 166 | dens = self.density.get_dense_grid() + self.act_shift.grid 167 | self_alpha = F.max_pool3d(self.activate_density(dens), kernel_size=3, padding=1, stride=1)[0,0] 168 | self.mask_cache = grid.MaskGrid( 169 | path=None, mask=self.mask_cache(self_grid_xyz) & (self_alpha>self.fast_color_thres), 170 | xyz_min=self.xyz_min, xyz_max=self.xyz_max) 171 | 172 | print('dmpigo: scale_volume_grid finish') 173 | 174 | @torch.no_grad() 175 | def update_occupancy_cache(self): 176 | ori_p = self.mask_cache.mask.float().mean().item() 177 | cache_grid_xyz = torch.stack(torch.meshgrid( 178 | torch.linspace(self.xyz_min[0], self.xyz_max[0], self.mask_cache.mask.shape[0]), 179 | torch.linspace(self.xyz_min[1], self.xyz_max[1], self.mask_cache.mask.shape[1]), 180 | torch.linspace(self.xyz_min[2], self.xyz_max[2], self.mask_cache.mask.shape[2]), 181 | ), -1) 182 | 
cache_grid_density = self.density(cache_grid_xyz)[None,None] 183 | cache_grid_alpha = self.activate_density(cache_grid_density) 184 | cache_grid_alpha = F.max_pool3d(cache_grid_alpha, kernel_size=3, padding=1, stride=1)[0,0] 185 | self.mask_cache.mask &= (cache_grid_alpha > self.fast_color_thres) 186 | new_p = self.mask_cache.mask.float().mean().item() 187 | print(f'dmpigo: update mask_cache {ori_p:.4f} => {new_p:.4f}') 188 | 189 | def update_occupancy_cache_lt_nviews(self, rays_o_tr, rays_d_tr, imsz, render_kwargs, maskout_lt_nviews): 190 | print('dmpigo: update mask_cache lt_nviews start') 191 | eps_time = time.time() 192 | count = torch.zeros_like(self.density.get_dense_grid()).long() 193 | device = count.device 194 | for rays_o_, rays_d_ in zip(rays_o_tr.split(imsz), rays_d_tr.split(imsz)): 195 | ones = grid.DenseGrid(1, self.world_size, self.xyz_min, self.xyz_max) 196 | for rays_o, rays_d in zip(rays_o_.split(8192), rays_d_.split(8192)): 197 | ray_pts, ray_id, step_id, N_samples = self.sample_ray( 198 | rays_o=rays_o.to(device), rays_d=rays_d.to(device), **render_kwargs) 199 | ones(ray_pts).sum().backward() 200 | count.data += (ones.grid.grad > 1) 201 | ori_p = self.mask_cache.mask.float().mean().item() 202 | self.mask_cache.mask &= (count >= maskout_lt_nviews)[0,0] 203 | new_p = self.mask_cache.mask.float().mean().item() 204 | print(f'dmpigo: update mask_cache {ori_p:.4f} => {new_p:.4f}') 205 | torch.cuda.empty_cache() 206 | eps_time = time.time() - eps_time 207 | print(f'dmpigo: update mask_cache lt_nviews finish (eps time:', eps_time, 'sec)') 208 | 209 | def density_total_variation_add_grad(self, weight, dense_mode): 210 | wxy = weight * self.world_size[:2].max() / 128 211 | wz = weight * self.mpi_depth / 128 212 | self.density.total_variation_add_grad(wxy, wxy, wz, dense_mode) 213 | 214 | def k0_total_variation_add_grad(self, weight, dense_mode): 215 | wxy = weight * self.world_size[:2].max() / 128 216 | wz = weight * self.mpi_depth / 128 217 | self.k0.total_variation_add_grad(wxy, wxy, wz, dense_mode) 218 | 219 | def activate_density(self, density, interval=None): 220 | interval = interval if interval is not None else self.voxel_size_ratio 221 | shape = density.shape 222 | return Raw2Alpha.apply(density.flatten(), 0, interval).reshape(shape) 223 | 224 | def sample_ray(self, rays_o, rays_d, near, far, stepsize, **render_kwargs): 225 | '''Sample query points on rays. 226 | All the output points are sorted from near to far. 227 | Input: 228 | rays_o, rayd_d: both in [N, 3] indicating ray configurations. 229 | near, far: the near and far distance of the rays. 230 | stepsize: the number of voxels of each sample step. 231 | Output: 232 | ray_pts: [M, 3] storing all the sampled points. 233 | ray_id: [M] the index of the ray of each point. 234 | step_id: [M] the i'th step on a ray of each point. 
235 | ''' 236 | assert near==0 and far==1 237 | rays_o = rays_o.contiguous() 238 | rays_d = rays_d.contiguous() 239 | N_samples = int((self.mpi_depth-1)/stepsize) + 1 240 | ray_pts, mask_outbbox = render_utils_cuda.sample_ndc_pts_on_rays( 241 | rays_o, rays_d, self.xyz_min, self.xyz_max, N_samples) 242 | mask_inbbox = ~mask_outbbox 243 | ray_pts = ray_pts[mask_inbbox] 244 | if mask_inbbox.all(): 245 | ray_id, step_id = create_full_step_id(mask_inbbox.shape) 246 | else: 247 | ray_id = torch.arange(mask_inbbox.shape[0]).view(-1,1).expand_as(mask_inbbox)[mask_inbbox] 248 | step_id = torch.arange(mask_inbbox.shape[1]).view(1,-1).expand_as(mask_inbbox)[mask_inbbox] 249 | return ray_pts, ray_id, step_id, N_samples 250 | 251 | def forward(self, rays_o, rays_d, viewdirs, global_step=None, **render_kwargs): 252 | '''Volume rendering 253 | @rays_o: [N, 3] the starting point of the N shooting rays. 254 | @rays_d: [N, 3] the shooting direction of the N rays. 255 | @viewdirs: [N, 3] viewing direction to compute positional embedding for MLP. 256 | ''' 257 | assert len(rays_o.shape)==2 and rays_o.shape[-1]==3, 'Only suuport point queries in [N, 3] format' 258 | 259 | ret_dict = {} 260 | N = len(rays_o) 261 | 262 | # sample points on rays 263 | ray_pts, ray_id, step_id, N_samples = self.sample_ray( 264 | rays_o=rays_o, rays_d=rays_d, **render_kwargs) 265 | interval = render_kwargs['stepsize'] * self.voxel_size_ratio 266 | 267 | # skip known free space 268 | if self.mask_cache is not None: 269 | mask = self.mask_cache(ray_pts) 270 | ray_pts = ray_pts[mask] 271 | ray_id = ray_id[mask] 272 | step_id = step_id[mask] 273 | 274 | # query for alpha w/ post-activation 275 | density = self.density(ray_pts) + self.act_shift(ray_pts) 276 | alpha = self.activate_density(density, interval) 277 | if self.fast_color_thres > 0: 278 | mask = (alpha > self.fast_color_thres) 279 | ray_pts = ray_pts[mask] 280 | ray_id = ray_id[mask] 281 | step_id = step_id[mask] 282 | alpha = alpha[mask] 283 | 284 | # compute accumulated transmittance 285 | weights, alphainv_last = Alphas2Weights.apply(alpha, ray_id, N) 286 | if self.fast_color_thres > 0: 287 | mask = (weights > self.fast_color_thres) 288 | ray_pts = ray_pts[mask] 289 | ray_id = ray_id[mask] 290 | step_id = step_id[mask] 291 | alpha = alpha[mask] 292 | weights = weights[mask] 293 | 294 | # query for color 295 | vox_emb = self.k0(ray_pts) 296 | 297 | if self.rgbnet is None: 298 | # no view-depend effect 299 | rgb = torch.sigmoid(vox_emb) 300 | else: 301 | # view-dependent color emission 302 | viewdirs_emb = (viewdirs.unsqueeze(-1) * self.viewfreq).flatten(-2) 303 | viewdirs_emb = torch.cat([viewdirs, viewdirs_emb.sin(), viewdirs_emb.cos()], -1) 304 | viewdirs_emb = viewdirs_emb[ray_id] 305 | rgb_feat = torch.cat([vox_emb, viewdirs_emb], -1) 306 | rgb_logit = self.rgbnet(rgb_feat) 307 | rgb = torch.sigmoid(rgb_logit) 308 | 309 | # Ray marching 310 | rgb_marched = segment_coo( 311 | src=(weights.unsqueeze(-1) * rgb), 312 | index=ray_id, 313 | out=torch.zeros([N, 3]), 314 | reduce='sum') 315 | if render_kwargs.get('rand_bkgd', False) and global_step is not None: 316 | rgb_marched += (alphainv_last.unsqueeze(-1) * torch.rand_like(rgb_marched)) 317 | else: 318 | rgb_marched += (alphainv_last.unsqueeze(-1) * render_kwargs['bg']) 319 | s = (step_id+0.5) / N_samples 320 | ret_dict.update({ 321 | 'alphainv_last': alphainv_last, 322 | 'weights': weights, 323 | 'rgb_marched': rgb_marched, 324 | 'raw_alpha': alpha, 325 | 'raw_rgb': rgb, 326 | 'ray_id': ray_id, 327 | 'n_max': N_samples, 
328 | 's': s, 329 | }) 330 | 331 | if render_kwargs.get('render_depth', False): 332 | with torch.no_grad(): 333 | depth = segment_coo( 334 | src=(weights * s), 335 | index=ray_id, 336 | out=torch.zeros([N]), 337 | reduce='sum') 338 | ret_dict.update({'depth': depth}) 339 | 340 | return ret_dict 341 | 342 | 343 | @functools.lru_cache(maxsize=128) 344 | def create_full_step_id(shape): 345 | ray_id = torch.arange(shape[0]).view(-1,1).expand(shape).flatten() 346 | step_id = torch.arange(shape[1]).view(1,-1).expand(shape).flatten() 347 | return ray_id, step_id 348 | 349 | -------------------------------------------------------------------------------- /lib/grid.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import functools 4 | import numpy as np 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | 10 | from torch.utils.cpp_extension import load 11 | parent_dir = os.path.dirname(os.path.abspath(__file__)) 12 | render_utils_cuda = load( 13 | name='render_utils_cuda', 14 | sources=[ 15 | os.path.join(parent_dir, path) 16 | for path in ['cuda/render_utils.cpp', 'cuda/render_utils_kernel.cu']], 17 | verbose=True) 18 | 19 | total_variation_cuda = load( 20 | name='total_variation_cuda', 21 | sources=[ 22 | os.path.join(parent_dir, path) 23 | for path in ['cuda/total_variation.cpp', 'cuda/total_variation_kernel.cu']], 24 | verbose=True) 25 | 26 | 27 | def create_grid(type, **kwargs): 28 | if type == 'DenseGrid': 29 | return DenseGrid(**kwargs) 30 | elif type == 'TensoRFGrid': 31 | return TensoRFGrid(**kwargs) 32 | else: 33 | raise NotImplementedError 34 | 35 | 36 | ''' Dense 3D grid 37 | ''' 38 | class DenseGrid(nn.Module): 39 | def __init__(self, channels, world_size, xyz_min, xyz_max, **kwargs): 40 | super(DenseGrid, self).__init__() 41 | self.channels = channels 42 | self.world_size = world_size 43 | self.register_buffer('xyz_min', torch.Tensor(xyz_min)) 44 | self.register_buffer('xyz_max', torch.Tensor(xyz_max)) 45 | self.grid = nn.Parameter(torch.zeros([1, channels, *world_size])) 46 | 47 | def forward(self, xyz): 48 | ''' 49 | xyz: global coordinates to query 50 | ''' 51 | shape = xyz.shape[:-1] 52 | xyz = xyz.reshape(1,1,1,-1,3) 53 | ind_norm = ((xyz - self.xyz_min) / (self.xyz_max - self.xyz_min)).flip((-1,)) * 2 - 1 54 | out = F.grid_sample(self.grid, ind_norm, mode='bilinear', align_corners=True) 55 | out = out.reshape(self.channels,-1).T.reshape(*shape,self.channels) 56 | if self.channels == 1: 57 | out = out.squeeze(-1) 58 | return out 59 | 60 | def scale_volume_grid(self, new_world_size): 61 | if self.channels == 0: 62 | self.grid = nn.Parameter(torch.zeros([1, self.channels, *new_world_size])) 63 | else: 64 | self.grid = nn.Parameter( 65 | F.interpolate(self.grid.data, size=tuple(new_world_size), mode='trilinear', align_corners=True)) 66 | 67 | def total_variation_add_grad(self, wx, wy, wz, dense_mode): 68 | '''Add gradients by total variation loss in-place''' 69 | total_variation_cuda.total_variation_add_grad( 70 | self.grid, self.grid.grad, wx, wy, wz, dense_mode) 71 | 72 | def get_dense_grid(self): 73 | return self.grid 74 | 75 | @torch.no_grad() 76 | def __isub__(self, val): 77 | self.grid.data -= val 78 | return self 79 | 80 | def extra_repr(self): 81 | return f'channels={self.channels}, world_size={self.world_size.tolist()}' 82 | 83 | 84 | ''' Vector-Matrix decomposited grid 85 | See TensoRF: Tensorial Radiance Fields (https://arxiv.org/abs/2203.09517) 86 | ''' 87 | class 
TensoRFGrid(nn.Module): 88 | def __init__(self, channels, world_size, xyz_min, xyz_max, config): 89 | super(TensoRFGrid, self).__init__() 90 | self.channels = channels 91 | self.world_size = world_size 92 | self.config = config 93 | self.register_buffer('xyz_min', torch.Tensor(xyz_min)) 94 | self.register_buffer('xyz_max', torch.Tensor(xyz_max)) 95 | X, Y, Z = world_size 96 | R = config['n_comp'] 97 | Rxy = config.get('n_comp_xy', R) 98 | self.xy_plane = nn.Parameter(torch.randn([1, Rxy, X, Y]) * 0.1) 99 | self.xz_plane = nn.Parameter(torch.randn([1, R, X, Z]) * 0.1) 100 | self.yz_plane = nn.Parameter(torch.randn([1, R, Y, Z]) * 0.1) 101 | self.x_vec = nn.Parameter(torch.randn([1, R, X, 1]) * 0.1) 102 | self.y_vec = nn.Parameter(torch.randn([1, R, Y, 1]) * 0.1) 103 | self.z_vec = nn.Parameter(torch.randn([1, Rxy, Z, 1]) * 0.1) 104 | if self.channels > 1: 105 | self.f_vec = nn.Parameter(torch.ones([R+R+Rxy, channels])) 106 | nn.init.kaiming_uniform_(self.f_vec, a=np.sqrt(5)) 107 | 108 | def forward(self, xyz): 109 | ''' 110 | xyz: global coordinates to query 111 | ''' 112 | shape = xyz.shape[:-1] 113 | xyz = xyz.reshape(1,1,-1,3) 114 | ind_norm = (xyz - self.xyz_min) / (self.xyz_max - self.xyz_min) * 2 - 1 115 | ind_norm = torch.cat([ind_norm, torch.zeros_like(ind_norm[...,[0]])], dim=-1) 116 | if self.channels > 1: 117 | out = compute_tensorf_feat( 118 | self.xy_plane, self.xz_plane, self.yz_plane, 119 | self.x_vec, self.y_vec, self.z_vec, self.f_vec, ind_norm) 120 | out = out.reshape(*shape,self.channels) 121 | else: 122 | out = compute_tensorf_val( 123 | self.xy_plane, self.xz_plane, self.yz_plane, 124 | self.x_vec, self.y_vec, self.z_vec, ind_norm) 125 | out = out.reshape(*shape) 126 | return out 127 | 128 | def scale_volume_grid(self, new_world_size): 129 | if self.channels == 0: 130 | return 131 | X, Y, Z = new_world_size 132 | self.xy_plane = nn.Parameter(F.interpolate(self.xy_plane.data, size=[X,Y], mode='bilinear', align_corners=True)) 133 | self.xz_plane = nn.Parameter(F.interpolate(self.xz_plane.data, size=[X,Z], mode='bilinear', align_corners=True)) 134 | self.yz_plane = nn.Parameter(F.interpolate(self.yz_plane.data, size=[Y,Z], mode='bilinear', align_corners=True)) 135 | self.x_vec = nn.Parameter(F.interpolate(self.x_vec.data, size=[X,1], mode='bilinear', align_corners=True)) 136 | self.y_vec = nn.Parameter(F.interpolate(self.y_vec.data, size=[Y,1], mode='bilinear', align_corners=True)) 137 | self.z_vec = nn.Parameter(F.interpolate(self.z_vec.data, size=[Z,1], mode='bilinear', align_corners=True)) 138 | 139 | def total_variation_add_grad(self, wx, wy, wz, dense_mode): 140 | '''Add gradients by total variation loss in-place''' 141 | loss = wx * F.smooth_l1_loss(self.xy_plane[:,:,1:], self.xy_plane[:,:,:-1], reduction='sum') +\ 142 | wy * F.smooth_l1_loss(self.xy_plane[:,:,:,1:], self.xy_plane[:,:,:,:-1], reduction='sum') +\ 143 | wx * F.smooth_l1_loss(self.xz_plane[:,:,1:], self.xz_plane[:,:,:-1], reduction='sum') +\ 144 | wz * F.smooth_l1_loss(self.xz_plane[:,:,:,1:], self.xz_plane[:,:,:,:-1], reduction='sum') +\ 145 | wy * F.smooth_l1_loss(self.yz_plane[:,:,1:], self.yz_plane[:,:,:-1], reduction='sum') +\ 146 | wz * F.smooth_l1_loss(self.yz_plane[:,:,:,1:], self.yz_plane[:,:,:,:-1], reduction='sum') +\ 147 | wx * F.smooth_l1_loss(self.x_vec[:,:,1:], self.x_vec[:,:,:-1], reduction='sum') +\ 148 | wy * F.smooth_l1_loss(self.y_vec[:,:,1:], self.y_vec[:,:,:-1], reduction='sum') +\ 149 | wz * F.smooth_l1_loss(self.z_vec[:,:,1:], self.z_vec[:,:,:-1], reduction='sum') 150 | loss /= 6 
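# The sum above is a finite-difference total-variation penalty: two axes for each of the
# three planes plus one axis for each vector (nine smooth-L1 terms), weighted by wx/wy/wz
# and rescaled by 1/6. A minimal sketch of one such term on a hypothetical plane tensor:
#   P = torch.randn(1, 8, 16, 16)
#   tv_along_x = F.smooth_l1_loss(P[:, :, 1:], P[:, :, :-1], reduction='sum')
# i.e. neighboring grid entries are pulled toward each other along that axis.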
151 | loss.backward() 152 | 153 | def get_dense_grid(self): 154 | if self.channels > 1: 155 | feat = torch.cat([ 156 | torch.einsum('rxy,rz->rxyz', self.xy_plane[0], self.z_vec[0,:,:,0]), 157 | torch.einsum('rxz,ry->rxyz', self.xz_plane[0], self.y_vec[0,:,:,0]), 158 | torch.einsum('ryz,rx->rxyz', self.yz_plane[0], self.x_vec[0,:,:,0]), 159 | ]) 160 | grid = torch.einsum('rxyz,rc->cxyz', feat, self.f_vec)[None] 161 | else: 162 | grid = torch.einsum('rxy,rz->xyz', self.xy_plane[0], self.z_vec[0,:,:,0]) + \ 163 | torch.einsum('rxz,ry->xyz', self.xz_plane[0], self.y_vec[0,:,:,0]) + \ 164 | torch.einsum('ryz,rx->xyz', self.yz_plane[0], self.x_vec[0,:,:,0]) 165 | grid = grid[None,None] 166 | return grid 167 | 168 | def extra_repr(self): 169 | return f'channels={self.channels}, world_size={self.world_size.tolist()}, n_comp={self.config["n_comp"]}' 170 | 171 | def compute_tensorf_feat(xy_plane, xz_plane, yz_plane, x_vec, y_vec, z_vec, f_vec, ind_norm): 172 | # Interp feature (feat shape: [n_pts, n_comp]) 173 | xy_feat = F.grid_sample(xy_plane, ind_norm[:,:,:,[1,0]], mode='bilinear', align_corners=True).flatten(0,2).T 174 | xz_feat = F.grid_sample(xz_plane, ind_norm[:,:,:,[2,0]], mode='bilinear', align_corners=True).flatten(0,2).T 175 | yz_feat = F.grid_sample(yz_plane, ind_norm[:,:,:,[2,1]], mode='bilinear', align_corners=True).flatten(0,2).T 176 | x_feat = F.grid_sample(x_vec, ind_norm[:,:,:,[3,0]], mode='bilinear', align_corners=True).flatten(0,2).T 177 | y_feat = F.grid_sample(y_vec, ind_norm[:,:,:,[3,1]], mode='bilinear', align_corners=True).flatten(0,2).T 178 | z_feat = F.grid_sample(z_vec, ind_norm[:,:,:,[3,2]], mode='bilinear', align_corners=True).flatten(0,2).T 179 | # Aggregate components 180 | feat = torch.cat([ 181 | xy_feat * z_feat, 182 | xz_feat * y_feat, 183 | yz_feat * x_feat, 184 | ], dim=-1) 185 | feat = torch.mm(feat, f_vec) 186 | return feat 187 | 188 | def compute_tensorf_val(xy_plane, xz_plane, yz_plane, x_vec, y_vec, z_vec, ind_norm): 189 | # Interp feature (feat shape: [n_pts, n_comp]) 190 | xy_feat = F.grid_sample(xy_plane, ind_norm[:,:,:,[1,0]], mode='bilinear', align_corners=True).flatten(0,2).T 191 | xz_feat = F.grid_sample(xz_plane, ind_norm[:,:,:,[2,0]], mode='bilinear', align_corners=True).flatten(0,2).T 192 | yz_feat = F.grid_sample(yz_plane, ind_norm[:,:,:,[2,1]], mode='bilinear', align_corners=True).flatten(0,2).T 193 | x_feat = F.grid_sample(x_vec, ind_norm[:,:,:,[3,0]], mode='bilinear', align_corners=True).flatten(0,2).T 194 | y_feat = F.grid_sample(y_vec, ind_norm[:,:,:,[3,1]], mode='bilinear', align_corners=True).flatten(0,2).T 195 | z_feat = F.grid_sample(z_vec, ind_norm[:,:,:,[3,2]], mode='bilinear', align_corners=True).flatten(0,2).T 196 | # Aggregate components 197 | feat = (xy_feat * z_feat).sum(-1) + (xz_feat * y_feat).sum(-1) + (yz_feat * x_feat).sum(-1) 198 | return feat 199 | 200 | 201 | ''' Mask grid 202 | It supports queries for the known free space and unknown space. 
203 | ''' 204 | class MaskGrid(nn.Module): 205 | def __init__(self, path=None, mask_cache_thres=None, mask=None, xyz_min=None, xyz_max=None): 206 | super(MaskGrid, self).__init__() 207 | if path is not None: 208 | st = torch.load(path) 209 | self.mask_cache_thres = mask_cache_thres 210 | density = F.max_pool3d(st['model_state_dict']['density.grid'], kernel_size=3, padding=1, stride=1) 211 | alpha = 1 - torch.exp(-F.softplus(density + st['model_state_dict']['act_shift']) * st['model_kwargs']['voxel_size_ratio']) 212 | mask = (alpha >= self.mask_cache_thres).squeeze(0).squeeze(0) 213 | xyz_min = torch.Tensor(st['model_kwargs']['xyz_min']) 214 | xyz_max = torch.Tensor(st['model_kwargs']['xyz_max']) 215 | else: 216 | mask = mask.bool() 217 | xyz_min = torch.Tensor(xyz_min) 218 | xyz_max = torch.Tensor(xyz_max) 219 | 220 | self.register_buffer('mask', mask) 221 | xyz_len = xyz_max - xyz_min 222 | self.register_buffer('xyz2ijk_scale', (torch.Tensor(list(mask.shape)) - 1) / xyz_len) 223 | self.register_buffer('xyz2ijk_shift', -xyz_min * self.xyz2ijk_scale) 224 | 225 | @torch.no_grad() 226 | def forward(self, xyz): 227 | '''Skip known free space 228 | @xyz: [..., 3] the xyz in global coordinate. 229 | ''' 230 | shape = xyz.shape[:-1] 231 | xyz = xyz.reshape(-1, 3) 232 | mask = render_utils_cuda.maskcache_lookup(self.mask, xyz, self.xyz2ijk_scale, self.xyz2ijk_shift) 233 | mask = mask.reshape(shape) 234 | return mask 235 | 236 | def extra_repr(self): 237 | return f'mask.shape={list(self.mask.shape)}' 238 | 239 | -------------------------------------------------------------------------------- /lib/load_blendedmvs.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import torch 4 | import numpy as np 5 | import imageio 6 | import json 7 | import torch.nn.functional as F 8 | import cv2 9 | 10 | 11 | def load_blendedmvs_data(basedir): 12 | pose_paths = sorted(glob.glob(os.path.join(basedir, 'pose', '*txt'))) 13 | rgb_paths = sorted(glob.glob(os.path.join(basedir, 'rgb', '*png'))) 14 | 15 | all_poses = [] 16 | all_imgs = [] 17 | i_split = [[], []] 18 | for i, (pose_path, rgb_path) in enumerate(zip(pose_paths, rgb_paths)): 19 | i_set = int(os.path.split(rgb_path)[-1][0]) 20 | all_imgs.append((imageio.imread(rgb_path) / 255.).astype(np.float32)) 21 | all_poses.append(np.loadtxt(pose_path).astype(np.float32)) 22 | i_split[i_set].append(i) 23 | 24 | imgs = np.stack(all_imgs, 0) 25 | poses = np.stack(all_poses, 0) 26 | i_split.append(i_split[-1]) 27 | 28 | path_intrinsics = os.path.join(basedir, 'intrinsics.txt') 29 | H, W = imgs[0].shape[:2] 30 | K = np.loadtxt(path_intrinsics) 31 | focal = float(K[0,0]) 32 | 33 | render_poses = torch.Tensor(np.loadtxt(os.path.join(basedir, 'test_traj.txt')).reshape(-1,4,4).astype(np.float32)) 34 | 35 | return imgs, poses, render_poses, [H, W, focal], K, i_split 36 | 37 | -------------------------------------------------------------------------------- /lib/load_blender.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import numpy as np 4 | import imageio 5 | import json 6 | import torch.nn.functional as F 7 | import cv2 8 | 9 | 10 | trans_t = lambda t : torch.Tensor([ 11 | [1,0,0,0], 12 | [0,1,0,0], 13 | [0,0,1,t], 14 | [0,0,0,1]]).float() 15 | 16 | rot_phi = lambda phi : torch.Tensor([ 17 | [1,0,0,0], 18 | [0,np.cos(phi),-np.sin(phi),0], 19 | [0,np.sin(phi), np.cos(phi),0], 20 | [0,0,0,1]]).float() 21 | 22 | rot_theta = lambda th : 
torch.Tensor([ 23 | [np.cos(th),0,-np.sin(th),0], 24 | [0,1,0,0], 25 | [np.sin(th),0, np.cos(th),0], 26 | [0,0,0,1]]).float() 27 | 28 | 29 | def pose_spherical(theta, phi, radius): 30 | c2w = trans_t(radius) 31 | c2w = rot_phi(phi/180.*np.pi) @ c2w 32 | c2w = rot_theta(theta/180.*np.pi) @ c2w 33 | c2w = torch.Tensor(np.array([[-1,0,0,0],[0,0,1,0],[0,1,0,0],[0,0,0,1]])) @ c2w 34 | return c2w 35 | 36 | 37 | def load_blender_data(basedir, half_res=False, testskip=1): 38 | splits = ['train', 'val', 'test'] 39 | metas = {} 40 | for s in splits: 41 | with open(os.path.join(basedir, 'transforms_{}.json'.format(s)), 'r') as fp: 42 | metas[s] = json.load(fp) 43 | 44 | all_imgs = [] 45 | all_poses = [] 46 | counts = [0] 47 | for s in splits: 48 | meta = metas[s] 49 | imgs = [] 50 | poses = [] 51 | if s=='train' or testskip==0: 52 | skip = 1 53 | else: 54 | skip = testskip 55 | 56 | for frame in meta['frames'][::skip]: 57 | fname = os.path.join(basedir, frame['file_path'] + '.png') 58 | imgs.append(imageio.imread(fname)) 59 | poses.append(np.array(frame['transform_matrix'])) 60 | imgs = (np.array(imgs) / 255.).astype(np.float32) # keep all 4 channels (RGBA) 61 | poses = np.array(poses).astype(np.float32) 62 | counts.append(counts[-1] + imgs.shape[0]) 63 | all_imgs.append(imgs) 64 | all_poses.append(poses) 65 | 66 | i_split = [np.arange(counts[i], counts[i+1]) for i in range(3)] 67 | 68 | imgs = np.concatenate(all_imgs, 0) 69 | poses = np.concatenate(all_poses, 0) 70 | 71 | H, W = imgs[0].shape[:2] 72 | camera_angle_x = float(meta['camera_angle_x']) 73 | focal = .5 * W / np.tan(.5 * camera_angle_x) 74 | 75 | render_poses = torch.stack([pose_spherical(angle, -30.0, 4.0) for angle in np.linspace(-180,180,160+1)[:-1]], 0) 76 | 77 | if half_res: 78 | H = H//2 79 | W = W//2 80 | focal = focal/2. 
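# The focal computed above is the pinhole relation focal = 0.5 * W / tan(0.5 * camera_angle_x);
# halving H, W, and focal together keeps the field of view unchanged. A worked example, assuming
# the common 800x800 synthetic setup (camera_angle_x ~ 0.6911 rad; illustrative numbers only):
#   focal = 0.5 * 800 / np.tan(0.5 * 0.6911)   # ~ 1111.1 px
#   # after half_res: H = W = 400, focal ~ 555.6 px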
81 | 82 | imgs_half_res = np.zeros((imgs.shape[0], H, W, 4)) 83 | for i, img in enumerate(imgs): 84 | imgs_half_res[i] = cv2.resize(img, (W, H), interpolation=cv2.INTER_AREA) 85 | imgs = imgs_half_res 86 | # imgs = tf.image.resize_area(imgs, [400, 400]).numpy() 87 | 88 | return imgs, poses, render_poses, [H, W, focal], i_split 89 | 90 | 91 | -------------------------------------------------------------------------------- /lib/load_co3d.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import gzip 4 | import glob 5 | import torch 6 | import numpy as np 7 | import imageio 8 | import torch.nn.functional as F 9 | import cv2 10 | 11 | 12 | def load_co3d_data(cfg): 13 | 14 | # load meta 15 | with gzip.open(cfg.annot_path, 'rt', encoding='utf8') as zipfile: 16 | annot = [v for v in json.load(zipfile) if v['sequence_name'] == cfg.sequence_name] 17 | with open(cfg.split_path) as f: 18 | split = json.load(f) 19 | train_im_path = set() 20 | test_im_path = set() 21 | for k, lst in split.items(): 22 | for v in lst: 23 | if v[0] == cfg.sequence_name: 24 | if 'known' in k: 25 | train_im_path.add(v[-1]) 26 | else: 27 | test_im_path.add(v[-1]) 28 | assert len(annot) == len(train_im_path) + len(test_im_path), 'Mismatch: '\ 29 | f'{len(annot)} == {len(train_im_path) + len(test_im_path)}' 30 | 31 | # load data 32 | imgs = [] 33 | masks = [] 34 | poses = [] 35 | Ks = [] 36 | i_split = [[], []] 37 | remove_empty_masks_cnt = [0, 0] 38 | for i, meta in enumerate(annot): 39 | im_fname = meta['image']['path'] 40 | assert im_fname in train_im_path or im_fname in test_im_path 41 | sid = 0 if im_fname in train_im_path else 1 42 | if meta['mask']['mass'] == 0: 43 | remove_empty_masks_cnt[sid] += 1 44 | continue 45 | im_path = os.path.join(cfg.datadir, im_fname) 46 | mask_path = os.path.join(cfg.datadir, meta['mask']['path']) 47 | mask = imageio.imread(mask_path) / 255. 48 | if mask.max() < 0.5: 49 | remove_empty_masks_cnt[sid] += 1 50 | continue 51 | Rt = np.concatenate([meta['viewpoint']['R'], np.array(meta['viewpoint']['T'])[:,None]], 1) 52 | pose = np.linalg.inv(np.concatenate([Rt, [[0,0,0,1]]])) 53 | imgs.append(imageio.imread(im_path) / 255.)
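# The pose built above stacks the annotation's R (3x3) and T (3,) into [R|T], appends the
# homogeneous row [0, 0, 0, 1], and inverts: pose = inv([[R, T], [0, 1]]). This treats the
# stored R, T as a world-to-camera transform (an assumption consistent with how `pose` is
# consumed as camera-to-world elsewhere in this repo).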
54 | masks.append(mask) 55 | poses.append(pose) 56 | assert imgs[-1].shape[:2] == tuple(meta['image']['size']) 57 | half_image_size_wh = np.float32(meta['image']['size'][::-1]) * 0.5 58 | principal_point = np.float32(meta['viewpoint']['principal_point']) 59 | focal_length = np.float32(meta['viewpoint']['focal_length']) 60 | principal_point_px = -1.0 * (principal_point - 1.0) * half_image_size_wh 61 | focal_length_px = focal_length * half_image_size_wh 62 | Ks.append(np.array([ 63 | [focal_length_px[0], 0, principal_point_px[0]], 64 | [0, focal_length_px[1], principal_point_px[1]], 65 | [0, 0, 1], 66 | ])) 67 | i_split[sid].append(len(imgs)-1) 68 | 69 | if sum(remove_empty_masks_cnt) > 0: 70 | print('load_co3d_data: removed %d train / %d test due to empty mask' % tuple(remove_empty_masks_cnt)) 71 | print(f'load_co3d_data: num images {len(i_split[0])} train / {len(i_split[1])} test') 72 | 73 | imgs = np.array(imgs) 74 | masks = np.array(masks) 75 | poses = np.stack(poses, 0) 76 | Ks = np.stack(Ks, 0) 77 | render_poses = poses[i_split[-1]] 78 | i_split.append(i_split[-1]) 79 | 80 | # visualization hwf 81 | H, W = np.array([im.shape[:2] for im in imgs]).mean(0).astype(int) 82 | focal = Ks[:,[0,1],[0,1]].mean() 83 | 84 | return imgs, masks, poses, render_poses, [H, W, focal], Ks, i_split 85 | 86 | -------------------------------------------------------------------------------- /lib/load_data.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from .load_llff import load_llff_data 4 | from .load_blender import load_blender_data 5 | from .load_nsvf import load_nsvf_data 6 | from .load_blendedmvs import load_blendedmvs_data 7 | from .load_tankstemple import load_tankstemple_data 8 | from .load_deepvoxels import load_dv_data 9 | from .load_co3d import load_co3d_data 10 | from .load_nerfpp import load_nerfpp_data 11 | 12 | 13 | def load_data(args): 14 | 15 | K, depths = None, None 16 | near_clip = None 17 | 18 | if args.dataset_type == 'llff': 19 | images, depths, poses, bds, render_poses, i_test = load_llff_data( 20 | args.datadir, args.factor, args.width, args.height, 21 | recenter=True, bd_factor=args.bd_factor, 22 | spherify=args.spherify, 23 | load_depths=args.load_depths, 24 | movie_render_kwargs=args.movie_render_kwargs) 25 | hwf = poses[0,:3,-1] 26 | poses = poses[:,:3,:4] 27 | print('Loaded llff', images.shape, render_poses.shape, hwf, args.datadir) 28 | if not isinstance(i_test, list): 29 | i_test = [i_test] 30 | 31 | if args.llffhold > 0: 32 | print('Auto LLFF holdout,', args.llffhold) 33 | i_test = np.arange(images.shape[0])[::args.llffhold] 34 | 35 | i_val = i_test 36 | i_train = np.array([i for i in np.arange(int(images.shape[0])) if 37 | (i not in i_test and i not in i_val)]) 38 | 39 | print('DEFINING BOUNDS') 40 | if args.ndc: 41 | near = 0. 42 | far = 1. 43 | else: 44 | near_clip = max(np.ndarray.min(bds) * .9, 0) 45 | _far = max(np.ndarray.max(bds) * 1., 0) 46 | near = 0 47 | far = inward_nearfar_heuristic(poses[i_train, :3, 3])[1] 48 | print('near_clip', near_clip) 49 | print('original far', _far) 50 | print('NEAR FAR', near, far) 51 | 52 | elif args.dataset_type == 'blender': 53 | images, poses, render_poses, hwf, i_split = load_blender_data(args.datadir, args.half_res, args.testskip) 54 | print('Loaded blender', images.shape, render_poses.shape, hwf, args.datadir) 55 | i_train, i_val, i_test = i_split 56 | 57 | near, far = 2., 6. 
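# The fixed [2, 6] bounds follow the original NeRF convention for the synthetic scenes:
# cameras sit on a sphere of radius ~4 around an object of roughly unit scale, so all
# visible content lies within [2, 6] along each ray. The RGBA handling below premultiplies
# by alpha; with white_bkgd it composites onto white, out = rgb * a + (1 - a),
# e.g. a half-transparent red pixel (1, 0, 0, 0.5) becomes (1.0, 0.5, 0.5).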
58 | 59 | if images.shape[-1] == 4: 60 | if args.white_bkgd: 61 | images = images[...,:3]*images[...,-1:] + (1.-images[...,-1:]) 62 | else: 63 | images = images[...,:3]*images[...,-1:] 64 | 65 | elif args.dataset_type == 'blendedmvs': 66 | images, poses, render_poses, hwf, K, i_split = load_blendedmvs_data(args.datadir) 67 | print('Loaded blendedmvs', images.shape, render_poses.shape, hwf, args.datadir) 68 | i_train, i_val, i_test = i_split 69 | 70 | near, far = inward_nearfar_heuristic(poses[i_train, :3, 3]) 71 | 72 | assert images.shape[-1] == 3 73 | 74 | elif args.dataset_type == 'tankstemple': 75 | images, poses, render_poses, hwf, K, i_split = load_tankstemple_data( 76 | args.datadir, movie_render_kwargs=args.movie_render_kwargs) 77 | print('Loaded tankstemple', images.shape, render_poses.shape, hwf, args.datadir) 78 | i_train, i_val, i_test = i_split 79 | 80 | near, far = inward_nearfar_heuristic(poses[i_train, :3, 3], ratio=0) 81 | 82 | if images.shape[-1] == 4: 83 | if args.white_bkgd: 84 | images = images[...,:3]*images[...,-1:] + (1.-images[...,-1:]) 85 | else: 86 | images = images[...,:3]*images[...,-1:] 87 | 88 | elif args.dataset_type == 'nsvf': 89 | images, poses, render_poses, hwf, i_split = load_nsvf_data(args.datadir) 90 | print('Loaded nsvf', images.shape, render_poses.shape, hwf, args.datadir) 91 | i_train, i_val, i_test = i_split 92 | 93 | near, far = inward_nearfar_heuristic(poses[i_train, :3, 3]) 94 | 95 | if images.shape[-1] == 4: 96 | if args.white_bkgd: 97 | images = images[...,:3]*images[...,-1:] + (1.-images[...,-1:]) 98 | else: 99 | images = images[...,:3]*images[...,-1:] 100 | 101 | elif args.dataset_type == 'deepvoxels': 102 | images, poses, render_poses, hwf, i_split = load_dv_data(scene=args.scene, basedir=args.datadir, testskip=args.testskip) 103 | print('Loaded deepvoxels', images.shape, render_poses.shape, hwf, args.datadir) 104 | i_train, i_val, i_test = i_split 105 | 106 | hemi_R = np.mean(np.linalg.norm(poses[:,:3,-1], axis=-1)) 107 | near = hemi_R - 1 108 | far = hemi_R + 1 109 | assert args.white_bkgd 110 | assert images.shape[-1] == 3 111 | 112 | elif args.dataset_type == 'co3d': 113 | # each image can be in different shapes and intrinsics 114 | images, masks, poses, render_poses, hwf, K, i_split = load_co3d_data(args) 115 | print('Loaded co3d', args.datadir, args.annot_path, args.sequence_name) 116 | i_train, i_val, i_test = i_split 117 | 118 | near, far = inward_nearfar_heuristic(poses[i_train, :3, 3], ratio=0) 119 | 120 | for i in range(len(images)): 121 | if args.white_bkgd: 122 | images[i] = images[i] * masks[i][...,None] + (1.-masks[i][...,None]) 123 | else: 124 | images[i] = images[i] * masks[i][...,None] 125 | 126 | elif args.dataset_type == 'nerfpp': 127 | images, poses, render_poses, hwf, K, i_split = load_nerfpp_data(args.datadir) 128 | print('Loaded nerf_pp', images.shape, hwf, args.datadir) 129 | i_train, i_val, i_test = i_split 130 | 131 | near_clip, far = inward_nearfar_heuristic(poses[i_train, :3, 3], ratio=0.02) 132 | near = 0 133 | 134 | else: 135 | raise NotImplementedError(f'Unknown dataset type {args.dataset_type} exiting') 136 | 137 | # Cast intrinsics to right types 138 | H, W, focal = hwf 139 | H, W = int(H), int(W) 140 | hwf = [H, W, focal] 141 | HW = np.array([im.shape[:2] for im in images]) 142 | irregular_shape = (images.dtype is np.dtype('object')) 143 | 144 | if K is None: 145 | K = np.array([ 146 | [focal, 0, 0.5*W], 147 | [0, focal, 0.5*H], 148 | [0, 0, 1] 149 | ]) 150 | 151 | if len(K.shape) == 2: 152 | Ks = 
K[None].repeat(len(poses), axis=0) 153 | else: 154 | Ks = K 155 | 156 | render_poses = render_poses[...,:4] 157 | 158 | data_dict = dict( 159 | hwf=hwf, HW=HW, Ks=Ks, 160 | near=near, far=far, near_clip=near_clip, 161 | i_train=i_train, i_val=i_val, i_test=i_test, 162 | poses=poses, render_poses=render_poses, 163 | images=images, depths=depths, 164 | irregular_shape=irregular_shape, 165 | ) 166 | return data_dict 167 | 168 | 169 | def inward_nearfar_heuristic(cam_o, ratio=0.05): 170 | dist = np.linalg.norm(cam_o[:,None] - cam_o, axis=-1) 171 | far = dist.max() # could be too small to cover the scene bbox; 172 | # it is only used to determine the scene bbox 173 | # (lib/dvgo uses 1e9 as far) 174 | near = far * ratio 175 | return near, far 176 | 177 | -------------------------------------------------------------------------------- /lib/load_deepvoxels.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import imageio 4 | 5 | 6 | def load_dv_data(scene='cube', basedir='/data/deepvoxels', testskip=1): 7 | 8 | def parse_intrinsics(filepath, trgt_sidelength, invert_y=False): 9 | # Get camera intrinsics 10 | with open(filepath, 'r') as file: 11 | f, cx, cy = list(map(float, file.readline().split()))[:3] 12 | grid_barycenter = np.array(list(map(float, file.readline().split()))) 13 | near_plane = float(file.readline()) 14 | scale = float(file.readline()) 15 | height, width = map(float, file.readline().split()) 16 | 17 | try: 18 | world2cam_poses = int(file.readline()) 19 | except ValueError: 20 | world2cam_poses = None 21 | 22 | if world2cam_poses is None: 23 | world2cam_poses = False 24 | 25 | world2cam_poses = bool(world2cam_poses) 26 | 27 | print(cx,cy,f,height,width) 28 | 29 | cx = cx / width * trgt_sidelength 30 | cy = cy / height * trgt_sidelength 31 | f = trgt_sidelength / height * f 32 | 33 | fx = f 34 | if invert_y: 35 | fy = -f 36 | else: 37 | fy = f 38 | 39 | # Build the intrinsic matrices 40 | full_intrinsic = np.array([[fx, 0., cx, 0.], 41 | [0., fy, cy, 0], 42 | [0., 0, 1, 0], 43 | [0, 0, 0, 1]]) 44 | 45 | return full_intrinsic, grid_barycenter, scale, near_plane, world2cam_poses 46 | 47 | 48 | def load_pose(filename): 49 | assert os.path.isfile(filename) 50 | nums = open(filename).read().split() 51 | return np.array([float(x) for x in nums]).reshape([4,4]).astype(np.float32) 52 | 53 | 54 | H = 512 55 | W = 512 56 | deepvoxels_base = '{}/train/{}/'.format(basedir, scene) 57 | 58 | full_intrinsic, grid_barycenter, scale, near_plane, world2cam_poses = parse_intrinsics(os.path.join(deepvoxels_base, 'intrinsics.txt'), H) 59 | print(full_intrinsic, grid_barycenter, scale, near_plane, world2cam_poses) 60 | focal = full_intrinsic[0,0] 61 | print(H, W, focal) 62 | 63 | def dir2poses(posedir): 64 | poses = np.stack([load_pose(os.path.join(posedir, f)) for f in sorted(os.listdir(posedir)) if f.endswith('txt')], 0) 65 | transf = np.array([ 66 | [1,0,0,0], 67 | [0,-1,0,0], 68 | [0,0,-1,0], 69 | [0,0,0,1.], 70 | ]) 71 | poses = poses @ transf 72 | poses = poses[:,:3,:4].astype(np.float32) 73 | return poses 74 | 75 | posedir = os.path.join(deepvoxels_base, 'pose') 76 | poses = dir2poses(posedir) 77 | testposes = dir2poses('{}/test/{}/pose'.format(basedir, scene)) 78 | testposes = testposes[::testskip] 79 | valposes = dir2poses('{}/validation/{}/pose'.format(basedir, scene)) 80 | valposes = valposes[::testskip] 81 | 82 | imgfiles = [f for f in sorted(os.listdir(os.path.join(deepvoxels_base, 'rgb'))) if f.endswith('png')] 83 | imgs = 
np.stack([imageio.imread(os.path.join(deepvoxels_base, 'rgb', f))/255. for f in imgfiles], 0).astype(np.float32) 84 | 85 | testimgd = '{}/test/{}/rgb'.format(basedir, scene) 86 | imgfiles = [f for f in sorted(os.listdir(testimgd)) if f.endswith('png')] 87 | testimgs = np.stack([imageio.imread(os.path.join(testimgd, f))/255. for f in imgfiles[::testskip]], 0).astype(np.float32) 88 | 89 | valimgd = '{}/validation/{}/rgb'.format(basedir, scene) 90 | imgfiles = [f for f in sorted(os.listdir(valimgd)) if f.endswith('png')] 91 | valimgs = np.stack([imageio.imread(os.path.join(valimgd, f))/255. for f in imgfiles[::testskip]], 0).astype(np.float32) 92 | 93 | all_imgs = [imgs, valimgs, testimgs] 94 | counts = [0] + [x.shape[0] for x in all_imgs] 95 | counts = np.cumsum(counts) 96 | i_split = [np.arange(counts[i], counts[i+1]) for i in range(3)] 97 | 98 | imgs = np.concatenate(all_imgs, 0) 99 | poses = np.concatenate([poses, valposes, testposes], 0) 100 | 101 | render_poses = testposes 102 | 103 | print(poses.shape, imgs.shape) 104 | 105 | return imgs, poses, render_poses, [H, W, focal], i_split 106 | 107 | 108 | -------------------------------------------------------------------------------- /lib/load_llff.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os, imageio 3 | import torch 4 | import scipy 5 | 6 | ########## Slightly modified version of LLFF data loading code 7 | ########## see https://github.com/Fyusion/LLFF for original 8 | def imread(f): 9 | if f.endswith('png'): 10 | return imageio.imread(f, ignoregamma=True) 11 | else: 12 | return imageio.imread(f) 13 | 14 | def depthread(path): 15 | with open(path, "rb") as fid: 16 | width, height, channels = np.genfromtxt(fid, delimiter="&", max_rows=1, 17 | usecols=(0, 1, 2), dtype=int) 18 | fid.seek(0) 19 | num_delimiter = 0 20 | byte = fid.read(1) 21 | while True: 22 | if byte == b"&": 23 | num_delimiter += 1 24 | if num_delimiter >= 3: 25 | break 26 | byte = fid.read(1) 27 | array = np.fromfile(fid, np.float32) 28 | array = array.reshape((width, height, channels), order="F") 29 | return np.transpose(array, (1, 0, 2)).squeeze() 30 | 31 | 32 | def _minify(basedir, factors=[], resolutions=[]): 33 | needtoload = False 34 | for r in factors: 35 | imgdir = os.path.join(basedir, 'images_{}'.format(r)) 36 | if not os.path.exists(imgdir): 37 | needtoload = True 38 | for r in resolutions: 39 | imgdir = os.path.join(basedir, 'images_{}x{}'.format(r[1], r[0])) 40 | if not os.path.exists(imgdir): 41 | needtoload = True 42 | if not needtoload: 43 | return 44 | 45 | from shutil import copy 46 | from subprocess import check_output 47 | 48 | imgdir = os.path.join(basedir, 'images') 49 | imgs = [os.path.join(imgdir, f) for f in sorted(os.listdir(imgdir))] 50 | imgs = [f for f in imgs if any([f.endswith(ex) for ex in ['JPG', 'jpg', 'png', 'jpeg', 'PNG']])] 51 | imgdir_orig = imgdir 52 | 53 | wd = os.getcwd() 54 | 55 | for r in factors + resolutions: 56 | if isinstance(r, int): 57 | name = 'images_{}'.format(r) 58 | resizearg = '{}%'.format(100./r) 59 | else: 60 | name = 'images_{}x{}'.format(r[1], r[0]) 61 | resizearg = '{}x{}'.format(r[1], r[0]) 62 | imgdir = os.path.join(basedir, name) 63 | if os.path.exists(imgdir): 64 | continue 65 | 66 | print('Minifying', r, basedir) 67 | 68 | os.makedirs(imgdir) 69 | check_output('cp {}/* {}'.format(imgdir_orig, imgdir), shell=True) 70 | 71 | ext = imgs[0].split('.')[-1] 72 | args = ' '.join(['mogrify', '-resize', resizearg, '-format', 'png', 
'*.{}'.format(ext)]) 73 | print(args) 74 | os.chdir(imgdir) 75 | check_output(args, shell=True) 76 | os.chdir(wd) 77 | 78 | if ext != 'png': 79 | check_output('rm {}/*.{}'.format(imgdir, ext), shell=True) 80 | print('Removed duplicates') 81 | print('Done') 82 | 83 | 84 | def _load_data(basedir, factor=None, width=None, height=None, load_imgs=True, load_depths=False): 85 | 86 | poses_arr = np.load(os.path.join(basedir, 'poses_bounds.npy')) 87 | if poses_arr.shape[1] == 17: 88 | poses = poses_arr[:, :-2].reshape([-1, 3, 5]).transpose([1,2,0]) 89 | elif poses_arr.shape[1] == 14: 90 | poses = poses_arr[:, :-2].reshape([-1, 3, 4]).transpose([1,2,0]) 91 | else: 92 | raise NotImplementedError 93 | bds = poses_arr[:, -2:].transpose([1,0]) 94 | 95 | img0 = [os.path.join(basedir, 'images', f) for f in sorted(os.listdir(os.path.join(basedir, 'images'))) \ 96 | if f.endswith('JPG') or f.endswith('jpg') or f.endswith('png')][0] 97 | sh = imageio.imread(img0).shape 98 | 99 | sfx = '' 100 | 101 | if height is not None and width is not None: 102 | _minify(basedir, resolutions=[[height, width]]) 103 | sfx = '_{}x{}'.format(width, height) 104 | elif factor is not None and factor != 1: 105 | sfx = '_{}'.format(factor) 106 | _minify(basedir, factors=[factor]) 107 | factor = factor 108 | elif height is not None: 109 | factor = sh[0] / float(height) 110 | width = int(sh[1] / factor) 111 | _minify(basedir, resolutions=[[height, width]]) 112 | sfx = '_{}x{}'.format(width, height) 113 | elif width is not None: 114 | factor = sh[1] / float(width) 115 | height = int(sh[0] / factor) 116 | _minify(basedir, resolutions=[[height, width]]) 117 | sfx = '_{}x{}'.format(width, height) 118 | else: 119 | factor = 1 120 | 121 | imgdir = os.path.join(basedir, 'images' + sfx) 122 | print(f'Loading images from {imgdir}') 123 | if not os.path.exists(imgdir): 124 | print( imgdir, 'does not exist, returning' ) 125 | return 126 | 127 | imgfiles = [os.path.join(imgdir, f) for f in sorted(os.listdir(imgdir)) if f.endswith('JPG') or f.endswith('jpg') or f.endswith('png')] 128 | if poses.shape[-1] != len(imgfiles): 129 | print() 130 | print( 'Mismatch between imgs {} and poses {} !!!!'.format(len(imgfiles), poses.shape[-1]) ) 131 | names = set(name[:-4] for name in np.load(os.path.join(basedir, 'poses_names.npy'))) 132 | assert len(names) == poses.shape[-1] 133 | print('Below files are skipped due to SfM failure:') 134 | new_imgfiles = [] 135 | for i in imgfiles: 136 | fname = os.path.split(i)[1][:-4] 137 | if fname in names: 138 | new_imgfiles.append(i) 139 | else: 140 | print('==>', i) 141 | imgfiles = new_imgfiles 142 | 143 | if len(imgfiles) < 3: 144 | print('Too few images...') 145 | import sys; sys.exit() 146 | 147 | sh = imageio.imread(imgfiles[0]).shape 148 | if poses.shape[1] == 4: 149 | poses = np.concatenate([poses, np.zeros_like(poses[:,[0]])], 1) 150 | poses[2, 4, :] = np.load(os.path.join(basedir, 'hwf_cxcy.npy'))[2] 151 | poses[:2, 4, :] = np.array(sh[:2]).reshape([2, 1]) 152 | poses[2, 4, :] = poses[2, 4, :] * 1./factor 153 | 154 | if not load_imgs: 155 | return poses, bds 156 | 157 | 158 | imgs = [imread(f)[...,:3]/255. 
for f in imgfiles] 159 | imgs = np.stack(imgs, -1) 160 | 161 | print('Loaded image data', imgs.shape, poses[:,-1,0]) 162 | 163 | if not load_depths: 164 | return poses, bds, imgs 165 | 166 | depthdir = os.path.join(basedir, 'stereo', 'depth_maps') 167 | assert os.path.exists(depthdir), f'Dir not found: {depthdir}' 168 | 169 | depthfiles = [os.path.join(depthdir, f) for f in sorted(os.listdir(depthdir)) if f.endswith('.geometric.bin')] 170 | assert poses.shape[-1] == len(depthfiles), 'Mismatch between imgs {} and poses {} !!!!'.format(len(depthfiles), poses.shape[-1]) 171 | 172 | depths = [depthread(f) for f in depthfiles] 173 | depths = np.stack(depths, -1) 174 | print('Loaded depth data', depths.shape) 175 | return poses, bds, imgs, depths 176 | 177 | 178 | def normalize(x): 179 | return x / np.linalg.norm(x) 180 | 181 | def viewmatrix(z, up, pos): 182 | vec2 = normalize(z) 183 | vec1_avg = up 184 | vec0 = normalize(np.cross(vec1_avg, vec2)) 185 | vec1 = normalize(np.cross(vec2, vec0)) 186 | m = np.stack([vec0, vec1, vec2, pos], 1) 187 | return m 188 | 189 | def ptstocam(pts, c2w): 190 | tt = np.matmul(c2w[:3,:3].T, (pts-c2w[:3,3])[...,np.newaxis])[...,0] 191 | return tt 192 | 193 | def poses_avg(poses): 194 | 195 | hwf = poses[0, :3, -1:] 196 | 197 | center = poses[:, :3, 3].mean(0) 198 | vec2 = normalize(poses[:, :3, 2].sum(0)) 199 | up = poses[:, :3, 1].sum(0) 200 | c2w = np.concatenate([viewmatrix(vec2, up, center), hwf], 1) 201 | 202 | return c2w 203 | 204 | 205 | 206 | def render_path_spiral(c2w, up, rads, focal, zdelta, zrate, rots, N): 207 | render_poses = [] 208 | rads = np.array(list(rads) + [1.]) 209 | hwf = c2w[:,4:5] 210 | 211 | for theta in np.linspace(0., 2. * np.pi * rots, N+1)[:-1]: 212 | c = np.dot(c2w[:3,:4], np.array([np.cos(theta), -np.sin(theta), -np.sin(theta*zrate)*zdelta, 1.]) * rads) 213 | z = normalize(c - np.dot(c2w[:3,:4], np.array([0,0,-focal, 1.]))) 214 | render_poses.append(np.concatenate([viewmatrix(z, up, c), hwf], 1)) 215 | return render_poses 216 | 217 | 218 | 219 | def recenter_poses(poses): 220 | 221 | poses_ = poses+0 222 | bottom = np.reshape([0,0,0,1.], [1,4]) 223 | c2w = poses_avg(poses) 224 | c2w = np.concatenate([c2w[:3,:4], bottom], -2) 225 | bottom = np.tile(np.reshape(bottom, [1,1,4]), [poses.shape[0],1,1]) 226 | poses = np.concatenate([poses[:,:3,:4], bottom], -2) 227 | 228 | poses = np.linalg.inv(c2w) @ poses 229 | poses_[:,:3,:4] = poses[:,:3,:4] 230 | poses = poses_ 231 | return poses 232 | 233 | 234 | def rerotate_poses(poses): 235 | poses = np.copy(poses) 236 | centroid = poses[:,:3,3].mean(0) 237 | 238 | poses[:,:3,3] = poses[:,:3,3] - centroid 239 | 240 | # Find the minimum pca vector with minimum eigen value 241 | x = poses[:,:,3] 242 | mu = x.mean(0) 243 | cov = np.cov((x-mu).T) 244 | ev , eig = np.linalg.eig(cov) 245 | cams_up = eig[:,np.argmin(ev)] 246 | if cams_up[1] < 0: 247 | cams_up = -cams_up 248 | 249 | # Find rotation matrix that align cams_up with [0,1,0] 250 | R = scipy.spatial.transform.Rotation.align_vectors( 251 | [[0,1,0]], cams_up[None])[0].as_matrix() 252 | 253 | # Apply rotation and add back the centroid position 254 | poses[:,:3,:3] = R @ poses[:,:3,:3] 255 | poses[:,:3,[3]] = R @ poses[:,:3,[3]] 256 | poses[:,:3,3] = poses[:,:3,3] + centroid 257 | return poses 258 | 259 | ##################### 260 | 261 | 262 | def spherify_poses(poses, bds, depths): 263 | 264 | p34_to_44 = lambda p : np.concatenate([p, np.tile(np.reshape(np.eye(4)[-1,:], [1,1,4]), [p.shape[0], 1,1])], 1) 265 | 266 | rays_d = poses[:,:3,2:3] 267 
| rays_o = poses[:,:3,3:4] 268 | 269 | def min_line_dist(rays_o, rays_d): 270 | A_i = np.eye(3) - rays_d * np.transpose(rays_d, [0,2,1]) 271 | b_i = -A_i @ rays_o 272 | pt_mindist = np.squeeze(-np.linalg.inv((np.transpose(A_i, [0,2,1]) @ A_i).mean(0)) @ (b_i).mean(0)) 273 | return pt_mindist 274 | 275 | pt_mindist = min_line_dist(rays_o, rays_d) 276 | 277 | center = pt_mindist 278 | up = (poses[:,:3,3] - center).mean(0) 279 | 280 | vec0 = normalize(up) 281 | vec1 = normalize(np.cross([.1,.2,.3], vec0)) 282 | vec2 = normalize(np.cross(vec0, vec1)) 283 | pos = center 284 | c2w = np.stack([vec1, vec2, vec0, pos], 1) 285 | 286 | poses_reset = np.linalg.inv(p34_to_44(c2w[None])) @ p34_to_44(poses[:,:3,:4]) 287 | 288 | radius = np.sqrt(np.mean(np.sum(np.square(poses_reset[:,:3,3]), -1))) 289 | 290 | sc = 1./radius 291 | poses_reset[:,:3,3] *= sc 292 | bds *= sc 293 | radius *= sc 294 | depths *= sc 295 | 296 | poses_reset = np.concatenate([poses_reset[:,:3,:4], np.broadcast_to(poses[0,:3,-1:], poses_reset[:,:3,-1:].shape)], -1) 297 | 298 | return poses_reset, radius, bds, depths 299 | 300 | 301 | def load_llff_data(basedir, factor=8, width=None, height=None, 302 | recenter=True, rerotate=True, 303 | bd_factor=.75, spherify=False, path_zflat=False, load_depths=False, 304 | movie_render_kwargs={}): 305 | 306 | poses, bds, imgs, *depths = _load_data(basedir, factor=factor, width=width, height=height, 307 | load_depths=load_depths) # factor=8 downsamples original imgs by 8x 308 | print('Loaded', basedir, bds.min(), bds.max()) 309 | if load_depths: 310 | depths = depths[0] 311 | else: 312 | depths = 0 313 | 314 | # Correct rotation matrix ordering and move variable dim to axis 0 315 | poses = np.concatenate([poses[:, 1:2, :], -poses[:, 0:1, :], poses[:, 2:, :]], 1) 316 | poses = np.moveaxis(poses, -1, 0).astype(np.float32) 317 | imgs = np.moveaxis(imgs, -1, 0).astype(np.float32) 318 | images = imgs 319 | bds = np.moveaxis(bds, -1, 0).astype(np.float32) 320 | 321 | # Rescale if bd_factor is provided 322 | if bds.min() < 0 and bd_factor is not None: 323 | print('Found negative z values from SfM sparse points!?') 324 | print('Please try bd_factor=None') 325 | import sys; sys.exit() 326 | sc = 1. 
if bd_factor is None else 1./(bds.min() * bd_factor) 327 | poses[:,:3,3] *= sc 328 | bds *= sc 329 | depths *= sc 330 | 331 | if recenter: 332 | poses = recenter_poses(poses) 333 | 334 | if spherify: 335 | poses, radius, bds, depths = spherify_poses(poses, bds, depths) 336 | if rerotate: 337 | poses = rerotate_poses(poses) 338 | 339 | ### generate spiral poses for rendering fly-through movie 340 | centroid = poses[:,:3,3].mean(0) 341 | radcircle = movie_render_kwargs.get('scale_r', 1) * np.linalg.norm(poses[:,:3,3] - centroid, axis=-1).mean() 342 | centroid[0] += movie_render_kwargs.get('shift_x', 0) 343 | centroid[1] += movie_render_kwargs.get('shift_y', 0) 344 | centroid[2] += movie_render_kwargs.get('shift_z', 0) 345 | new_up_rad = movie_render_kwargs.get('pitch_deg', 0) * np.pi / 180 346 | target_y = radcircle * np.tan(new_up_rad) 347 | 348 | render_poses = [] 349 | 350 | for th in np.linspace(0., 2.*np.pi, 200): 351 | camorigin = np.array([radcircle * np.cos(th), 0, radcircle * np.sin(th)]) 352 | if movie_render_kwargs.get('flip_up', False): 353 | up = np.array([0,1.,0]) 354 | else: 355 | up = np.array([0,-1.,0]) 356 | vec2 = normalize(camorigin) 357 | vec0 = normalize(np.cross(vec2, up)) 358 | vec1 = normalize(np.cross(vec2, vec0)) 359 | pos = camorigin + centroid 360 | # rotate to align with new pitch rotation 361 | lookat = -vec2 362 | lookat[1] = target_y 363 | lookat = normalize(lookat) 364 | vec2 = -lookat 365 | vec1 = normalize(np.cross(vec2, vec0)) 366 | 367 | p = np.stack([vec0, vec1, vec2, pos], 1) 368 | 369 | render_poses.append(p) 370 | 371 | render_poses = np.stack(render_poses, 0) 372 | render_poses = np.concatenate([render_poses, np.broadcast_to(poses[0,:3,-1:], render_poses[:,:3,-1:].shape)], -1) 373 | 374 | else: 375 | 376 | c2w = poses_avg(poses) 377 | print('recentered', c2w.shape) 378 | print(c2w[:3,:4]) 379 | 380 | ## Get spiral 381 | # Get average pose 382 | up = normalize(poses[:, :3, 1].sum(0)) 383 | 384 | # Find a reasonable "focus depth" for this dataset 385 | close_depth, inf_depth = bds.min()*.9, bds.max()*5. 386 | dt = .75 387 | mean_dz = 1./(((1.-dt)/close_depth + dt/inf_depth)) 388 | focal = mean_dz * movie_render_kwargs.get('scale_f', 1) 389 | 390 | # Get radii for spiral path 391 | zdelta = movie_render_kwargs.get('zdelta', 0.5) 392 | zrate = movie_render_kwargs.get('zrate', 1.0) 393 | tt = poses[:,:3,3] # ptstocam(poses[:3,3,:].T, c2w).T 394 | rads = np.percentile(np.abs(tt), 90, 0) * movie_render_kwargs.get('scale_r', 1) 395 | c2w_path = c2w 396 | N_views = 120 397 | N_rots = movie_render_kwargs.get('N_rots', 1) 398 | if path_zflat: 399 | # zloc = np.percentile(tt, 10, 0)[2] 400 | zloc = -close_depth * .1 401 | c2w_path[:3,3] = c2w_path[:3,3] + zloc * c2w_path[:3,2] 402 | rads[2] = 0. 
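# path_zflat flattens the spiral: the path center is nudged along the view axis (zloc)
# and the z-radius zeroed above, then a single rotation with half the views is rendered
# below. The "focus depth" further up is a harmonic blend,
#   mean_dz = 1 / ((1 - dt)/close_depth + dt/inf_depth), with dt = 0.75;
# e.g. close_depth = 2, inf_depth = 100 gives 1 / (0.125 + 0.0075) ~ 7.55.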
403 | N_rots = 1 404 | N_views/=2 405 | 406 | # Generate poses for spiral path 407 | render_poses = render_path_spiral(c2w_path, up, rads, focal, zdelta, zrate=zrate, rots=N_rots, N=N_views) 408 | 409 | render_poses = torch.Tensor(render_poses) 410 | 411 | c2w = poses_avg(poses) 412 | print('Data:') 413 | print(poses.shape, images.shape, bds.shape) 414 | 415 | dists = np.sum(np.square(c2w[:3,3] - poses[:,:3,3]), -1) 416 | i_test = np.argmin(dists) 417 | print('HOLDOUT view is', i_test) 418 | 419 | images = images.astype(np.float32) 420 | poses = poses.astype(np.float32) 421 | 422 | return images, depths, poses, bds, render_poses, i_test 423 | 424 | -------------------------------------------------------------------------------- /lib/load_nerfpp.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Modify from 3 | https://github.com/Kai-46/nerfplusplus/blob/master/data_loader_split.py 4 | ''' 5 | import os 6 | import glob 7 | import scipy 8 | import imageio 9 | import numpy as np 10 | import torch 11 | 12 | ######################################################################################################################## 13 | # camera coordinate system: x-->right, y-->down, z-->scene (opencv/colmap convention) 14 | # poses is camera-to-world 15 | ######################################################################################################################## 16 | def find_files(dir, exts): 17 | if os.path.isdir(dir): 18 | files_grabbed = [] 19 | for ext in exts: 20 | files_grabbed.extend(glob.glob(os.path.join(dir, ext))) 21 | if len(files_grabbed) > 0: 22 | files_grabbed = sorted(files_grabbed) 23 | return files_grabbed 24 | else: 25 | return [] 26 | 27 | 28 | def load_data_split(split_dir, skip=1, try_load_min_depth=True, only_img_files=False): 29 | 30 | def parse_txt(filename): 31 | assert os.path.isfile(filename) 32 | nums = open(filename).read().split() 33 | return np.array([float(x) for x in nums]).reshape([4, 4]).astype(np.float32) 34 | 35 | if only_img_files: 36 | img_files = find_files('{}/rgb'.format(split_dir), exts=['*.png', '*.jpg']) 37 | return img_files 38 | 39 | # camera parameters files 40 | intrinsics_files = find_files('{}/intrinsics'.format(split_dir), exts=['*.txt']) 41 | pose_files = find_files('{}/pose'.format(split_dir), exts=['*.txt']) 42 | 43 | intrinsics_files = intrinsics_files[::skip] 44 | pose_files = pose_files[::skip] 45 | cam_cnt = len(pose_files) 46 | 47 | # img files 48 | img_files = find_files('{}/rgb'.format(split_dir), exts=['*.png', '*.jpg']) 49 | if len(img_files) > 0: 50 | img_files = img_files[::skip] 51 | assert(len(img_files) == cam_cnt) 52 | else: 53 | img_files = [None, ] * cam_cnt 54 | 55 | # mask files 56 | mask_files = find_files('{}/mask'.format(split_dir), exts=['*.png', '*.jpg']) 57 | if len(mask_files) > 0: 58 | mask_files = mask_files[::skip] 59 | assert(len(mask_files) == cam_cnt) 60 | else: 61 | mask_files = [None, ] * cam_cnt 62 | 63 | # min depth files 64 | mindepth_files = find_files('{}/min_depth'.format(split_dir), exts=['*.png', '*.jpg']) 65 | if try_load_min_depth and len(mindepth_files) > 0: 66 | mindepth_files = mindepth_files[::skip] 67 | assert(len(mindepth_files) == cam_cnt) 68 | else: 69 | mindepth_files = [None, ] * cam_cnt 70 | 71 | return intrinsics_files, pose_files, img_files, mask_files, mindepth_files 72 | 73 | 74 | def rerotate_poses(poses, render_poses): 75 | poses = np.copy(poses) 76 | centroid = poses[:,:3,3].mean(0) 77 | 78 | poses[:,:3,3] = poses[:,:3,3] - 
centroid 79 | 80 | # Find the principal direction with the minimum eigenvalue 81 | x = poses[:,:3,3] 82 | mu = x.mean(0) 83 | cov = np.cov((x-mu).T) 84 | ev , eig = np.linalg.eig(cov) 85 | cams_up = eig[:,np.argmin(ev)] 86 | if cams_up[1] < 0: 87 | cams_up = -cams_up 88 | 89 | # Find rotation matrix that aligns cams_up with [0,-1,0] 90 | R = scipy.spatial.transform.Rotation.align_vectors( 91 | [[0,-1,0]], cams_up[None])[0].as_matrix() 92 | 93 | # Apply rotation and add back the centroid position 94 | poses[:,:3,:3] = R @ poses[:,:3,:3] 95 | poses[:,:3,[3]] = R @ poses[:,:3,[3]] 96 | poses[:,:3,3] = poses[:,:3,3] + centroid 97 | render_poses = np.copy(render_poses) 98 | render_poses[:,:3,3] = render_poses[:,:3,3] - centroid 99 | render_poses[:,:3,:3] = R @ render_poses[:,:3,:3] 100 | render_poses[:,:3,[3]] = R @ render_poses[:,:3,[3]] 101 | render_poses[:,:3,3] = render_poses[:,:3,3] + centroid 102 | return poses, render_poses 103 | 104 | 105 | def load_nerfpp_data(basedir, rerotate=True): 106 | tr_K, tr_c2w, tr_im_path = load_data_split(os.path.join(basedir, 'train'))[:3] 107 | te_K, te_c2w, te_im_path = load_data_split(os.path.join(basedir, 'test'))[:3] 108 | assert len(tr_K) == len(tr_c2w) and len(tr_K) == len(tr_im_path) 109 | assert len(te_K) == len(te_c2w) and len(te_K) == len(te_im_path) 110 | 111 | # Determine split id list 112 | i_split = [[], []] 113 | i = 0 114 | for _ in tr_c2w: 115 | i_split[0].append(i) 116 | i += 1 117 | for _ in te_c2w: 118 | i_split[1].append(i) 119 | i += 1 120 | 121 | # Load camera intrinsics. Assume all images share the same intrinsics. 122 | K_flatten = np.loadtxt(tr_K[0]) 123 | for path in tr_K: 124 | assert np.allclose(np.loadtxt(path), K_flatten) 125 | for path in te_K: 126 | assert np.allclose(np.loadtxt(path), K_flatten) 127 | K = K_flatten.reshape(4,4)[:3,:3] 128 | 129 | # Load camera poses 130 | poses = [] 131 | for path in tr_c2w: 132 | poses.append(np.loadtxt(path).reshape(4,4)) 133 | for path in te_c2w: 134 | poses.append(np.loadtxt(path).reshape(4,4)) 135 | 136 | # Load images 137 | imgs = [] 138 | for path in tr_im_path: 139 | imgs.append(imageio.imread(path) / 255.) 140 | for path in te_im_path: 141 | imgs.append(imageio.imread(path) / 255.) 
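# rerotate_poses above picks the scene "up" axis by PCA: the eigenvector of the camera-center
# covariance with the smallest eigenvalue is the direction the (roughly planar) camera ring
# varies least along. A minimal sketch of that step:
#   x = poses[:, :3, 3]
#   ev, eig = np.linalg.eig(np.cov((x - x.mean(0)).T))
#   up = eig[:, np.argmin(ev)]
# align_vectors then yields the rotation taking `up` to [0, -1, 0], which is applied to
# both the data poses and the render trajectory.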
142 | 143 | # Bundle all data 144 | imgs = np.stack(imgs, 0) 145 | poses = np.stack(poses, 0) 146 | i_split.append(i_split[1]) 147 | H, W = imgs.shape[1:3] 148 | focal = K[[0,1], [0,1]].mean() 149 | 150 | # Generate movie trajectory 151 | render_poses_path = sorted(glob.glob(os.path.join(basedir, 'camera_path', 'pose', '*txt'))) 152 | render_poses = [] 153 | for path in render_poses_path: 154 | render_poses.append(np.loadtxt(path).reshape(4,4)) 155 | render_poses = np.array(render_poses) 156 | render_K = np.loadtxt(glob.glob(os.path.join(basedir, 'camera_path', 'intrinsics', '*txt'))[0]).reshape(4,4)[:3,:3] 157 | render_poses[:,:,0] *= K[0,0] / render_K[0,0] 158 | render_poses[:,:,1] *= K[1,1] / render_K[1,1] 159 | if rerotate: 160 | poses, render_poses = rerotate_poses(poses, render_poses) 161 | 162 | render_poses = torch.Tensor(render_poses) 163 | 164 | return imgs, poses, render_poses, [H, W, focal], K, i_split 165 | 166 | -------------------------------------------------------------------------------- /lib/load_nsvf.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import torch 4 | import numpy as np 5 | import imageio 6 | import json 7 | import torch.nn.functional as F 8 | import cv2 9 | 10 | 11 | trans_t = lambda t : torch.Tensor([ 12 | [1,0,0,0], 13 | [0,1,0,0], 14 | [0,0,1,t], 15 | [0,0,0,1]]).float() 16 | 17 | rot_phi = lambda phi : torch.Tensor([ 18 | [1,0,0,0], 19 | [0,np.cos(phi),-np.sin(phi),0], 20 | [0,np.sin(phi), np.cos(phi),0], 21 | [0,0,0,1]]).float() 22 | 23 | rot_theta = lambda th : torch.Tensor([ 24 | [np.cos(th),0,-np.sin(th),0], 25 | [0,1,0,0], 26 | [np.sin(th),0, np.cos(th),0], 27 | [0,0,0,1]]).float() 28 | 29 | 30 | def pose_spherical(theta, phi, radius): 31 | c2w = trans_t(radius) 32 | c2w = rot_phi(phi/180.*np.pi) @ c2w 33 | c2w = rot_theta(theta/180.*np.pi) @ c2w 34 | c2w = torch.Tensor(np.array([[-1,0,0,0],[0,0,1,0],[0,1,0,0],[0,0,0,1]])) @ c2w 35 | c2w[:,[1,2]] *= -1 36 | return c2w 37 | 38 | 39 | def load_nsvf_data(basedir): 40 | pose_paths = sorted(glob.glob(os.path.join(basedir, 'pose', '*txt'))) 41 | rgb_paths = sorted(glob.glob(os.path.join(basedir, 'rgb', '*png'))) 42 | 43 | all_poses = [] 44 | all_imgs = [] 45 | i_split = [[], [], []] 46 | for i, (pose_path, rgb_path) in enumerate(zip(pose_paths, rgb_paths)): 47 | i_set = int(os.path.split(rgb_path)[-1][0]) 48 | all_imgs.append((imageio.imread(rgb_path) / 255.).astype(np.float32)) 49 | all_poses.append(np.loadtxt(pose_path).astype(np.float32)) 50 | i_split[i_set].append(i) 51 | 52 | imgs = np.stack(all_imgs, 0) 53 | poses = np.stack(all_poses, 0) 54 | 55 | H, W = imgs[0].shape[:2] 56 | with open(os.path.join(basedir, 'intrinsics.txt')) as f: 57 | focal = float(f.readline().split()[0]) 58 | 59 | R = np.sqrt((poses[...,:3,3]**2).sum(-1)).mean() 60 | render_poses = torch.stack([pose_spherical(angle, -30.0, R) for angle in np.linspace(-180,180,200+1)[:-1]], 0) 61 | 62 | return imgs, poses, render_poses, [H, W, focal], i_split 63 | 64 | -------------------------------------------------------------------------------- /lib/load_tankstemple.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import torch 4 | import numpy as np 5 | import imageio 6 | import json 7 | import torch.nn.functional as F 8 | import cv2 9 | 10 | 11 | def normalize(x): 12 | return x / np.linalg.norm(x) 13 | 14 | def load_tankstemple_data(basedir, movie_render_kwargs={}): 15 | pose_paths = 
sorted(glob.glob(os.path.join(basedir, 'pose', '*txt'))) 16 | rgb_paths = sorted(glob.glob(os.path.join(basedir, 'rgb', '*png'))) 17 | 18 | all_poses = [] 19 | all_imgs = [] 20 | i_split = [[], []] 21 | for i, (pose_path, rgb_path) in enumerate(zip(pose_paths, rgb_paths)): 22 | i_set = int(os.path.split(rgb_path)[-1][0]) 23 | all_poses.append(np.loadtxt(pose_path).astype(np.float32)) 24 | all_imgs.append((imageio.imread(rgb_path) / 255.).astype(np.float32)) 25 | i_split[i_set].append(i) 26 | 27 | imgs = np.stack(all_imgs, 0) 28 | poses = np.stack(all_poses, 0) 29 | i_split.append(i_split[-1]) 30 | 31 | path_intrinsics = os.path.join(basedir, 'intrinsics.txt') 32 | H, W = imgs[0].shape[:2] 33 | K = np.loadtxt(path_intrinsics) 34 | focal = float(K[0,0]) 35 | 36 | ### generate spiral poses for rendering fly-through movie 37 | centroid = poses[:,:3,3].mean(0) 38 | radcircle = movie_render_kwargs.get('scale_r', 1.0) * np.linalg.norm(poses[:,:3,3] - centroid, axis=-1).mean() 39 | centroid[0] += movie_render_kwargs.get('shift_x', 0) 40 | centroid[1] += movie_render_kwargs.get('shift_y', 0) 41 | centroid[2] += movie_render_kwargs.get('shift_z', 0) 42 | new_up_rad = movie_render_kwargs.get('pitch_deg', 0) * np.pi / 180 43 | target_y = radcircle * np.tan(new_up_rad) 44 | 45 | render_poses = [] 46 | 47 | for th in np.linspace(0., 2.*np.pi, 200): 48 | camorigin = np.array([radcircle * np.cos(th), 0, radcircle * np.sin(th)]) 49 | if movie_render_kwargs.get('flip_up_vec', False): 50 | up = np.array([0,-1.,0]) 51 | else: 52 | up = np.array([0,1.,0]) 53 | vec2 = normalize(camorigin) 54 | vec0 = normalize(np.cross(vec2, up)) 55 | vec1 = normalize(np.cross(vec2, vec0)) 56 | pos = camorigin + centroid 57 | # rotate to align with new pitch rotation 58 | lookat = -vec2 59 | lookat[1] = target_y 60 | lookat = normalize(lookat) 61 | lookat *= -1 62 | vec2 = -lookat 63 | vec1 = normalize(np.cross(vec2, vec0)) 64 | 65 | p = np.stack([vec0, vec1, vec2, pos], 1) 66 | 67 | render_poses.append(p) 68 | 69 | render_poses = np.stack(render_poses, 0) 70 | render_poses = np.concatenate([render_poses, np.broadcast_to(poses[0,:3,-1:], render_poses[:,:3,-1:].shape)], -1) 71 | 72 | return imgs, poses, render_poses, [H, W, focal], K, i_split 73 | 74 | -------------------------------------------------------------------------------- /lib/masked_adam.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.cpp_extension import load 4 | 5 | parent_dir = os.path.dirname(os.path.abspath(__file__)) 6 | sources=['cuda/adam_upd.cpp', 'cuda/adam_upd_kernel.cu'] 7 | adam_upd_cuda = load( 8 | name='adam_upd_cuda', 9 | sources=[os.path.join(parent_dir, path) for path in sources], 10 | verbose=True) 11 | 12 | 13 | ''' Extend Adam optimizer 14 | 1. support per-voxel learning rate 15 | 2. 
masked update (ignore zero grad) which speeds up training 16 | ''' 17 | class MaskedAdam(torch.optim.Optimizer): 18 | 19 | def __init__(self, params, lr=1e-3, betas=(0.9, 0.99), eps=1e-8): 20 | if not 0.0 <= lr: 21 | raise ValueError("Invalid learning rate: {}".format(lr)) 22 | if not 0.0 <= eps: 23 | raise ValueError("Invalid epsilon value: {}".format(eps)) 24 | if not 0.0 <= betas[0] < 1.0: 25 | raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0])) 26 | if not 0.0 <= betas[1] < 1.0: 27 | raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1])) 28 | defaults = dict(lr=lr, betas=betas, eps=eps) 29 | self.per_lr = None 30 | super(MaskedAdam, self).__init__(params, defaults) 31 | 32 | def __setstate__(self, state): 33 | super(MaskedAdam, self).__setstate__(state) 34 | 35 | def set_pervoxel_lr(self, count): 36 | assert self.param_groups[0]['params'][0].shape == count.shape 37 | self.per_lr = count.float() / count.max() 38 | 39 | @torch.no_grad() 40 | def step(self): 41 | for group in self.param_groups: 42 | lr = group['lr'] 43 | beta1, beta2 = group['betas'] 44 | eps = group['eps'] 45 | skip_zero_grad = group['skip_zero_grad'] 46 | 47 | for param in group['params']: 48 | if param.grad is not None: 49 | state = self.state[param] 50 | # Lazy state initialization 51 | if len(state) == 0: 52 | state['step'] = 0 53 | # Exponential moving average of gradient values 54 | state['exp_avg'] = torch.zeros_like(param, memory_format=torch.preserve_format) 55 | # Exponential moving average of squared gradient values 56 | state['exp_avg_sq'] = torch.zeros_like(param, memory_format=torch.preserve_format) 57 | 58 | state['step'] += 1 59 | 60 | if self.per_lr is not None and param.shape == self.per_lr.shape: 61 | adam_upd_cuda.adam_upd_with_perlr( 62 | param, param.grad, state['exp_avg'], state['exp_avg_sq'], self.per_lr, 63 | state['step'], beta1, beta2, lr, eps) 64 | elif skip_zero_grad: 65 | adam_upd_cuda.masked_adam_upd( 66 | param, param.grad, state['exp_avg'], state['exp_avg_sq'], 67 | state['step'], beta1, beta2, lr, eps) 68 | else: 69 | adam_upd_cuda.adam_upd( 70 | param, param.grad, state['exp_avg'], state['exp_avg_sq'], 71 | state['step'], beta1, beta2, lr, eps) 72 | 73 | -------------------------------------------------------------------------------- /lib/utils.py: -------------------------------------------------------------------------------- 1 | import os, math 2 | import numpy as np 3 | import scipy.signal 4 | from typing import List, Optional 5 | 6 | from torch import Tensor 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | 11 | from .masked_adam import MaskedAdam 12 | 13 | 14 | ''' Misc 15 | ''' 16 | mse2psnr = lambda x : -10. 
* torch.log10(x) 17 | to8b = lambda x : (255*np.clip(x,0,1)).astype(np.uint8) 18 | 19 | def create_optimizer_or_freeze_model(model, cfg_train, global_step): 20 | decay_steps = cfg_train.lrate_decay * 1000 21 | decay_factor = 0.1 ** (global_step/decay_steps) 22 | 23 | param_group = [] 24 | for k in cfg_train.keys(): 25 | if not k.startswith('lrate_'): 26 | continue 27 | k = k[len('lrate_'):] 28 | 29 | if not hasattr(model, k): 30 | continue 31 | 32 | param = getattr(model, k) 33 | if param is None: 34 | print(f'create_optimizer_or_freeze_model: param {k} not exist') 35 | continue 36 | 37 | lr = getattr(cfg_train, f'lrate_{k}') * decay_factor 38 | if lr > 0: 39 | print(f'create_optimizer_or_freeze_model: param {k} lr {lr}') 40 | if isinstance(param, nn.Module): 41 | param = param.parameters() 42 | param_group.append({'params': param, 'lr': lr, 'skip_zero_grad': (k in cfg_train.skip_zero_grad_fields)}) 43 | else: 44 | print(f'create_optimizer_or_freeze_model: param {k} freeze') 45 | param.requires_grad = False 46 | return MaskedAdam(param_group) 47 | 48 | 49 | ''' Checkpoint utils 50 | ''' 51 | def load_checkpoint(model, optimizer, ckpt_path, no_reload_optimizer): 52 | ckpt = torch.load(ckpt_path) 53 | start = ckpt['global_step'] 54 | model.load_state_dict(ckpt['model_state_dict']) 55 | if not no_reload_optimizer: 56 | optimizer.load_state_dict(ckpt['optimizer_state_dict']) 57 | return model, optimizer, start 58 | 59 | 60 | def load_model(model_class, ckpt_path): 61 | ckpt = torch.load(ckpt_path) 62 | model = model_class(**ckpt['model_kwargs']) 63 | model.load_state_dict(ckpt['model_state_dict']) 64 | return model 65 | 66 | 67 | ''' Evaluation metrics (ssim, lpips) 68 | ''' 69 | def rgb_ssim(img0, img1, max_val, 70 | filter_size=11, 71 | filter_sigma=1.5, 72 | k1=0.01, 73 | k2=0.03, 74 | return_map=False): 75 | # Modified from https://github.com/google/mipnerf/blob/16e73dfdb52044dcceb47cda5243a686391a6e0f/internal/math.py#L58 76 | assert len(img0.shape) == 3 77 | assert img0.shape[-1] == 3 78 | assert img0.shape == img1.shape 79 | 80 | # Construct a 1D Gaussian blur filter. 81 | hw = filter_size // 2 82 | shift = (2 * hw - filter_size + 1) / 2 83 | f_i = ((np.arange(filter_size) - hw + shift) / filter_sigma)**2 84 | filt = np.exp(-0.5 * f_i) 85 | filt /= np.sum(filt) 86 | 87 | # Blur in x and y (faster than the 2D convolution). 88 | def convolve2d(z, f): 89 | return scipy.signal.convolve2d(z, f, mode='valid') 90 | 91 | filt_fn = lambda z: np.stack([ 92 | convolve2d(convolve2d(z[...,i], filt[:, None]), filt[None, :]) 93 | for i in range(z.shape[-1])], -1) 94 | mu0 = filt_fn(img0) 95 | mu1 = filt_fn(img1) 96 | mu00 = mu0 * mu0 97 | mu11 = mu1 * mu1 98 | mu01 = mu0 * mu1 99 | sigma00 = filt_fn(img0**2) - mu00 100 | sigma11 = filt_fn(img1**2) - mu11 101 | sigma01 = filt_fn(img0 * img1) - mu01 102 | 103 | # Clip the variances and covariances to valid values. 
104 | # Variance must be non-negative: 105 | sigma00 = np.maximum(0., sigma00) 106 | sigma11 = np.maximum(0., sigma11) 107 | sigma01 = np.sign(sigma01) * np.minimum( 108 | np.sqrt(sigma00 * sigma11), np.abs(sigma01)) 109 | c1 = (k1 * max_val)**2 110 | c2 = (k2 * max_val)**2 111 | numer = (2 * mu01 + c1) * (2 * sigma01 + c2) 112 | denom = (mu00 + mu11 + c1) * (sigma00 + sigma11 + c2) 113 | ssim_map = numer / denom 114 | ssim = np.mean(ssim_map) 115 | return ssim_map if return_map else ssim 116 | 117 | 118 | __LPIPS__ = {} 119 | def init_lpips(net_name, device): 120 | assert net_name in ['alex', 'vgg'] 121 | import lpips 122 | print(f'init_lpips: lpips_{net_name}') 123 | return lpips.LPIPS(net=net_name, version='0.1').eval().to(device) 124 | 125 | def rgb_lpips(np_gt, np_im, net_name, device): 126 | if net_name not in __LPIPS__: 127 | __LPIPS__[net_name] = init_lpips(net_name, device) 128 | gt = torch.from_numpy(np_gt).permute([2, 0, 1]).contiguous().to(device) 129 | im = torch.from_numpy(np_im).permute([2, 0, 1]).contiguous().to(device) 130 | return __LPIPS__[net_name](gt, im, normalize=True).item() 131 | 132 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | scipy 3 | tqdm 4 | lpips 5 | mmcv 6 | imageio 7 | imageio-ffmpeg 8 | opencv-python 9 | torch_efficient_distloss 10 | 11 | -------------------------------------------------------------------------------- /tools/colmap_utils/colmap_read_model.py: -------------------------------------------------------------------------------- 1 | # Source: https://github.com/Fyusion/LLFF 2 | # Copyright (c) 2018, ETH Zurich and UNC Chapel Hill. 3 | # All rights reserved. 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # * Redistributions of source code must retain the above copyright 9 | # notice, this list of conditions and the following disclaimer. 10 | # 11 | # * Redistributions in binary form must reproduce the above copyright 12 | # notice, this list of conditions and the following disclaimer in the 13 | # documentation and/or other materials provided with the distribution. 14 | # 15 | # * Neither the name of ETH Zurich and UNC Chapel Hill nor the names of 16 | # its contributors may be used to endorse or promote products derived 17 | # from this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 | # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE 23 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 | # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 | # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 | # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 | # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 | # POSSIBILITY OF SUCH DAMAGE. 30 | # 31 | # Author: Johannes L. 
Schoenberger (jsch at inf.ethz.ch) 32 | 33 | import os 34 | import sys 35 | import collections 36 | import numpy as np 37 | import struct 38 | 39 | 40 | CameraModel = collections.namedtuple( 41 | "CameraModel", ["model_id", "model_name", "num_params"]) 42 | Camera = collections.namedtuple( 43 | "Camera", ["id", "model", "width", "height", "params"]) 44 | BaseImage = collections.namedtuple( 45 | "Image", ["id", "qvec", "tvec", "camera_id", "name", "xys", "point3D_ids"]) 46 | Point3D = collections.namedtuple( 47 | "Point3D", ["id", "xyz", "rgb", "error", "image_ids", "point2D_idxs"]) 48 | 49 | class Image(BaseImage): 50 | def qvec2rotmat(self): 51 | return qvec2rotmat(self.qvec) 52 | 53 | 54 | CAMERA_MODELS = { 55 | CameraModel(model_id=0, model_name="SIMPLE_PINHOLE", num_params=3), 56 | CameraModel(model_id=1, model_name="PINHOLE", num_params=4), 57 | CameraModel(model_id=2, model_name="SIMPLE_RADIAL", num_params=4), 58 | CameraModel(model_id=3, model_name="RADIAL", num_params=5), 59 | CameraModel(model_id=4, model_name="OPENCV", num_params=8), 60 | CameraModel(model_id=5, model_name="OPENCV_FISHEYE", num_params=8), 61 | CameraModel(model_id=6, model_name="FULL_OPENCV", num_params=12), 62 | CameraModel(model_id=7, model_name="FOV", num_params=5), 63 | CameraModel(model_id=8, model_name="SIMPLE_RADIAL_FISHEYE", num_params=4), 64 | CameraModel(model_id=9, model_name="RADIAL_FISHEYE", num_params=5), 65 | CameraModel(model_id=10, model_name="THIN_PRISM_FISHEYE", num_params=12) 66 | } 67 | CAMERA_MODEL_IDS = dict([(camera_model.model_id, camera_model) \ 68 | for camera_model in CAMERA_MODELS]) 69 | 70 | 71 | def read_next_bytes(fid, num_bytes, format_char_sequence, endian_character="<"): 72 | """Read and unpack the next bytes from a binary file. 73 | :param fid: 74 | :param num_bytes: Sum of combination of {2, 4, 8}, e.g. 2, 6, 16, 30, etc. 75 | :param format_char_sequence: List of {c, e, f, d, h, H, i, I, l, L, q, Q}. 76 | :param endian_character: Any of {@, =, <, >, !} 77 | :return: Tuple of read and unpacked values. 
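Example: read_next_bytes(fid, num_bytes=24, format_char_sequence="ddq") reads 8 + 8 + 8 = 24 bytes and unpacks two doubles followed by one signed 8-byte integer (the per-point record layout used by read_images_binary below).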
78 | """ 79 | data = fid.read(num_bytes) 80 | return struct.unpack(endian_character + format_char_sequence, data) 81 | 82 | 83 | def read_cameras_text(path): 84 | """ 85 | see: src/base/reconstruction.cc 86 | void Reconstruction::WriteCamerasText(const std::string& path) 87 | void Reconstruction::ReadCamerasText(const std::string& path) 88 | """ 89 | cameras = {} 90 | with open(path, "r") as fid: 91 | while True: 92 | line = fid.readline() 93 | if not line: 94 | break 95 | line = line.strip() 96 | if len(line) > 0 and line[0] != "#": 97 | elems = line.split() 98 | camera_id = int(elems[0]) 99 | model = elems[1] 100 | width = int(elems[2]) 101 | height = int(elems[3]) 102 | params = np.array(tuple(map(float, elems[4:]))) 103 | cameras[camera_id] = Camera(id=camera_id, model=model, 104 | width=width, height=height, 105 | params=params) 106 | return cameras 107 | 108 | 109 | def read_cameras_binary(path_to_model_file): 110 | """ 111 | see: src/base/reconstruction.cc 112 | void Reconstruction::WriteCamerasBinary(const std::string& path) 113 | void Reconstruction::ReadCamerasBinary(const std::string& path) 114 | """ 115 | cameras = {} 116 | with open(path_to_model_file, "rb") as fid: 117 | num_cameras = read_next_bytes(fid, 8, "Q")[0] 118 | for camera_line_index in range(num_cameras): 119 | camera_properties = read_next_bytes( 120 | fid, num_bytes=24, format_char_sequence="iiQQ") 121 | camera_id = camera_properties[0] 122 | model_id = camera_properties[1] 123 | model_name = CAMERA_MODEL_IDS[camera_properties[1]].model_name 124 | width = camera_properties[2] 125 | height = camera_properties[3] 126 | num_params = CAMERA_MODEL_IDS[model_id].num_params 127 | params = read_next_bytes(fid, num_bytes=8*num_params, 128 | format_char_sequence="d"*num_params) 129 | cameras[camera_id] = Camera(id=camera_id, 130 | model=model_name, 131 | width=width, 132 | height=height, 133 | params=np.array(params)) 134 | assert len(cameras) == num_cameras 135 | return cameras 136 | 137 | 138 | def read_images_text(path): 139 | """ 140 | see: src/base/reconstruction.cc 141 | void Reconstruction::ReadImagesText(const std::string& path) 142 | void Reconstruction::WriteImagesText(const std::string& path) 143 | """ 144 | images = {} 145 | with open(path, "r") as fid: 146 | while True: 147 | line = fid.readline() 148 | if not line: 149 | break 150 | line = line.strip() 151 | if len(line) > 0 and line[0] != "#": 152 | elems = line.split() 153 | image_id = int(elems[0]) 154 | qvec = np.array(tuple(map(float, elems[1:5]))) 155 | tvec = np.array(tuple(map(float, elems[5:8]))) 156 | camera_id = int(elems[8]) 157 | image_name = elems[9] 158 | elems = fid.readline().split() 159 | xys = np.column_stack([tuple(map(float, elems[0::3])), 160 | tuple(map(float, elems[1::3]))]) 161 | point3D_ids = np.array(tuple(map(int, elems[2::3]))) 162 | images[image_id] = Image( 163 | id=image_id, qvec=qvec, tvec=tvec, 164 | camera_id=camera_id, name=image_name, 165 | xys=xys, point3D_ids=point3D_ids) 166 | return images 167 | 168 | 169 | def read_images_binary(path_to_model_file): 170 | """ 171 | see: src/base/reconstruction.cc 172 | void Reconstruction::ReadImagesBinary(const std::string& path) 173 | void Reconstruction::WriteImagesBinary(const std::string& path) 174 | """ 175 | images = {} 176 | with open(path_to_model_file, "rb") as fid: 177 | num_reg_images = read_next_bytes(fid, 8, "Q")[0] 178 | for image_index in range(num_reg_images): 179 | binary_image_properties = read_next_bytes( 180 | fid, num_bytes=64, format_char_sequence="idddddddi") 
181 | image_id = binary_image_properties[0] 182 | qvec = np.array(binary_image_properties[1:5]) 183 | tvec = np.array(binary_image_properties[5:8]) 184 | camera_id = binary_image_properties[8] 185 | image_name = "" 186 | current_char = read_next_bytes(fid, 1, "c")[0] 187 | while current_char != b"\x00": # look for the ASCII 0 entry 188 | image_name += current_char.decode("utf-8") 189 | current_char = read_next_bytes(fid, 1, "c")[0] 190 | num_points2D = read_next_bytes(fid, num_bytes=8, 191 | format_char_sequence="Q")[0] 192 | x_y_id_s = read_next_bytes(fid, num_bytes=24*num_points2D, 193 | format_char_sequence="ddq"*num_points2D) 194 | xys = np.column_stack([tuple(map(float, x_y_id_s[0::3])), 195 | tuple(map(float, x_y_id_s[1::3]))]) 196 | point3D_ids = np.array(tuple(map(int, x_y_id_s[2::3]))) 197 | images[image_id] = Image( 198 | id=image_id, qvec=qvec, tvec=tvec, 199 | camera_id=camera_id, name=image_name, 200 | xys=xys, point3D_ids=point3D_ids) 201 | return images 202 | 203 | 204 | def read_points3D_text(path): 205 | """ 206 | see: src/base/reconstruction.cc 207 | void Reconstruction::ReadPoints3DText(const std::string& path) 208 | void Reconstruction::WritePoints3DText(const std::string& path) 209 | """ 210 | points3D = {} 211 | with open(path, "r") as fid: 212 | while True: 213 | line = fid.readline() 214 | if not line: 215 | break 216 | line = line.strip() 217 | if len(line) > 0 and line[0] != "#": 218 | elems = line.split() 219 | point3D_id = int(elems[0]) 220 | xyz = np.array(tuple(map(float, elems[1:4]))) 221 | rgb = np.array(tuple(map(int, elems[4:7]))) 222 | error = float(elems[7]) 223 | image_ids = np.array(tuple(map(int, elems[8::2]))) 224 | point2D_idxs = np.array(tuple(map(int, elems[9::2]))) 225 | points3D[point3D_id] = Point3D(id=point3D_id, xyz=xyz, rgb=rgb, 226 | error=error, image_ids=image_ids, 227 | point2D_idxs=point2D_idxs) 228 | return points3D 229 | 230 | 231 | def read_points3d_binary(path_to_model_file): 232 | """ 233 | see: src/base/reconstruction.cc 234 | void Reconstruction::ReadPoints3DBinary(const std::string& path) 235 | void Reconstruction::WritePoints3DBinary(const std::string& path) 236 | """ 237 | points3D = {} 238 | with open(path_to_model_file, "rb") as fid: 239 | num_points = read_next_bytes(fid, 8, "Q")[0] 240 | for point_line_index in range(num_points): 241 | binary_point_line_properties = read_next_bytes( 242 | fid, num_bytes=43, format_char_sequence="QdddBBBd") 243 | point3D_id = binary_point_line_properties[0] 244 | xyz = np.array(binary_point_line_properties[1:4]) 245 | rgb = np.array(binary_point_line_properties[4:7]) 246 | error = np.array(binary_point_line_properties[7]) 247 | track_length = read_next_bytes( 248 | fid, num_bytes=8, format_char_sequence="Q")[0] 249 | track_elems = read_next_bytes( 250 | fid, num_bytes=8*track_length, 251 | format_char_sequence="ii"*track_length) 252 | image_ids = np.array(tuple(map(int, track_elems[0::2]))) 253 | point2D_idxs = np.array(tuple(map(int, track_elems[1::2]))) 254 | points3D[point3D_id] = Point3D( 255 | id=point3D_id, xyz=xyz, rgb=rgb, 256 | error=error, image_ids=image_ids, 257 | point2D_idxs=point2D_idxs) 258 | return points3D 259 | 260 | 261 | def read_model(path, ext): 262 | if ext == ".txt": 263 | cameras = read_cameras_text(os.path.join(path, "cameras" + ext)) 264 | images = read_images_text(os.path.join(path, "images" + ext)) 265 | points3D = read_points3D_text(os.path.join(path, "points3D") + ext) 266 | else: 267 | cameras = read_cameras_binary(os.path.join(path, "cameras" + ext)) 268 | 
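# The .bin files are what COLMAP's mapper writes by default (e.g. sparse/0/cameras.bin); the ".txt" branch above covers models exported as text.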
images = read_images_binary(os.path.join(path, "images" + ext)) 269 | points3D = read_points3d_binary(os.path.join(path, "points3D") + ext) 270 | return cameras, images, points3D 271 | 272 | 273 | def qvec2rotmat(qvec): 274 | return np.array([ 275 | [1 - 2 * qvec[2]**2 - 2 * qvec[3]**2, 276 | 2 * qvec[1] * qvec[2] - 2 * qvec[0] * qvec[3], 277 | 2 * qvec[3] * qvec[1] + 2 * qvec[0] * qvec[2]], 278 | [2 * qvec[1] * qvec[2] + 2 * qvec[0] * qvec[3], 279 | 1 - 2 * qvec[1]**2 - 2 * qvec[3]**2, 280 | 2 * qvec[2] * qvec[3] - 2 * qvec[0] * qvec[1]], 281 | [2 * qvec[3] * qvec[1] - 2 * qvec[0] * qvec[2], 282 | 2 * qvec[2] * qvec[3] + 2 * qvec[0] * qvec[1], 283 | 1 - 2 * qvec[1]**2 - 2 * qvec[2]**2]]) 284 | 285 | 286 | def rotmat2qvec(R): 287 | Rxx, Ryx, Rzx, Rxy, Ryy, Rzy, Rxz, Ryz, Rzz = R.flat 288 | K = np.array([ 289 | [Rxx - Ryy - Rzz, 0, 0, 0], 290 | [Ryx + Rxy, Ryy - Rxx - Rzz, 0, 0], 291 | [Rzx + Rxz, Rzy + Ryz, Rzz - Rxx - Ryy, 0], 292 | [Ryz - Rzy, Rzx - Rxz, Rxy - Ryx, Rxx + Ryy + Rzz]]) / 3.0 293 | eigvals, eigvecs = np.linalg.eigh(K) 294 | qvec = eigvecs[[3, 0, 1, 2], np.argmax(eigvals)] 295 | if qvec[0] < 0: 296 | qvec *= -1 297 | return qvec 298 | 299 | 300 | def main(): 301 | if len(sys.argv) != 3: 302 | print("Usage: python read_model.py path/to/model/folder [.txt,.bin]") 303 | return 304 | 305 | cameras, images, points3D = read_model(path=sys.argv[1], ext=sys.argv[2]) 306 | 307 | print("num_cameras:", len(cameras)) 308 | print("num_images:", len(images)) 309 | print("num_points3D:", len(points3D)) 310 | 311 | 312 | if __name__ == "__main__": 313 | main() 314 | -------------------------------------------------------------------------------- /tools/colmap_utils/colmap_wrapper.py: -------------------------------------------------------------------------------- 1 | # Source: https://github.com/Fyusion/LLFF 2 | import os 3 | import subprocess 4 | 5 | 6 | 7 | # $ DATASET_PATH=/path/to/dataset 8 | 9 | # $ colmap feature_extractor \ 10 | # --database_path $DATASET_PATH/database.db \ 11 | # --image_path $DATASET_PATH/images 12 | 13 | # $ colmap exhaustive_matcher \ 14 | # --database_path $DATASET_PATH/database.db 15 | 16 | # $ mkdir $DATASET_PATH/sparse 17 | 18 | # $ colmap mapper \ 19 | # --database_path $DATASET_PATH/database.db \ 20 | # --image_path $DATASET_PATH/images \ 21 | # --output_path $DATASET_PATH/sparse 22 | 23 | # $ mkdir $DATASET_PATH/dense 24 | def run_colmap(basedir, match_type): 25 | 26 | logfile_name = os.path.join(basedir, 'colmap_output.txt') 27 | logfile = open(logfile_name, 'w') 28 | 29 | feature_extractor_args = [ 30 | 'colmap', 'feature_extractor', 31 | '--database_path', os.path.join(basedir, 'database.db'), 32 | '--image_path', os.path.join(basedir, 'source'), 33 | '--ImageReader.single_camera', '1', 34 | # '--SiftExtraction.use_gpu', '0', 35 | ] 36 | feat_output = ( subprocess.check_output(feature_extractor_args, universal_newlines=True) ) 37 | logfile.write(feat_output) 38 | print('Features extracted') 39 | 40 | exhaustive_matcher_args = [ 41 | 'colmap', match_type, 42 | '--database_path', os.path.join(basedir, 'database.db'), 43 | ] 44 | 45 | match_output = ( subprocess.check_output(exhaustive_matcher_args, universal_newlines=True) ) 46 | logfile.write(match_output) 47 | print('Features matched') 48 | 49 | p = os.path.join(basedir, 'sparse') 50 | if not os.path.exists(p): 51 | os.makedirs(p) 52 | 53 | # mapper_args = [ 54 | # 'colmap', 'mapper', 55 | # '--database_path', os.path.join(basedir, 'database.db'), 56 | # '--image_path', os.path.join(basedir, 'images'), 
57 | # '--output_path', os.path.join(basedir, 'sparse'), 58 | # '--Mapper.num_threads', '16', 59 | # '--Mapper.init_min_tri_angle', '4', 60 | # ] 61 | mapper_args = [ 62 | 'colmap', 'mapper', 63 | '--database_path', os.path.join(basedir, 'database.db'), 64 | '--image_path', os.path.join(basedir, 'source'), 65 | '--output_path', os.path.join(basedir, 'sparse'), # --export_path changed to --output_path in colmap 3.6 66 | '--Mapper.num_threads', '16', 67 | '--Mapper.init_min_tri_angle', '4', 68 | '--Mapper.multiple_models', '0', 69 | '--Mapper.extract_colors', '0', 70 | ] 71 | 72 | map_output = ( subprocess.check_output(mapper_args, universal_newlines=True) ) 73 | logfile.write(map_output) 74 | print('Sparse map created') 75 | 76 | undistorter = [ 77 | 'colmap', 'image_undistorter', 78 | '--image_path', os.path.join(basedir, 'source'), 79 | '--input_path', os.path.join(basedir, 'sparse', '0'), 80 | '--output_path', os.path.join(basedir, 'dense'), 81 | '--output_type', 'COLMAP', 82 | ] 83 | undistort_output = subprocess.check_output(undistorter, universal_newlines=True) 84 | logfile.write(undistort_output) 85 | print('Undistorted images') 86 | 87 | logfile.close() 88 | print( 'Finished running COLMAP, see {} for logs'.format(logfile_name) ) 89 | 90 | 91 | -------------------------------------------------------------------------------- /tools/colmap_utils/pose_utils.py: -------------------------------------------------------------------------------- 1 | # Source: https://github.com/Fyusion/LLFF 2 | import numpy as np 3 | import os 4 | import sys 5 | import imageio 6 | import skimage.transform 7 | 8 | from .colmap_wrapper import run_colmap 9 | from . import colmap_read_model as read_model 10 | 11 | 12 | def load_colmap_data(realdir): 13 | 14 | #camerasfile = os.path.join(realdir, 'sparse/0/cameras.bin') 15 | camerasfile = os.path.join(realdir, 'dense/sparse/cameras.bin') 16 | camdata = read_model.read_cameras_binary(camerasfile) 17 | 18 | # cam = camdata[camdata.keys()[0]] 19 | list_of_keys = list(camdata.keys()) 20 | cam = camdata[list_of_keys[0]] 21 | print( 'Cameras', len(camdata))  # number of reconstructed cameras, not fields of the namedtuple 22 | 23 | h, w, f = cam.height, cam.width, cam.params[0] 24 | # w, h, f = factor * w, factor * h, factor * f 25 | hwf = np.array([h,w,f]).reshape([3,1]) 26 | 27 | #imagesfile = os.path.join(realdir, 'sparse/0/images.bin') 28 | imagesfile = os.path.join(realdir, 'dense/sparse/images.bin') 29 | imdata = read_model.read_images_binary(imagesfile) 30 | 31 | w2c_mats = [] 32 | bottom = np.array([0,0,0,1.]).reshape([1,4]) 33 | 34 | names = [imdata[k].name for k in imdata] 35 | print( 'Images #', len(names)) 36 | perm = np.argsort(names) 37 | for k in imdata: 38 | im = imdata[k] 39 | R = im.qvec2rotmat() 40 | t = im.tvec.reshape([3,1]) 41 | m = np.concatenate([np.concatenate([R, t], 1), bottom], 0) 42 | w2c_mats.append(m) 43 | 44 | w2c_mats = np.stack(w2c_mats, 0) 45 | c2w_mats = np.linalg.inv(w2c_mats) 46 | 47 | poses = c2w_mats[:, :3, :4].transpose([1,2,0]) 48 | poses = np.concatenate([poses, np.tile(hwf[..., np.newaxis], [1,1,poses.shape[-1]])], 1) 49 | 50 | points3dfile = os.path.join(realdir, 'dense/sparse/points3D.bin') 51 | pts3d = read_model.read_points3d_binary(points3dfile) 52 | 53 | # must switch to [-u, r, -t] from [r, -u, t], NOT [r, u, -t] 54 | poses = np.concatenate([poses[:, 1:2, :], poses[:, 0:1, :], -poses[:, 2:3, :], poses[:, 3:4, :], poses[:, 4:5, :]], 1) 55 | 56 | return poses, pts3d, perm, names 57 | 58 | 59 | def save_poses(basedir, poses, pts3d, perm, names): 60 | pts_arr = [] 61 | vis_arr = [] 62 | for k
in pts3d: 63 | pts_arr.append(pts3d[k].xyz) 64 | cams = [0] * poses.shape[-1] 65 | for ind in pts3d[k].image_ids: 66 | if ind - 1 >= len(cams):  # 1-based image id must index into the recovered poses 67 | print("ERROR: the camera pose for this point's image id cannot be accessed") 68 | return 69 | cams[ind-1] = 1 70 | vis_arr.append(cams) 71 | 72 | pts_arr = np.array(pts_arr) 73 | vis_arr = np.array(vis_arr) 74 | print( 'Points', pts_arr.shape, 'Visibility', vis_arr.shape ) 75 | 76 | zvals = np.sum(-(pts_arr[:, np.newaxis, :].transpose([2,0,1]) - poses[:3, 3:4, :]) * poses[:3, 2:3, :], 0) 77 | valid_z = zvals[vis_arr==1] 78 | print( 'Depth stats', valid_z.min(), valid_z.max(), valid_z.mean() ) 79 | 80 | save_arr = [] 81 | for i in perm: 82 | vis = vis_arr[:, i] 83 | zs = zvals[:, i] 84 | zs = zs[vis==1] 85 | close_depth, inf_depth = np.percentile(zs, .1), np.percentile(zs, 99.9) 86 | # print( i, close_depth, inf_depth ) 87 | 88 | save_arr.append(np.concatenate([poses[..., i].ravel(), np.array([close_depth, inf_depth])], 0)) 89 | save_arr = np.array(save_arr) 90 | 91 | np.save(os.path.join(basedir, 'poses_bounds.npy'), save_arr) 92 | np.save(os.path.join(basedir, 'poses_names.npy'), sorted(names)) 93 | 94 | 95 | def minify(basedir, factors=[], resolutions=[]): 96 | needtoload = False 97 | for r in factors: 98 | imgdir = os.path.join(basedir, 'images_{}'.format(r)) 99 | if not os.path.exists(imgdir): 100 | needtoload = True 101 | for r in resolutions: 102 | imgdir = os.path.join(basedir, 'images_{}x{}'.format(r[1], r[0])) 103 | if not os.path.exists(imgdir): 104 | needtoload = True 105 | if not needtoload: 106 | return 107 | 108 | from shutil import copy 109 | from subprocess import check_output 110 | 111 | imgdir = os.path.join(basedir, 'images') 112 | imgs = [os.path.join(imgdir, f) for f in sorted(os.listdir(imgdir))] 113 | imgs = [f for f in imgs if any([f.endswith(ex) for ex in ['JPG', 'jpg', 'png', 'jpeg', 'PNG']])] 114 | imgdir_orig = imgdir 115 | 116 | wd = os.getcwd() 117 | 118 | for r in factors + resolutions: 119 | if isinstance(r, int): 120 | name = 'images_{}'.format(r) 121 | resizearg = '{}%'.format(int(100./r)) 122 | else: 123 | name = 'images_{}x{}'.format(r[1], r[0]) 124 | resizearg = '{}x{}'.format(r[1], r[0]) 125 | imgdir = os.path.join(basedir, name) 126 | if os.path.exists(imgdir): 127 | continue 128 | 129 | print('Minifying', r, basedir) 130 | 131 | os.makedirs(imgdir) 132 | check_output('cp {}/* {}'.format(imgdir_orig, imgdir), shell=True) 133 | 134 | ext = imgs[0].split('.')[-1] 135 | args = ' '.join(['mogrify', '-resize', resizearg, '-format', 'png', '*.{}'.format(ext)]) 136 | print(args) 137 | os.chdir(imgdir) 138 | check_output(args, shell=True) 139 | os.chdir(wd) 140 | 141 | if ext != 'png': 142 | check_output('rm {}/*.{}'.format(imgdir, ext), shell=True) 143 | print('Removed duplicates') 144 | print('Done') 145 | 146 | 147 | 148 | 149 | def gen_poses(basedir, match_type, factors=None): 150 | 151 | files_needed = ['{}.bin'.format(f) for f in ['cameras', 'images', 'points3D']] 152 | if os.path.exists(os.path.join(basedir, 'sparse/0')): 153 | files_had = os.listdir(os.path.join(basedir, 'sparse/0')) 154 | else: 155 | files_had = [] 156 | if not all([f in files_had for f in files_needed]): 157 | print( 'Need to run COLMAP' ) 158 | run_colmap(basedir, match_type) 159 | else: 160 | print('Don\'t need to run COLMAP') 161 | 162 | print( 'Post-colmap') 163 | 164 | poses, pts3d, perm, names = load_colmap_data(basedir) 165 | 166 | densedir = os.path.join(basedir, 'dense') 167 | 168 | save_poses(densedir, poses, pts3d, perm,
names) 170 | 171 | if factors is not None: 172 | print( 'Factors:', factors) 173 | minify(densedir, factors) 174 | 175 | print( 'Done with imgs2poses' ) 176 | 177 | return True 178 | 179 | -------------------------------------------------------------------------------- /tools/imgs2poses.py: -------------------------------------------------------------------------------- 1 | # Modified from https://github.com/Fyusion/LLFF 2 | import os 3 | import sys 4 | import glob 5 | 6 | from colmap_utils.pose_utils import gen_poses 7 | 8 | 9 | def check_structure(scenedir): 10 | source = os.path.join(scenedir, 'source') 11 | if not os.path.isdir(source): 12 | print('Invalid directory structure.') 13 | print('Please put all your images under', source, '!') 14 | sys.exit() 15 | if not any(f.lower().endswith(('.jpg', '.jpeg', '.png')) for f in glob.glob(os.path.join(source, '*'))):  # any image with a jpg/jpeg/png extension (case-insensitive) 16 | print('Invalid directory structure.') 17 | print('No image in', source, '!') 18 | sys.exit() 19 | print('Directory structure check: PASS.') 20 | 21 | 22 | if __name__=='__main__': 23 | 24 | import argparse 25 | parser = argparse.ArgumentParser() 26 | parser.add_argument('--match_type', type=str, 27 | default='exhaustive_matcher', help='type of matcher used. Valid options: \ 28 | exhaustive_matcher sequential_matcher. Other matchers not supported at this time') 29 | parser.add_argument('scenedir', type=str, 30 | help='input scene directory') 31 | args = parser.parse_args() 32 | 33 | if args.match_type != 'exhaustive_matcher' and args.match_type != 'sequential_matcher': 34 | print('ERROR: matcher type ' + args.match_type + ' is not valid. Aborting') 35 | sys.exit() 36 | 37 | check_structure(args.scenedir) 38 | 39 | gen_poses(args.scenedir, args.match_type, factors=[2,4,8]) 40 | 41 | -------------------------------------------------------------------------------- /tools/vis_train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import numpy as np 3 | import open3d as o3d 4 | 5 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 6 | parser.add_argument('path') 7 | args = parser.parse_args() 8 | 9 | data = np.load(args.path) 10 | xyz_min = data['xyz_min'] 11 | xyz_max = data['xyz_max'] 12 | cam_lst = data['cam_lst'] 13 | 14 | # Outer aabb 15 | aabb_01 = np.array([[0, 0, 0], 16 | [0, 0, 1], 17 | [0, 1, 1], 18 | [0, 1, 0], 19 | [1, 0, 0], 20 | [1, 0, 1], 21 | [1, 1, 1], 22 | [1, 1, 0]]) 23 | out_bbox = o3d.geometry.LineSet() 24 | out_bbox.points = o3d.utility.Vector3dVector(xyz_min + aabb_01 * (xyz_max - xyz_min)) 25 | out_bbox.colors = o3d.utility.Vector3dVector([[1,0,0] for i in range(12)]) 26 | out_bbox.lines = o3d.utility.Vector2iVector([[0,1],[1,2],[2,3],[3,0],[4,5],[5,6],[6,7],[7,4],[0,4],[1,5],[2,6],[3,7]]) 27 | 28 | # Cameras 29 | cam_frustrm_lst = [] 30 | for cam in cam_lst: 31 | cam_frustrm = o3d.geometry.LineSet() 32 | cam_frustrm.points = o3d.utility.Vector3dVector(cam) 33 | if len(cam) == 5: 34 | cam_frustrm.colors = o3d.utility.Vector3dVector([[0,0,0] for i in range(8)]) 35 | cam_frustrm.lines = o3d.utility.Vector2iVector([[0,1],[0,2],[0,3],[0,4],[1,2],[2,4],[4,3],[3,1]]) 36 | elif len(cam) == 8: 37 | cam_frustrm.colors = o3d.utility.Vector3dVector([[0,0,0] for i in range(12)]) 38 | cam_frustrm.lines = o3d.utility.Vector2iVector([ 39 | [0,1],[1,3],[3,2],[2,0], 40 | [4,5],[5,7],[7,6],[6,4], 41 | [0,4],[1,5],[3,7],[2,6], 42 | ]) 43 | else: 44 | raise NotImplementedError 45 | cam_frustrm_lst.append(cam_frustrm) 46 | 47 | # Show the coordinate frame at xyz_min, the red scene bbox, and the black camera frustums 48 |
o3d.visualization.draw_geometries([ 49 | o3d.geometry.TriangleMesh.create_coordinate_frame(size=1.0, origin=xyz_min), 50 | out_bbox, *cam_frustrm_lst]) 51 | 52 | -------------------------------------------------------------------------------- /tools/vis_volume.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import numpy as np 3 | import open3d as o3d 4 | 5 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 6 | parser.add_argument('path') 7 | parser.add_argument('thres', type=float) 8 | parser.add_argument('--cam') 9 | args = parser.parse_args() 10 | 11 | data = np.load(args.path) 12 | alpha = data['alpha'] 13 | rgb = data['rgb'] 14 | if rgb.shape[0] < rgb.shape[-1]: 15 | alpha = np.transpose(alpha, (1,2,0)) 16 | rgb = np.transpose(rgb, (1,2,3,0)) 17 | print('Shape', alpha.shape, rgb.shape) 18 | print('Active rate', (alpha > args.thres).mean()) 19 | print('Active nums', (alpha > args.thres).sum()) 20 | xyz_min = np.array([0,0,0]) 21 | xyz_max = np.array(alpha.shape) 22 | 23 | if args.cam: 24 | data = np.load(args.cam) 25 | xyz_min = data['xyz_min'] 26 | xyz_max = data['xyz_max'] 27 | cam_lst = data['cam_lst'] 28 | cam_frustrm_lst = [] 29 | for cam in cam_lst: 30 | cam_frustrm = o3d.geometry.LineSet() 31 | cam_frustrm.points = o3d.utility.Vector3dVector(cam) 32 | if len(cam) == 5: 33 | cam_frustrm.colors = o3d.utility.Vector3dVector([[0.5,0.5,0.5] for i in range(8)]) 34 | cam_frustrm.lines = o3d.utility.Vector2iVector([[0,1],[0,2],[0,3],[0,4],[1,2],[2,4],[4,3],[3,1]]) 35 | elif len(cam) == 8: 36 | cam_frustrm.colors = o3d.utility.Vector3dVector([[0.5,0.5,0.5] for i in range(12)]) 37 | cam_frustrm.lines = o3d.utility.Vector2iVector([ 38 | [0,1],[1,3],[3,2],[2,0], 39 | [4,5],[5,7],[7,6],[6,4], 40 | [0,4],[1,5],[3,7],[2,6], 41 | ]) 42 | cam_frustrm_lst.append(cam_frustrm) 43 | else: 44 | cam_frustrm_lst = [] 45 | 46 | 47 | aabb_01 = np.array([[0, 0, 0], 48 | [0, 0, 1], 49 | [0, 1, 1], 50 | [0, 1, 0], 51 | [1, 0, 0], 52 | [1, 0, 1], 53 | [1, 1, 1], 54 | [1, 1, 0]]) 55 | out_bbox = o3d.geometry.LineSet() 56 | out_bbox.points = o3d.utility.Vector3dVector(xyz_min + aabb_01 * (xyz_max - xyz_min)) 57 | out_bbox.colors = o3d.utility.Vector3dVector([[1,0,0] for i in range(12)]) 58 | out_bbox.lines = o3d.utility.Vector2iVector([[0,1],[1,2],[2,3],[3,0],[4,5],[5,6],[6,7],[7,4],[0,4],[1,5],[2,6],[3,7]]) 59 | 60 | xyz = np.stack((alpha > args.thres).nonzero(), -1) 61 | color = rgb[xyz[:,0], xyz[:,1], xyz[:,2]] 62 | pcd = o3d.geometry.PointCloud() 63 | pcd.points = o3d.utility.Vector3dVector(xyz / alpha.shape * (xyz_max - xyz_min) + xyz_min) 64 | pcd.colors = o3d.utility.Vector3dVector(color[:, :3]) 65 | voxel_grid = o3d.geometry.VoxelGrid.create_from_point_cloud(pcd, voxel_size=max((xyz_max - xyz_min) / alpha.shape)) 66 | 67 | def change_background_to_black(vis): 68 | opt = vis.get_render_option() 69 | opt.background_color = np.asarray([0, 0, 0]) 70 | return False 71 | 72 | o3d.visualization.draw_geometries_with_key_callbacks([ 73 | o3d.geometry.TriangleMesh.create_coordinate_frame(size=(xyz_max-xyz_min).min()*0.1, origin=xyz_min), 74 | out_bbox, voxel_grid, *cam_frustrm_lst, 75 | ], {ord("K"): change_background_to_black}) 76 | 77 | --------------------------------------------------------------------------------