├── .gitignore ├── IMPROVING_LOG.md ├── LICENSE ├── README.md ├── configs ├── blendedmvs │ ├── Character.py │ ├── Fountain.py │ ├── Jade.py │ └── Statues.py ├── co3d │ └── donut_369_40208_78816.py ├── custom │ ├── Madoka.py │ ├── Otobai.py │ ├── default_forward_facing.py │ └── default_ubd_inward_facing.py ├── deepvoxels │ ├── armchair.py │ ├── cube.py │ ├── greek.py │ └── vase.py ├── default.py ├── lf │ ├── africa.py │ ├── basket.py │ ├── lf_default.py │ ├── ship.py │ ├── statue.py │ └── torch.py ├── llff │ ├── fern.py │ ├── fern_lg.py │ ├── flower.py │ ├── flower_lg.py │ ├── fortress.py │ ├── fortress_lg.py │ ├── horns.py │ ├── horns_lg.py │ ├── leaves.py │ ├── leaves_lg.py │ ├── llff_default.py │ ├── llff_default_lg.py │ ├── orchids.py │ ├── orchids_lg.py │ ├── room.py │ ├── room_lg.py │ ├── trex.py │ └── trex_lg.py ├── nerf │ ├── chair.py │ ├── drums.py │ ├── ficus.py │ ├── hotdog.py │ ├── lego.py │ ├── materials.py │ ├── mic.py │ ├── ship.py │ └── ship.tensorf.py ├── nerf_unbounded │ ├── bicycle.py │ ├── bonsai.py │ ├── counter.py │ ├── garden.py │ ├── kitchen.py │ ├── nerf_unbounded_default.py │ ├── room.py │ └── stump.py ├── nsvf │ ├── Bike.py │ ├── Lifestyle.py │ ├── Palace.py │ ├── Robot.py │ ├── Spaceship.py │ ├── Steamtrain.py │ ├── Toad.py │ └── Wineholder.py ├── tankstemple │ ├── Barn.py │ ├── Barn_lg.py │ ├── Caterpillar.py │ ├── Caterpillar_lg.py │ ├── Family.py │ ├── Family_lg.py │ ├── Ignatius.py │ ├── Ignatius_lg.py │ ├── Truck.py │ └── Truck_lg.py └── tankstemple_unbounded │ ├── M60.py │ ├── Playground.py │ ├── Train.py │ ├── Truck.py │ └── tt_default.py ├── figs ├── debug_cam_and_bbox.png └── debug_coarse_volume.png ├── lib ├── cuda │ ├── adam_upd.cpp │ ├── adam_upd_kernel.cu │ ├── render_utils.cpp │ ├── render_utils_kernel.cu │ ├── total_variation.cpp │ ├── total_variation_kernel.cu │ ├── ub360_utils.cpp │ └── ub360_utils_kernel.cu ├── dbvgo.py ├── dcvgo.py ├── dmpigo.py ├── dvgo.py ├── grid.py ├── load_blendedmvs.py ├── load_blender.py ├── load_co3d.py ├── load_data.py ├── load_deepvoxels.py ├── load_llff.py ├── load_nerfpp.py ├── load_nsvf.py ├── load_tankstemple.py ├── masked_adam.py └── utils.py ├── requirements.txt ├── run.py └── tools ├── colmap_utils ├── colmap_read_model.py ├── colmap_wrapper.py └── pose_utils.py ├── imgs2poses.py ├── vis_train.py └── vis_volume.py /.gitignore: -------------------------------------------------------------------------------- 1 | **/.ipynb_checkpoints 2 | **/__pycache__ 3 | *.npy 4 | *.npz 5 | *.dae 6 | data 7 | logs 8 | -------------------------------------------------------------------------------- /IMPROVING_LOG.md: -------------------------------------------------------------------------------- 1 | # Improving log 2 | 3 | ### Custom CUDA implementation for efficiency 4 | Some intermediate steps are reimplemented in CUDA (`lib/cuda/`), which improves training speed by 5 | **1.8\~3.5x**. Below we show the results for dense grids with `256^3` and `160^3` voxels. *Tesla V100*, *RTX 2080 Ti*, and *GTX 1080 Ti* GPUs are tested. The PSNRs of different versions on different machines drift by about 0.2 dB. The training speed of the final version is improved 2--3x over the original native PyTorch implementation. 6 | 7 | --- 8 | 9 | | **num_voxels=256^3** | lego | | mic | | ship | | 10 | |--------------|-------|-------|-------|-------|-------|-------| 11 | | **GPU=V100** | psnr | `mm:ss` | psnr | `mm:ss` | psnr | `mm:ss` | 12 | | native pytorch
[b076912](https://github.com/sunset1995/DirectVoxGO/tree/b076912) | 35.51 | `15:10` | 34.39 | `14:11` | 30.05 | `17:04` | 13 | | cuda re-impl. Adam optimizer
[d3783f4](https://github.com/sunset1995/DirectVoxGO/tree/d3783f4) | 35.47 | `08:54` (1.7x) | 34.34 | `06:41` (2.1x) | 30.05 | `10:23` (1.6x) | 14 | | cuda re-impl. rendering
[3de7a6d](https://github.com/sunset1995/DirectVoxGO/tree/3de7a6d) | 35.63 | `06:31` (2.3x) | 34.48 | `04:31` (3.1x) | 30.30 | `08:20` (2.0x) | 15 | | prevent atomic add in alpha2weight
[4f4ac99](https://github.com/sunset1995/DirectVoxGO/tree/4f4ac99) | 35.61 | `05:35` (2.7x) | 34.51 | `04:00` (3.5x) | 30.29 | `07:20` (2.3x) | 16 | | | 17 | | **GPU=2080Ti** | 18 | | native pytorch [b076912](https://github.com/sunset1995/DirectVoxGO/tree/b076912) | - | OOM | 34.44 | `18:01` | - | OOM | 19 | | cuda re-impl. [4f4ac99](https://github.com/sunset1995/DirectVoxGO/tree/4f4ac99) | 35.61 | `07:19` | 34.49 | `04:30` (4.0x) | 30.29 | `09:53` | 20 | | | 21 | | **GPU=1080Ti** | 22 | | native pytorch [b076912](https://github.com/sunset1995/DirectVoxGO/tree/b076912) | 35.76 | `37:22` | 34.47 | `31:18` | 30.09 | `45:28` | 23 | | cuda re-impl. [4f4ac99](https://github.com/sunset1995/DirectVoxGO/tree/4f4ac99) | 35.62 | `14:32` (2.6x) | 34.50 | `08:55` (3.5x) | 30.29 | `21:00` (2.2x) | 24 | 25 | ```python 26 | # The model&training config for the results above 27 | coarse_train = dict(N_iters=5000) 28 | fine_train = dict(pg_scale=[1000,2000,3000,4000,5000,6000]) 29 | fine_model_and_render = dict(num_voxels=256**3) 30 | ``` 31 | 32 | --- 33 | 34 | | **num_voxels=160^3** | lego | | mic | | ship | | 35 | |--------------|-------|-------|-------|-------|-------|-------| 36 | | **GPU=V100** | psnr | `mm:ss` | psnr | `mm:ss` | psnr | `mm:ss` | 37 | | native pytorch
[b076912](https://github.com/sunset1995/DirectVoxGO/tree/b076912) | 34.65 | `08:29` | 33.19 | `07:04` | 29.08 | `10:38` | 38 | | cuda re-impl. Adam optimizer
[d3783f4](https://github.com/sunset1995/DirectVoxGO/tree/d3783f4) | 34.66 | `06:01` (1.4x) | 33.14 | `04:38` (1.5x) | 29.04 | `08:06` (1.3x) | 39 | | cuda re-impl. rendering
[3de7a6d](https://github.com/sunset1995/DirectVoxGO/tree/3de7a6d) | 34.56 | `04:50` (1.8x) | 33.10 | `03:22` (2.1x) | 29.19 | `06:31` (1.6x) | 40 | | prevent atomic add in alpha2weight
[4f4ac99](https://github.com/sunset1995/DirectVoxGO/tree/4f4ac99) | 34.58 | `03:58` (2.1x) | 33.12 | `03:00` (2.4x) | 29.17 | `05:46` (1.8x) | 41 | | | 42 | | **GPU=2080Ti** | 43 | | native pytorch [b076912](https://github.com/sunset1995/DirectVoxGO/tree/b076912) | 34.68 | `11:27` | 33.18 | `09:19` | 29.13 | `14:35` | 44 | | cuda re-impl. [4f4ac99](https://github.com/sunset1995/DirectVoxGO/tree/4f4ac99) | 34.59 | `04:59` (2.3x) | 33.15 | `03:04` (3.0x) | 29.19 | `07:32` (1.9x) | 45 | | | 46 | | **GPU=1080Ti** | 47 | | native pytorch [b076912](https://github.com/sunset1995/DirectVoxGO/tree/b076912) | 34.66 | `22:01` | 33.19 | `17:14` | 29.10 | `29:57` | 48 | | cuda re-impl. [4f4ac99](https://github.com/sunset1995/DirectVoxGO/tree/4f4ac99) | 34.56 | `10:29` (2.1x) | 33.11 | `06:21` (2.7x) | 29.18 | `16:48` (1.8x) | 49 | 50 | ```python 51 | # The model & training config for the results above 52 | coarse_train = dict(N_iters=5000) 53 | fine_train = dict(pg_scale=[1000,2000,3000,4000]) 54 | fine_model_and_render = dict(num_voxels=160**3) 55 | ``` 56 | 57 | --- 58 | 59 | ### Extend to forward-facing scenes 60 | The model for forward-facing scenes is implemented in `lib/dmpigo.py`. The main modifications include: 61 | - Use NeRF's NDC warping 62 | - Use a Multiplane Image (MPI) representation 63 | - The initial probability of stopping at each plane is `1/(# of planes)` 64 | - Skip the coarse-stage training, as it doesn't help for forward-facing scenes 65 | - Adopt a total variation loss, without which the quality would degrade 66 | 67 | All config files are in `configs/llff/`. The base config for the small model is: 68 | ```python 69 | # See configs/llff/llff_default.py 70 | data = dict( 71 | dataset_type='llff', # use the llff dataloader 72 | ndc=True, # use NDC coordinates (forward-facing scenes only) 73 | width=1008, # enforce image width 74 | height=756, # enforce image height 75 | ) 76 | 77 | coarse_train = dict( 78 | N_iters=0, # we don't need the coarse-stage training 79 | ) 80 | 81 | fine_train = dict( 82 | N_iters=30000, 83 | N_rand=4096, # a larger batch doesn't seem to help 84 | pg_scale=[2000,4000,6000,8000], 85 | ray_sampler='flatten', 86 | tv_before=1e9, # enable total variation loss 87 | tv_dense_before=10000, # dense version of the total variation loss for the first 10k iterations 88 | weight_tv_density=1e-5, 89 | weight_tv_k0=1e-6, 90 | ) 91 | 92 | fine_model_and_render = dict( 93 | num_voxels=256**3, 94 | mpi_depth=128, # the number of planes in the Multiplane Image (used when ndc=True) 95 | rgbnet_dim=9, # a larger rgbnet_dim doesn't seem to help 96 | rgbnet_width=64, # a larger rgbnet_width doesn't seem to help 97 | world_bound_scale=1, # no need to slightly enlarge the NDC bound 98 | fast_color_thres=1e-3, # the initial probability of stopping at each plane is 1/mpi_depth, 99 | # so the original 1e-4 would be too conservative here 100 | ) 101 | ``` 102 | See `configs/llff/llff_default_lg.py` for the large-model modifications. Basically, we double `mpi_depth` and use a larger MLP. 103 | 104 | 105 | **Results**: 106 | - Our training times are measured on a single Tesla V100 GPU. 107 | - Training time (`mm:ss`) 108 | | Method | Avg. | Room | Fern | Leaves | Fortress | Orchids | Flower | T-Rex | Horns | 109 | |--|--|--|--|--|--|--|--|--|--| 110 | | NeRF | 30+ hr | 111 | | Ours small | 05:30 | 05:55 | 06:12 | 04:36 | 05:38 | 05:26 | 05:28 | 05:07 | 05:23 | 112 | | Ours large | 16:27 | 17:38 | 18:21 | 14:11 | 16:03 | 17:14 | 16:27 | 15:46 | 16:00 | 113 | - PSNR 114 | | Method | Avg.
| Room | Fern | Leaves | Fortress | Orchids | Flower | T-Rex | Horns | 115 | |--|--|--|--|--|--|--|--|--|--| 116 | | NeRF | **26.50** | **32.70** | **25.17** | 20.92 | **31.16** | **20.36** | 27.40 | **26.80** | 27.45 | 117 | | Ours small | 25.83 | 30.88 | 24.69 | 20.81 | 30.09 | 19.82 | 27.34 | 26.04 | 26.98 | 118 | | Ours large | 26.37 | 32.16 | 24.99 | **21.01** | 30.79 | 20.07 | **27.62** | 26.63 | **27.69** | 119 | - SSIM 120 | | Method | Avg. | Room | Fern | Leaves | Fortress | Orchids | Flower | T-Rex | Horns | 121 | |--|--|--|--|--|--|--|--|--|--| 122 | | NeRF | 0.811 | 0.948 | 0.792 | 0.690 | 0.881 | 0.641 | 0.827 | 0.880 | 0.828 | 123 | | Ours small | 0.826 | 0.940 | 0.810 | 0.735 | 0.871 | 0.663 | 0.849 | 0.891 | 0.850 | 124 | | Ours large | **0.840** | **0.951** | **0.821** | **0.745** | **0.890** | **0.673** | **0.856** | **0.909** | **0.877** | 125 | - LPIPS (VGG) 126 | | Method | Avg. | Room | Fern | Leaves | Fortress | Orchids | Flower | T-Rex | Horns | 127 | |--|--|--|--|--|--|--|--|--|--| 128 | | NeRF | 0.250 | 0.178 | 0.280 | 0.316 | 0.171 | 0.321 | 0.219 | 0.249 | 0.268 | 129 | | Ours small | 0.215 | 0.191 | 0.231 | 0.215 | 0.185 | 0.252 | 0.187 | 0.229 | 0.233 | 130 | | Ours large | **0.200** | **0.172** | **0.222** | **0.205** | **0.161** | **0.247** | **0.181** | **0.215** | **0.203** | 131 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DirectVoxGO 2 | 3 | Direct Voxel Grid Optimization (CVPR2022 Oral, [project page](https://sunset1995.github.io/dvgo/), [DVGO paper](https://arxiv.org/abs/2111.11215), [DVGO v2 paper](https://arxiv.org/abs/2206.05085)). 4 | 5 | https://user-images.githubusercontent.com/2712505/153380311-19d6c3a1-9130-489a-af16-ad36c78f10a9.mp4 6 | 7 | https://user-images.githubusercontent.com/2712505/153380197-991d1689-6418-499c-a192-d757f9a64b64.mp4 8 | 9 | ### Custom casual capturing 10 | A [short guide](https://sunset1995.github.io/dvgo/tutor_forward_facing.html) to capturing custom forward-facing scenes and rendering fly-through videos. 11 | 12 | Below are two RGB and depth fly-through videos from custom captured scenes. 13 | 14 | https://user-images.githubusercontent.com/2712505/174267754-619d4f81-dd04-4c50-ba7f-434774cb890e.mp4 15 | 16 | ### Features 17 | - Speed up NeRF by replacing the MLP with a voxel grid. 18 | - Simple scene representation: 19 | - *Volume densities*: dense voxel grid (3D). 20 | - *View-dependent colors*: dense feature grid (4D) + shallow MLP. 21 | - PyTorch CUDA extension built just-in-time for another 2--3x speedup. 22 | - O(N) realization of the distortion loss proposed by [mip-NeRF 360](https://jonbarron.info/mipnerf360/). 23 | - The loss improves our training time and quality. 24 | - We have released a self-contained pytorch package: [torch_efficient_distloss](https://github.com/sunset1995/torch_efficient_distloss). 25 | - Consider a batch of 8192 rays x 256 points. 26 | - GPU memory consumption: 6192MB => 96MB. 27 | - Run time for 100 iters: 20 sec => 0.2 sec.
28 | - Supported datasets: 29 | - *Bounded inward-facing*: [NeRF](https://drive.google.com/drive/folders/128yBriW1IG_3NJ5Rp7APSTZsJqdJdfc1), [NSVF](https://dl.fbaipublicfiles.com/nsvf/dataset/Synthetic_NSVF.zip), [BlendedMVS](https://dl.fbaipublicfiles.com/nsvf/dataset/BlendedMVS.zip), [T&T (masked)](https://dl.fbaipublicfiles.com/nsvf/dataset/TanksAndTemple.zip), [DeepVoxels](https://drive.google.com/open?id=1ScsRlnzy9Bd_n-xw83SP-0t548v63mPH). 30 | - *Unbounded inward-facing*: [T&T](https://drive.google.com/file/d/11KRfN91W1AxAW6lOFs4EeYDbeoQZCi87/view?usp=sharing), [LF](https://drive.google.com/file/d/1gsjDjkbTh4GAR9fFqlIDZ__qR9NYTURQ/view?usp=sharing), [mip-NeRF360](https://jonbarron.info/mipnerf360/). 31 | - *Forward-facing*: [LLFF](https://drive.google.com/drive/folders/14boI-o5hGO9srnWaaogTU5_ji7wkX2S7). 32 | 33 | 34 | ### Installation 35 | ``` 36 | git clone git@github.com:sunset1995/DirectVoxGO.git 37 | cd DirectVoxGO 38 | pip install -r requirements.txt 39 | ``` 40 | [PyTorch](https://pytorch.org/) and [torch_scatter](https://github.com/rusty1s/pytorch_scatter) installation is machine dependent; please install the correct versions for your machine. 41 | 42 |
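For example, on a CUDA 11.8 machine the two machine-dependent packages could be installed as below; treat the versions and wheel URLs as placeholders for whatever matches your setup:

```bash
# Hypothetical example -- swap the torch/CUDA versions for your machine.
pip install torch --index-url https://download.pytorch.org/whl/cu118
pip install torch-scatter -f https://data.pyg.org/whl/torch-2.0.0+cu118.html
```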
43 | Dependencies (click to expand) 44 | 45 | - `PyTorch`, `numpy`, `torch_scatter`: main computation. 46 | - `scipy`, `lpips`: SSIM and LPIPS evaluation. 47 | - `tqdm`: progress bar. 48 | - `mmcv`: config system. 49 | - `opencv-python`: image processing. 50 | - `imageio`, `imageio-ffmpeg`: image and video I/O. 51 | - `Ninja`: to build the newly implemented torch extension just-in-time. 52 | - `einops`: torch tensor shaping with a pretty API. 53 | - `torch_efficient_distloss`: O(N) realization of the distortion loss. 54 |
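As a quick illustration of the last dependency, the O(N) distortion loss is computed from per-point rendering weights and sample midpoints. A minimal sketch following the package's documented interface (the shapes and `interval` below are made-up example values):

```python
import torch
from torch_efficient_distloss import eff_distloss

B, N = 8192, 256                           # rays x sampled points per ray
w = torch.rand(B, N).cuda()
w = (w / w.sum(-1, keepdim=True)).requires_grad_()  # per-point weights; each ray sums to 1
s = torch.linspace(0, 1, N + 1).cuda()     # bin edges along the ray
m = ((s[1:] + s[:-1]) * 0.5).expand(B, N)  # midpoint of each sampling interval
interval = 1.0 / N                         # uniform sampling interval
loss = eff_distloss(w, m, interval)        # O(N) distortion loss, differentiable w.r.t. w
loss.backward()
```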
55 | 56 | 57 | ## Directory structure for the datasets 58 | 59 |
60 | (click to expand;) 61 | 62 | data 63 | ├── nerf_synthetic # Link: https://drive.google.com/drive/folders/128yBriW1IG_3NJ5Rp7APSTZsJqdJdfc1 64 | │ └── [chair|drums|ficus|hotdog|lego|materials|mic|ship] 65 | │ ├── [train|val|test] 66 | │ │ └── r_*.png 67 | │ └── transforms_[train|val|test].json 68 | │ 69 | ├── Synthetic_NSVF # Link: https://dl.fbaipublicfiles.com/nsvf/dataset/Synthetic_NSVF.zip 70 | │ └── [Bike|Lifestyle|Palace|Robot|Spaceship|Steamtrain|Toad|Wineholder] 71 | │ ├── intrinsics.txt 72 | │ ├── rgb 73 | │ │ └── [0_train|1_val|2_test]_*.png 74 | │ └── pose 75 | │ └── [0_train|1_val|2_test]_*.txt 76 | │ 77 | ├── BlendedMVS # Link: https://dl.fbaipublicfiles.com/nsvf/dataset/BlendedMVS.zip 78 | │ └── [Character|Fountain|Jade|Statues] 79 | │ ├── intrinsics.txt 80 | │ ├── rgb 81 | │ │ └── [0|1|2]_*.png 82 | │ └── pose 83 | │ └── [0|1|2]_*.txt 84 | │ 85 | ├── TanksAndTemple # Link: https://dl.fbaipublicfiles.com/nsvf/dataset/TanksAndTemple.zip 86 | │ └── [Barn|Caterpillar|Family|Ignatius|Truck] 87 | │ ├── intrinsics.txt 88 | │ ├── rgb 89 | │ │ └── [0|1|2]_*.png 90 | │ └── pose 91 | │ └── [0|1|2]_*.txt 92 | │ 93 | ├── deepvoxels # Link: https://drive.google.com/drive/folders/1ScsRlnzy9Bd_n-xw83SP-0t548v63mPH 94 | │ └── [train|validation|test] 95 | │ └── [armchair|cube|greek|vase] 96 | │ ├── intrinsics.txt 97 | │ ├── rgb/*.png 98 | │ └── pose/*.txt 99 | │ 100 | ├── nerf_llff_data # Link: https://drive.google.com/drive/folders/128yBriW1IG_3NJ5Rp7APSTZsJqdJdfc1 101 | │ └── [fern|flower|fortress|horns|leaves|orchids|room|trex] 102 | │ 103 | ├── tanks_and_temples # Link: https://drive.google.com/file/d/11KRfN91W1AxAW6lOFs4EeYDbeoQZCi87/view?usp=sharing 104 | │ └── [tat_intermediate_M60|tat_intermediate_Playground|tat_intermediate_Train|tat_training_Truck] 105 | │ └── [train|test] 106 | │ ├── intrinsics/*txt 107 | │ ├── pose/*txt 108 | │ └── rgb/*jpg 109 | │ 110 | ├── lf_data # Link: https://drive.google.com/file/d/1gsjDjkbTh4GAR9fFqlIDZ__qR9NYTURQ/view?usp=sharing 111 | │ └── [africa|basket|ship|statue|torch] 112 | │ └── [train|test] 113 | │ ├── intrinsics/*txt 114 | │ ├── pose/*txt 115 | │ └── rgb/*jpg 116 | │ 117 | ├── 360_v2 # Link: https://jonbarron.info/mipnerf360/ 118 | │ └── [bicycle|bonsai|counter|garden|kitchen|room|stump] 119 | │ ├── poses_bounds.npy 120 | │ └── [images_2|images_4] 121 | │ 122 | ├── nerf_llff_data # Link: https://drive.google.com/drive/folders/14boI-o5hGO9srnWaaogTU5_ji7wkX2S7 123 | │ └── [fern|flower|fortress|horns|leaves|orchids|room|trex] 124 | │ ├── poses_bounds.npy 125 | │ └── [images_2|images_4] 126 | │ 127 | └── co3d # Link: https://github.com/facebookresearch/co3d 128 | └── [donut|teddybear|umbrella|...] 129 | ├── frame_annotations.jgz 130 | ├── set_lists.json 131 | └── [129_14950_29917|189_20376_35616|...] 132 | ├── images 133 | │ └── frame*.jpg 134 | └── masks 135 | └── frame*.png 136 |
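The `data` folder (git-ignored, per `.gitignore` above) can simply be a set of symlinks to wherever the downloads actually live; a minimal sketch with placeholder paths:

```bash
mkdir -p data
ln -s /path/to/downloads/nerf_synthetic data/nerf_synthetic
ln -s /path/to/downloads/Synthetic_NSVF data/Synthetic_NSVF
ln -s /path/to/downloads/360_v2 data/360_v2
```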
137 | 138 | 139 | 140 | ## GO 141 | 142 | - Training 143 | ```bash 144 | $ python run.py --config configs/nerf/lego.py --render_test 145 | ``` 146 | Use `--i_print` and `--i_weights` to change the log interval. 147 | - Evaluation 148 | To only evaluate the testset `PSNR`, `SSIM`, and `LPIPS` of the trained `lego` without re-training, run: 149 | ```bash 150 | $ python run.py --config configs/nerf/lego.py --render_only --render_test \ 151 | --eval_ssim --eval_lpips_vgg 152 | ``` 153 | Use `--eval_lpips_alex` to evaluate LPIPS with pre-trained Alex net instead of VGG net. 154 | - Render video 155 | ```bash 156 | $ python run.py --config configs/nerf/lego.py --render_only --render_video 157 | ``` 158 | Use `--render_video_factor 4` for a fast preview. 159 | - Reproduction: all config files to reproduce our results. 160 |
161 | (click to expand) 162 | 163 | $ ls configs/* 164 | configs/blendedmvs: 165 | Character.py Fountain.py Jade.py Statues.py 166 | 167 | configs/nerf: 168 | chair.py drums.py ficus.py hotdog.py lego.py materials.py mic.py ship.py 169 | 170 | configs/nsvf: 171 | Bike.py Lifestyle.py Palace.py Robot.py Spaceship.py Steamtrain.py Toad.py Wineholder.py 172 | 173 | configs/tankstemple: 174 | Barn.py Caterpillar.py Family.py Ignatius.py Truck.py 175 | 176 | configs/deepvoxels: 177 | armchair.py cube.py greek.py vase.py 178 | 179 | configs/tankstemple_unbounded: 180 | M60.py Playground.py Train.py Truck.py 181 | 182 | configs/lf: 183 | africa.py basket.py ship.py statue.py torch.py 184 | 185 | configs/nerf_unbounded: 186 | bicycle.py bonsai.py counter.py garden.py kitchen.py room.py stump.py 187 | 188 | configs/llff: 189 | fern.py flower.py fortress.py horns.py leaves.py orchids.py room.py trex.py 190 |
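To sweep one dataset group end-to-end, a simple shell loop over its config files works; a sketch using the flags documented above:

```bash
for cfg in configs/nerf/*.py; do
    python run.py --config "$cfg" --render_test --eval_ssim --eval_lpips_vgg
done
```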
191 | 192 | ### Custom casually captured scenes 193 | Coming soon hopefully. 194 | 195 | ### Development and tuning guide 196 | #### Extension to new datasets 197 | Adjusting the data-related config fields to fit your camera coordinate system is recommended before implementing a new dataloader. 198 | We provide two visualization tools for debugging. 199 | 1. Inspect the cameras and the allocated BBox. 200 | - Export via `--export_bbox_and_cams_only {filename}.npz`: 201 | ```bash 202 | python run.py --config configs/nerf/mic.py --export_bbox_and_cams_only cam_mic.npz 203 | ``` 204 | - Visualize the result: 205 | ```bash 206 | python tools/vis_train.py cam_mic.npz 207 | ``` 208 | 2. Inspect the learned geometry after coarse optimization. 209 | - Export via `--export_coarse_only {filename}.npz` (assuming `coarse_last.tar` is available in the training log): 210 | ```bash 211 | python run.py --config configs/nerf/mic.py --export_coarse_only coarse_mic.npz 212 | ``` 213 | - Visualize the result: 214 | ```bash 215 | python tools/vis_volume.py coarse_mic.npz 0.001 --cam cam_mic.npz 216 | ``` 217 | 218 | | Inspecting the cameras & BBox | Inspecting the learned coarse volume | 219 | |:-:|:-:| 220 | |![](figs/debug_cam_and_bbox.png)|![](figs/debug_coarse_volume.png)| 221 | 222 | 223 | 224 | #### Speed and quality tradeoff 225 | We have reported some ablation experiments in our paper supplementary material. 226 | Setting `N_iters`, `N_rand`, `num_voxels`, `rgbnet_depth`, `rgbnet_width` to larger values or setting `stepsize` to smaller values typically leads to better quality but needs more computation; a hypothetical example config is sketched at the end of this README. 227 | The `weight_distortion` setting affects the training speed and quality as well. 228 | Only `stepsize` is tunable at test time; all the other fields should remain the same as in training. 229 | 230 | ## Advanced data structure 231 | - **Octree** — [Plenoxels: Radiance Fields without Neural Networks](https://alexyu.net/plenoxels/). 232 | - **Hash** — [Instant Neural Graphics Primitives with a Multiresolution Hash Encoding](https://nvlabs.github.io/instant-ngp/). 233 | - **Factorized components** — [TensoRF: Tensorial Radiance Fields](https://apchenstu.github.io/TensoRF/). 234 | 235 | You will need them to scale to higher grid resolutions, but we believe our simple dense grid can still be a good starting point if you have other challenging problems to deal with. 236 | 237 | ## Acknowledgement 238 | The code base originated from the awesome [nerf-pytorch](https://github.com/yenchenlin/nerf-pytorch) implementation, but it has become very different from that code base by now.
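To make the knobs from the speed and quality tradeoff guide concrete, a hypothetical "higher quality, slower" override of one scene config might look like the following; the file name and values are illustrative assumptions, not tuned or validated settings:

```python
_base_ = './nerf/lego.py'     # hypothetical: inherit an existing scene config

expname = 'dvgo_lego_hq'      # hypothetical experiment name

fine_train = dict(
    N_iters=40000,            # more optimization steps
    N_rand=8192,              # larger ray batch per step
)

fine_model_and_render = dict(
    num_voxels=256**3,        # finer voxel grid
    rgbnet_depth=4,           # deeper color MLP
    rgbnet_width=256,         # wider color MLP
    stepsize=0.25,            # denser ray sampling (the only knob also tunable at test time)
)
```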
239 | -------------------------------------------------------------------------------- /configs/blendedmvs/Character.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_Character' 4 | basedir = './logs/blended_mvs' 5 | 6 | data = dict( 7 | datadir='./data/BlendedMVS/Character/', 8 | dataset_type='blendedmvs', 9 | inverse_y=True, 10 | white_bkgd=True, 11 | ) 12 | 13 | -------------------------------------------------------------------------------- /configs/blendedmvs/Fountain.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_Fountain' 4 | basedir = './logs/blended_mvs' 5 | 6 | data = dict( 7 | datadir='./data/BlendedMVS/Fountain/', 8 | dataset_type='blendedmvs', 9 | inverse_y=True, 10 | white_bkgd=False, 11 | ) 12 | 13 | -------------------------------------------------------------------------------- /configs/blendedmvs/Jade.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_Jade' 4 | basedir = './logs/blended_mvs' 5 | 6 | data = dict( 7 | datadir='./data/BlendedMVS/Jade/', 8 | dataset_type='blendedmvs', 9 | inverse_y=True, 10 | white_bkgd=False, 11 | ) 12 | 13 | -------------------------------------------------------------------------------- /configs/blendedmvs/Statues.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_Statues' 4 | basedir = './logs/blended_mvs' 5 | 6 | data = dict( 7 | datadir='./data/BlendedMVS/Statues/', 8 | dataset_type='blendedmvs', 9 | inverse_y=True, 10 | white_bkgd=True, 11 | ) 12 | 13 | -------------------------------------------------------------------------------- /configs/co3d/donut_369_40208_78816.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_donut_369_40208_78816' 4 | basedir = './logs/co3d' 5 | 6 | data = dict( 7 | datadir='./data/co3d/', 8 | dataset_type='co3d', 9 | annot_path='./data/co3d/donut/frame_annotations.jgz', 10 | split_path='./data/co3d/donut/set_lists.json', 11 | sequence_name='369_40208_78816', 12 | flip_x=True, 13 | flip_y=True, 14 | inverse_y=True, 15 | white_bkgd=False, 16 | ) 17 | 18 | coarse_train = dict( 19 | ray_sampler='flatten', 20 | ) 21 | 22 | -------------------------------------------------------------------------------- /configs/custom/Madoka.py: -------------------------------------------------------------------------------- 1 | _base_ = './default_forward_facing.py' 2 | 3 | expname = 'Madoka' 4 | 5 | data = dict( 6 | datadir='./data/custom/Madoka/dense', 7 | factor=2, 8 | movie_render_kwargs={ 9 | 'scale_r': 1.0, 10 | 'scale_f': 0.8, 11 | 'zrate': 2.0, 12 | 'zdelta': 0.5, 13 | } 14 | ) 15 | 16 | -------------------------------------------------------------------------------- /configs/custom/Otobai.py: -------------------------------------------------------------------------------- 1 | _base_ = './default_forward_facing.py' 2 | 3 | expname = 'Otobai' 4 | 5 | data = dict( 6 | datadir='./data/custom/Otobai/dense', 7 | factor=2, 8 | movie_render_kwargs={ 9 | 'scale_r': 0.8, 10 | 'scale_f': 10.0, 11 | 'zrate': 6.0, 12 | 'zdelta': 0.5, 13 | } 14 | ) 15 | 16 | -------------------------------------------------------------------------------- /configs/custom/default_forward_facing.py: 
-------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | basedir = './logs/custom' 4 | 5 | data = dict( 6 | dataset_type='llff', 7 | load2gpu_on_the_fly=True, 8 | ndc=True, 9 | llffhold=0, 10 | rand_bkgd=True, 11 | movie_render_kwargs={ 12 | 'scale_r': 1.0, # circling radius 13 | 'scale_f': 1.0, # the distance to the look-at point of focus 14 | 'zdelta': 0.5, # amplitude of forward motion 15 | 'zrate': 1.0, # frequency of forward motion 16 | 'N_rots': 1, # number of rotations in 120 frames 17 | } 18 | ) 19 | 20 | coarse_train = dict( 21 | N_iters=0, 22 | ) 23 | 24 | fine_train = dict( 25 | N_iters=30000, 26 | N_rand=4096, 27 | weight_distortion=0.01, 28 | pg_scale=[2000,4000,6000,8000], 29 | decay_after_scale=0.1, 30 | ray_sampler='flatten', 31 | tv_before=1e9, 32 | tv_dense_before=10000, 33 | weight_tv_density=1e-5, 34 | weight_tv_k0=1e-6, 35 | ) 36 | 37 | _mpi_depth = 256 38 | _stepsize = 1.0 39 | 40 | fine_model_and_render = dict( 41 | num_voxels=384*384*_mpi_depth, 42 | mpi_depth=_mpi_depth, 43 | stepsize=_stepsize, 44 | rgbnet_dim=9, 45 | rgbnet_width=64, 46 | world_bound_scale=1, 47 | fast_color_thres=_stepsize/_mpi_depth/5, 48 | ) 49 | 50 | -------------------------------------------------------------------------------- /configs/custom/default_ubd_inward_facing.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | basedir = './logs/custom' 4 | 5 | data = dict( 6 | dataset_type='llff', 7 | spherify=True, 8 | llffhold=0, 9 | bd_factor=None, 10 | white_bkgd=True, 11 | rand_bkgd=True, 12 | unbounded_inward=True, 13 | load2gpu_on_the_fly=True, 14 | ) 15 | 16 | coarse_train = dict(N_iters=0) 17 | 18 | fine_train = dict( 19 | N_iters=40000, 20 | N_rand=4096, 21 | lrate_decay=80, 22 | ray_sampler='flatten', 23 | weight_nearclip=0.0, 24 | weight_distortion=0.01, 25 | pg_scale=[2000,4000,6000,8000,10000,12000,14000,16000], 26 | tv_before=20000, 27 | tv_dense_before=20000, 28 | weight_tv_density=1e-6, 29 | weight_tv_k0=1e-7, 30 | ) 31 | 32 | alpha_init = 1e-4 33 | stepsize = 0.5 34 | 35 | fine_model_and_render = dict( 36 | num_voxels=320**3, 37 | num_voxels_base=320**3, 38 | alpha_init=alpha_init, 39 | stepsize=stepsize, 40 | fast_color_thres={ 41 | '_delete_': True, 42 | 0 : alpha_init*stepsize/10, 43 | 1500: min(alpha_init, 1e-4)*stepsize/5, 44 | 2500: min(alpha_init, 1e-4)*stepsize/2, 45 | 3500: min(alpha_init, 1e-4)*stepsize/1.5, 46 | 4500: min(alpha_init, 1e-4)*stepsize, 47 | 5500: min(alpha_init, 1e-4), 48 | 6500: 1e-4, 49 | }, 50 | world_bound_scale=1, 51 | ) 52 | 53 | -------------------------------------------------------------------------------- /configs/deepvoxels/armchair.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_armchair' 4 | basedir = './logs/deepvoxels' 5 | 6 | data = dict( 7 | datadir='./data/deepvoxels/', 8 | dataset_type='deepvoxels', 9 | scene='armchair', 10 | white_bkgd=True, 11 | ) 12 | 13 | -------------------------------------------------------------------------------- /configs/deepvoxels/cube.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_cube' 4 | basedir = './logs/deepvoxels' 5 | 6 | data = dict( 7 | datadir='./data/deepvoxels/', 8 | dataset_type='deepvoxels', 9 | scene='cube', 10 | white_bkgd=True, 11 | ) 12 | 13 |
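All of these per-scene files rely on the mmcv-style config inheritance used throughout `configs/`: `_base_` pulls in a template config and the local dicts override individual keys, with everything else inherited. A sketch of a hypothetical new scene config (the experiment name and paths are assumptions):

```python
_base_ = '../default.py'      # inherit every field from the template config

expname = 'dvgo_myscene'      # hypothetical experiment name
basedir = './logs/my_dataset'

data = dict(                  # only overridden keys appear here; all other
    datadir='./data/my_dataset/myscene',  # data fields keep their default.py values
    dataset_type='blender',
    white_bkgd=True,
)
```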
-------------------------------------------------------------------------------- /configs/deepvoxels/greek.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_greek' 4 | basedir = './logs/deepvoxels' 5 | 6 | data = dict( 7 | datadir='./data/deepvoxels/', 8 | dataset_type='deepvoxels', 9 | scene='greek', 10 | white_bkgd=True, 11 | ) 12 | 13 | -------------------------------------------------------------------------------- /configs/deepvoxels/vase.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_vase' 4 | basedir = './logs/deepvoxels' 5 | 6 | data = dict( 7 | datadir='./data/deepvoxels/', 8 | dataset_type='deepvoxels', 9 | scene='vase', 10 | white_bkgd=True, 11 | ) 12 | 13 | -------------------------------------------------------------------------------- /configs/default.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | 3 | expname = None # experiment name 4 | basedir = './logs/' # where to store ckpts and logs 5 | 6 | ''' Template of data options 7 | ''' 8 | data = dict( 9 | datadir=None, # path to dataset root folder 10 | dataset_type=None, # blender | nsvf | blendedmvs | tankstemple | deepvoxels | co3d 11 | inverse_y=False, # intrinsics mode (to support blendedmvs, nsvf, tankstemple) 12 | flip_x=False, # to support co3d 13 | flip_y=False, # to support co3d 14 | annot_path='', # to support co3d 15 | split_path='', # to support co3d 16 | sequence_name='', # to support co3d 17 | load2gpu_on_the_fly=False, # do not load all images onto the GPU (to save GPU memory) 18 | testskip=1, # subsample the testset to preview results 19 | white_bkgd=False, # use white background (note that some datasets don't provide alpha and come with a blended bg color) 20 | rand_bkgd=False, # use random background during training 21 | half_res=False, # [TODO] 22 | bd_factor=.75, 23 | movie_render_kwargs=dict(), 24 | 25 | # Below are forward-facing llff specific settings. 26 | ndc=False, # use NDC coordinates (only supported for forward-facing scenes) 27 | spherify=False, # inward-facing 28 | factor=4, # [TODO] 29 | width=None, # enforce image width 30 | height=None, # enforce image height 31 | llffhold=8, # test split (hold out every llffhold-th view) 32 | load_depths=False, # load depth 33 | 34 | # Below are unbounded inward-facing specific settings.
35 | unbounded_inward=False, 36 | unbounded_inner_r=1.0, 37 | ) 38 | 39 | ''' Template of training options 40 | ''' 41 | coarse_train = dict( 42 | N_iters=5000, # number of optimization steps 43 | N_rand=8192, # batch size (number of random rays per optimization step) 44 | lrate_density=1e-1, # lr of the density voxel grid 45 | lrate_k0=1e-1, # lr of the color/feature voxel grid 46 | lrate_rgbnet=1e-3, # lr of the mlp that predicts view-dependent color 47 | lrate_decay=20, # lr decays by 0.1 after every lrate_decay*1000 steps 48 | pervoxel_lr=True, # view-count-based lr 49 | pervoxel_lr_downrate=1, # downsample images when computing the view-count-based lr 50 | ray_sampler='random', # ray sampling strategy 51 | weight_main=1.0, # weight of the photometric loss 52 | weight_entropy_last=0.01, # weight of the background entropy loss 53 | weight_nearclip=0, 54 | weight_distortion=0, 55 | weight_rgbper=0.1, # weight of the per-point rgb loss 56 | tv_every=1, # count total variation loss every tv_every step 57 | tv_after=0, # count total variation loss starting from the tv_after step 58 | tv_before=0, # count total variation before the given number of iterations 59 | tv_dense_before=0, # count total variation densely before the given number of iterations 60 | weight_tv_density=0.0, # weight of the total variation loss on the density voxel grid 61 | weight_tv_k0=0.0, # weight of the total variation loss on the color/feature voxel grid 62 | pg_scale=[], # checkpoints for progressive scaling 63 | decay_after_scale=1.0, # decay act_shift after scaling 64 | skip_zero_grad_fields=[], # variable names for which parameters with zero grad are skipped in each iteration 65 | maskout_lt_nviews=0, 66 | ) 67 | 68 | fine_train = deepcopy(coarse_train) 69 | fine_train.update(dict( 70 | N_iters=20000, 71 | pervoxel_lr=False, 72 | ray_sampler='in_maskcache', 73 | weight_entropy_last=0.001, 74 | weight_rgbper=0.01, 75 | pg_scale=[1000, 2000, 3000, 4000], 76 | skip_zero_grad_fields=['density', 'k0'], 77 | )) 78 | 79 | ''' Template of model and rendering options 80 | ''' 81 | coarse_model_and_render = dict( 82 | num_voxels=1024000, # expected number of voxels 83 | num_voxels_base=1024000, # to rescale delta distance 84 | density_type='DenseGrid', # DenseGrid, TensoRFGrid 85 | k0_type='DenseGrid', # DenseGrid, TensoRFGrid 86 | density_config=dict(), 87 | k0_config=dict(), 88 | mpi_depth=128, # the number of planes in the Multiplane Image (used when ndc=True) 89 | nearest=False, # nearest interpolation 90 | pre_act_density=False, # pre-activated trilinear interpolation 91 | in_act_density=False, # in-activated trilinear interpolation 92 | bbox_thres=1e-3, # threshold to determine known free space in the fine stage 93 | mask_cache_thres=1e-3, # threshold to determine a tightened BBox in the fine stage 94 | rgbnet_dim=0, # feature voxel grid dim 95 | rgbnet_full_implicit=False, # let the color MLP ignore the feature voxel grid 96 | rgbnet_direct=True, # set to False to treat the first 3 dims of the feature voxel grid as diffuse rgb 97 | rgbnet_depth=3, # depth of the color MLP (there are rgbnet_depth-1 intermediate features) 98 | rgbnet_width=128, # width of the color MLP 99 | alpha_init=1e-6, # set the alpha values everywhere at the beginning of training 100 | fast_color_thres=1e-7, # threshold on alpha for skipping sampled points in the fine stage 101 | maskout_near_cam_vox=True, # mask out grid points that lie between cameras and their near planes 102 | world_bound_scale=1, # rescale the BBox enclosing the scene 103 | stepsize=0.5, # sampling stepsize in volume rendering 104 | ) 105 | 106 |
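# Note on the pattern below: fine_model_and_render is derived from
# coarse_model_and_render via deepcopy + update (and fine_train from
# coarse_train above, in the same way), so each fine-stage template only
# spells out the fields it overrides; everything else keeps the coarse value.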
fine_model_and_render = deepcopy(coarse_model_and_render) 107 | fine_model_and_render.update(dict( 108 | num_voxels=160**3, 109 | num_voxels_base=160**3, 110 | rgbnet_dim=12, 111 | alpha_init=1e-2, 112 | fast_color_thres=1e-4, 113 | maskout_near_cam_vox=False, 114 | world_bound_scale=1.05, 115 | )) 116 | 117 | del deepcopy 118 | -------------------------------------------------------------------------------- /configs/lf/africa.py: -------------------------------------------------------------------------------- 1 | _base_ = './lf_default.py' 2 | 3 | expname = 'dvgo_Africa_unbounded' 4 | 5 | data = dict( 6 | datadir='./data/lf_data/africa', 7 | ) 8 | 9 | -------------------------------------------------------------------------------- /configs/lf/basket.py: -------------------------------------------------------------------------------- 1 | _base_ = './lf_default.py' 2 | 3 | expname = 'dvgo_Basket_unbounded' 4 | 5 | data = dict( 6 | datadir='./data/lf_data/basket', 7 | ) 8 | 9 | -------------------------------------------------------------------------------- /configs/lf/lf_default.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | basedir = './logs/lf' 4 | 5 | data = dict( 6 | dataset_type='nerfpp', 7 | inverse_y=True, 8 | white_bkgd=False, 9 | rand_bkgd=True, 10 | unbounded_inward=True, 11 | ) 12 | 13 | coarse_train = dict(N_iters=0) 14 | 15 | fine_train = dict( 16 | N_iters=25000, 17 | N_rand=4096, 18 | ray_sampler='flatten', 19 | weight_distortion=1e-2, 20 | pg_scale=[1000,2000,3000,4000,5000,6000], 21 | decay_after_scale=1.0, 22 | tv_before=1e9, 23 | tv_dense_before=10000, 24 | weight_tv_density=1e-6, 25 | weight_tv_k0=1e-7, 26 | ) 27 | 28 | alpha_init = 1e-4 29 | stepsize = 0.5 30 | 31 | fine_model_and_render = dict( 32 | num_voxels=256**3, 33 | num_voxels_base=256**3, 34 | alpha_init=alpha_init, 35 | stepsize=stepsize, 36 | fast_color_thres={ 37 | '_delete_': True, 38 | 0 : alpha_init*stepsize/10, 39 | 1500: min(alpha_init, 1e-4)*stepsize/5, 40 | 2500: min(alpha_init, 1e-4)*stepsize/2, 41 | 3500: min(alpha_init, 1e-4)*stepsize/1.5, 42 | 4500: min(alpha_init, 1e-4)*stepsize, 43 | 5500: min(alpha_init, 1e-4), 44 | 6500: 1e-4, 45 | }, 46 | world_bound_scale=1, 47 | ) 48 | 49 | -------------------------------------------------------------------------------- /configs/lf/ship.py: -------------------------------------------------------------------------------- 1 | _base_ = './lf_default.py' 2 | 3 | expname = 'dvgo_Ship_unbounded' 4 | 5 | data = dict( 6 | datadir='./data/lf_data/ship', 7 | ) 8 | 9 | -------------------------------------------------------------------------------- /configs/lf/statue.py: -------------------------------------------------------------------------------- 1 | _base_ = './lf_default.py' 2 | 3 | expname = 'dvgo_Statue_unbounded' 4 | 5 | data = dict( 6 | datadir='./data/lf_data/statue', 7 | ) 8 | 9 | -------------------------------------------------------------------------------- /configs/lf/torch.py: -------------------------------------------------------------------------------- 1 | _base_ = './lf_default.py' 2 | 3 | expname = 'dvgo_Torch_unbounded' 4 | 5 | data = dict( 6 | datadir='./data/lf_data/torch', 7 | ) 8 | 9 | -------------------------------------------------------------------------------- /configs/llff/fern.py: -------------------------------------------------------------------------------- 1 | _base_ = './llff_default.py' 2 | 3 | expname = 'fern' 4 | 5 | data = dict( 6 | 
datadir='./data/nerf_llff_data/fern', 7 | ) 8 | 9 | -------------------------------------------------------------------------------- /configs/llff/fern_lg.py: -------------------------------------------------------------------------------- 1 | _base_ = './llff_default_lg.py' 2 | 3 | expname = 'fern_lg' 4 | 5 | data = dict( 6 | datadir='./data/nerf_llff_data/fern', 7 | ) 8 | 9 | -------------------------------------------------------------------------------- /configs/llff/flower.py: -------------------------------------------------------------------------------- 1 | _base_ = './llff_default.py' 2 | 3 | expname = 'flower' 4 | 5 | data = dict( 6 | datadir='./data/nerf_llff_data/flower', 7 | ) 8 | 9 | -------------------------------------------------------------------------------- /configs/llff/flower_lg.py: -------------------------------------------------------------------------------- 1 | _base_ = './llff_default_lg.py' 2 | 3 | expname = 'flower_lg' 4 | 5 | data = dict( 6 | datadir='./data/nerf_llff_data/flower', 7 | ) 8 | 9 | -------------------------------------------------------------------------------- /configs/llff/fortress.py: -------------------------------------------------------------------------------- 1 | _base_ = './llff_default.py' 2 | 3 | expname = 'fortress' 4 | 5 | data = dict( 6 | datadir='./data/nerf_llff_data/fortress', 7 | ) 8 | 9 | -------------------------------------------------------------------------------- /configs/llff/fortress_lg.py: -------------------------------------------------------------------------------- 1 | _base_ = './llff_default_lg.py' 2 | 3 | expname = 'fortress_lg' 4 | 5 | data = dict( 6 | datadir='./data/nerf_llff_data/fortress', 7 | ) 8 | 9 | -------------------------------------------------------------------------------- /configs/llff/horns.py: -------------------------------------------------------------------------------- 1 | _base_ = './llff_default.py' 2 | 3 | expname = 'horns' 4 | 5 | data = dict( 6 | datadir='./data/nerf_llff_data/horns', 7 | ) 8 | 9 | -------------------------------------------------------------------------------- /configs/llff/horns_lg.py: -------------------------------------------------------------------------------- 1 | _base_ = './llff_default_lg.py' 2 | 3 | expname = 'horns_lg' 4 | 5 | data = dict( 6 | datadir='./data/nerf_llff_data/horns', 7 | ) 8 | 9 | -------------------------------------------------------------------------------- /configs/llff/leaves.py: -------------------------------------------------------------------------------- 1 | _base_ = './llff_default.py' 2 | 3 | expname = 'leaves' 4 | 5 | data = dict( 6 | datadir='./data/nerf_llff_data/leaves', 7 | ) 8 | 9 | -------------------------------------------------------------------------------- /configs/llff/leaves_lg.py: -------------------------------------------------------------------------------- 1 | _base_ = './llff_default_lg.py' 2 | 3 | expname = 'leaves_lg' 4 | 5 | data = dict( 6 | datadir='./data/nerf_llff_data/leaves', 7 | ) 8 | 9 | -------------------------------------------------------------------------------- /configs/llff/llff_default.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | basedir = './logs/llff' 4 | 5 | data = dict( 6 | dataset_type='llff', 7 | ndc=True, 8 | width=1008, 9 | height=756, 10 | ) 11 | 12 | coarse_train = dict( 13 | N_iters=0, 14 | ) 15 | 16 | fine_train = dict( 17 | N_iters=30000, 18 | N_rand=4096, 19 | weight_distortion=0.01, 20 | 
pg_scale=[2000,4000,6000,8000], 21 | ray_sampler='flatten', 22 | tv_before=1e9, 23 | tv_dense_before=10000, 24 | weight_tv_density=1e-5, 25 | weight_tv_k0=1e-6, 26 | ) 27 | 28 | fine_model_and_render = dict( 29 | num_voxels=256**3, 30 | mpi_depth=128, 31 | rgbnet_dim=9, 32 | rgbnet_width=64, 33 | world_bound_scale=1, 34 | fast_color_thres=1e-3, 35 | ) 36 | 37 | -------------------------------------------------------------------------------- /configs/llff/llff_default_lg.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | basedir = './logs/llff' 4 | 5 | data = dict( 6 | dataset_type='llff', 7 | ndc=True, 8 | width=1008, 9 | height=756, 10 | rand_bkgd=True, 11 | ) 12 | 13 | coarse_train = dict( 14 | N_iters=0, 15 | ) 16 | 17 | fine_train = dict( 18 | N_iters=30000, 19 | N_rand=4096, 20 | weight_distortion=0.01, 21 | pg_scale=[2000,4000,6000,8000], 22 | decay_after_scale=0.1, 23 | ray_sampler='flatten', 24 | tv_before=1e9, 25 | tv_dense_before=10000, 26 | weight_tv_density=1e-5, 27 | weight_tv_k0=1e-6, 28 | ) 29 | 30 | _mpi_depth = 256 31 | _stepsize = 1.0 32 | 33 | fine_model_and_render = dict( 34 | num_voxels=384*384*_mpi_depth, 35 | mpi_depth=_mpi_depth, 36 | stepsize=_stepsize, 37 | rgbnet_dim=9, 38 | rgbnet_width=64, 39 | world_bound_scale=1, 40 | fast_color_thres=_stepsize/_mpi_depth/5, 41 | ) 42 | 43 | -------------------------------------------------------------------------------- /configs/llff/orchids.py: -------------------------------------------------------------------------------- 1 | _base_ = './llff_default.py' 2 | 3 | expname = 'orchids' 4 | 5 | data = dict( 6 | datadir='./data/nerf_llff_data/orchids', 7 | ) 8 | 9 | -------------------------------------------------------------------------------- /configs/llff/orchids_lg.py: -------------------------------------------------------------------------------- 1 | _base_ = './llff_default_lg.py' 2 | 3 | expname = 'orchids_lg' 4 | 5 | data = dict( 6 | datadir='./data/nerf_llff_data/orchids', 7 | ) 8 | 9 | -------------------------------------------------------------------------------- /configs/llff/room.py: -------------------------------------------------------------------------------- 1 | _base_ = './llff_default.py' 2 | 3 | expname = 'room' 4 | 5 | data = dict( 6 | datadir='./data/nerf_llff_data/room', 7 | ) 8 | 9 | -------------------------------------------------------------------------------- /configs/llff/room_lg.py: -------------------------------------------------------------------------------- 1 | _base_ = './llff_default_lg.py' 2 | 3 | expname = 'room_lg' 4 | 5 | data = dict( 6 | datadir='./data/nerf_llff_data/room', 7 | ) 8 | 9 | -------------------------------------------------------------------------------- /configs/llff/trex.py: -------------------------------------------------------------------------------- 1 | _base_ = './llff_default.py' 2 | 3 | expname = 'trex' 4 | 5 | data = dict( 6 | datadir='./data/nerf_llff_data/trex', 7 | ) 8 | 9 | -------------------------------------------------------------------------------- /configs/llff/trex_lg.py: -------------------------------------------------------------------------------- 1 | _base_ = './llff_default_lg.py' 2 | 3 | expname = 'trex_lg' 4 | 5 | data = dict( 6 | datadir='./data/nerf_llff_data/trex', 7 | ) 8 | 9 | -------------------------------------------------------------------------------- /configs/nerf/chair.py: -------------------------------------------------------------------------------- 1 | 
_base_ = '../default.py' 2 | 3 | expname = 'dvgo_chair' 4 | basedir = './logs/nerf_synthetic' 5 | 6 | data = dict( 7 | datadir='./data/nerf_synthetic/chair', 8 | dataset_type='blender', 9 | white_bkgd=True, 10 | ) 11 | 12 | -------------------------------------------------------------------------------- /configs/nerf/drums.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_drums' 4 | basedir = './logs/nerf_synthetic' 5 | 6 | data = dict( 7 | datadir='./data/nerf_synthetic/drums', 8 | dataset_type='blender', 9 | white_bkgd=True, 10 | ) 11 | 12 | -------------------------------------------------------------------------------- /configs/nerf/ficus.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_ficus' 4 | basedir = './logs/nerf_synthetic' 5 | 6 | data = dict( 7 | datadir='./data/nerf_synthetic/ficus', 8 | dataset_type='blender', 9 | white_bkgd=True, 10 | ) 11 | 12 | -------------------------------------------------------------------------------- /configs/nerf/hotdog.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_hotdog' 4 | basedir = './logs/nerf_synthetic' 5 | 6 | data = dict( 7 | datadir='./data/nerf_synthetic/hotdog', 8 | dataset_type='blender', 9 | white_bkgd=True, 10 | ) 11 | 12 | -------------------------------------------------------------------------------- /configs/nerf/lego.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_lego' 4 | basedir = './logs/nerf_synthetic' 5 | 6 | data = dict( 7 | datadir='./data/nerf_synthetic/lego', 8 | dataset_type='blender', 9 | white_bkgd=True, 10 | ) 11 | 12 | -------------------------------------------------------------------------------- /configs/nerf/materials.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_materials' 4 | basedir = './logs/nerf_synthetic' 5 | 6 | data = dict( 7 | datadir='./data/nerf_synthetic/materials', 8 | dataset_type='blender', 9 | white_bkgd=True, 10 | ) 11 | 12 | -------------------------------------------------------------------------------- /configs/nerf/mic.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_mic' 4 | basedir = './logs/nerf_synthetic' 5 | 6 | data = dict( 7 | datadir='./data/nerf_synthetic/mic', 8 | dataset_type='blender', 9 | white_bkgd=True, 10 | ) 11 | 12 | -------------------------------------------------------------------------------- /configs/nerf/ship.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_ship' 4 | basedir = './logs/nerf_synthetic' 5 | 6 | data = dict( 7 | datadir='./data/nerf_synthetic/ship', 8 | dataset_type='blender', 9 | white_bkgd=True, 10 | ) 11 | 12 | -------------------------------------------------------------------------------- /configs/nerf/ship.tensorf.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_ship_tensorf' 4 | basedir = './logs/nerf_synthetic' 5 | 6 | data = dict( 7 | datadir='./data/nerf_synthetic/ship', 8 | dataset_type='blender', 9 | white_bkgd=True, 10 | ) 11 | 12 | 
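# The overrides below swap both voxel grids from the default 'DenseGrid' to the
# factorized 'TensoRFGrid' variant with n_comp low-rank components per grid,
# and lower the two grid learning rates accordingly.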
fine_train = dict( 13 | lrate_density=0.02, 14 | lrate_k0=0.02, 15 | pg_scale=[1000,2000,3000,4000,5000,6000], 16 | ) 17 | 18 | fine_model_and_render = dict( 19 | num_voxels=384**3, 20 | density_type='TensoRFGrid', 21 | density_config=dict(n_comp=8), 22 | k0_type='TensoRFGrid', 23 | k0_config=dict(n_comp=24), 24 | ) 25 | 26 | -------------------------------------------------------------------------------- /configs/nerf_unbounded/bicycle.py: -------------------------------------------------------------------------------- 1 | _base_ = './nerf_unbounded_default.py' 2 | 3 | expname = 'dvgo_bicycle_unbounded' 4 | 5 | data = dict( 6 | datadir='./data/360_v2/bicycle', 7 | factor=4, # 1237x822 8 | movie_render_kwargs=dict( 9 | shift_x=0.0, # positive right 10 | shift_y=0, # negative down 11 | shift_z=0, 12 | scale_r=1.0, 13 | pitch_deg=-10, # negative look downward 14 | ), 15 | ) 16 | 17 | -------------------------------------------------------------------------------- /configs/nerf_unbounded/bonsai.py: -------------------------------------------------------------------------------- 1 | _base_ = './nerf_unbounded_default.py' 2 | 3 | expname = 'dvgo_bonsai_unbounded' 4 | 5 | data = dict( 6 | datadir='./data/360_v2/bonsai', 7 | factor=2, # 1559x1039 8 | movie_render_kwargs=dict( 9 | shift_x=0.0, # positive right 10 | shift_y=0, # negative down 11 | shift_z=0, 12 | scale_r=1.0, 13 | pitch_deg=-30, # negative look downward 14 | ), 15 | ) 16 | 17 | -------------------------------------------------------------------------------- /configs/nerf_unbounded/counter.py: -------------------------------------------------------------------------------- 1 | _base_ = './nerf_unbounded_default.py' 2 | 3 | expname = 'dvgo_counter_unbounded' 4 | 5 | data = dict( 6 | datadir='./data/360_v2/counter', 7 | factor=2, # 1558x1038 8 | movie_render_kwargs=dict( 9 | shift_x=0.0, # positive right 10 | shift_y=-0.2, # negative down 11 | shift_z=0, 12 | scale_r=0.9, 13 | pitch_deg=-30, # negative look downward 14 | ), 15 | ) 16 | 17 | -------------------------------------------------------------------------------- /configs/nerf_unbounded/garden.py: -------------------------------------------------------------------------------- 1 | _base_ = './nerf_unbounded_default.py' 2 | 3 | expname = 'dvgo_garden_unbounded' 4 | 5 | data = dict( 6 | datadir='./data/360_v2/garden', 7 | factor=4, # 1297x840 8 | movie_render_kwargs=dict( 9 | shift_x=0.0, # positive right 10 | shift_y=-0.0, # negative down 11 | shift_z=0, 12 | scale_r=0.9, 13 | pitch_deg=-30, 14 | ), 15 | ) 16 | 17 | -------------------------------------------------------------------------------- /configs/nerf_unbounded/kitchen.py: -------------------------------------------------------------------------------- 1 | _base_ = './nerf_unbounded_default.py' 2 | 3 | expname = 'dvgo_kitchen_unbounded' 4 | 5 | data = dict( 6 | datadir='./data/360_v2/kitchen', 7 | factor=2, # 1558x1039 8 | movie_render_kwargs=dict( 9 | shift_y=-0.0, 10 | scale_r=0.9, 11 | pitch_deg=-40, 12 | ), 13 | ) 14 | 15 | -------------------------------------------------------------------------------- /configs/nerf_unbounded/nerf_unbounded_default.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | basedir = './logs/nerf_unbounded' 4 | 5 | data = dict( 6 | dataset_type='llff', 7 | spherify=True, 8 | factor=4, 9 | llffhold=8, 10 | white_bkgd=True, 11 | rand_bkgd=True, 12 | unbounded_inward=True, 13 | load2gpu_on_the_fly=True, 14 | ) 15 | 16 | 
coarse_train = dict(N_iters=0) 17 | 18 | fine_train = dict( 19 | N_iters=40000, 20 | N_rand=4096, 21 | lrate_decay=80, 22 | ray_sampler='flatten', 23 | weight_nearclip=1.0, 24 | weight_distortion=0.01, 25 | pg_scale=[2000,4000,6000,8000,10000,12000,14000,16000], 26 | tv_before=20000, 27 | tv_dense_before=20000, 28 | weight_tv_density=1e-6, 29 | weight_tv_k0=1e-7, 30 | ) 31 | 32 | alpha_init = 1e-4 33 | stepsize = 0.5 34 | 35 | fine_model_and_render = dict( 36 | num_voxels=320**3, 37 | num_voxels_base=320**3, 38 | alpha_init=alpha_init, 39 | stepsize=stepsize, 40 | fast_color_thres={ 41 | '_delete_': True, 42 | 0 : alpha_init*stepsize/10, 43 | 1500: min(alpha_init, 1e-4)*stepsize/5, 44 | 2500: min(alpha_init, 1e-4)*stepsize/2, 45 | 3500: min(alpha_init, 1e-4)*stepsize/1.5, 46 | 4500: min(alpha_init, 1e-4)*stepsize, 47 | 5500: min(alpha_init, 1e-4), 48 | 6500: 1e-4, 49 | }, 50 | world_bound_scale=1, 51 | ) 52 | 53 | -------------------------------------------------------------------------------- /configs/nerf_unbounded/room.py: -------------------------------------------------------------------------------- 1 | _base_ = './nerf_unbounded_default.py' 2 | 3 | expname = 'dvgo_room_unbounded' 4 | 5 | data = dict( 6 | datadir='./data/360_v2/room', 7 | factor=2, # 1557x1038 8 | movie_render_kwargs=dict( 9 | shift_x=0.0, # positive right 10 | shift_y=-0.3, # negative down 11 | shift_z=0, 12 | scale_r=0.2, 13 | pitch_deg=-40, # negative look downward 14 | ), 15 | ) 16 | 17 | -------------------------------------------------------------------------------- /configs/nerf_unbounded/stump.py: -------------------------------------------------------------------------------- 1 | _base_ = './nerf_unbounded_default.py' 2 | 3 | expname = 'dvgo_stump_unbounded' 4 | 5 | data = dict( 6 | datadir='./data/360_v2/stump', 7 | factor=4, 8 | movie_render_kwargs=dict( 9 | shift_x=0.0, # positive right 10 | shift_y=-0.2, # negative down 11 | shift_z=0, 12 | scale_r=0.8, 13 | pitch_deg=-20, # negative look downward 14 | ), 15 | ) 16 | 17 | -------------------------------------------------------------------------------- /configs/nsvf/Bike.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_Bike' 4 | basedir = './logs/nsvf_synthetic' 5 | 6 | data = dict( 7 | datadir='./data/Synthetic_NSVF/Bike', 8 | dataset_type='nsvf', 9 | inverse_y=True, 10 | white_bkgd=True, 11 | ) 12 | 13 | -------------------------------------------------------------------------------- /configs/nsvf/Lifestyle.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_Lifestyle' 4 | basedir = './logs/nsvf_synthetic' 5 | 6 | data = dict( 7 | datadir='./data/Synthetic_NSVF/Lifestyle', 8 | dataset_type='nsvf', 9 | inverse_y=True, 10 | white_bkgd=True, 11 | ) 12 | 13 | -------------------------------------------------------------------------------- /configs/nsvf/Palace.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_Palace' 4 | basedir = './logs/nsvf_synthetic' 5 | 6 | data = dict( 7 | datadir='./data/Synthetic_NSVF/Palace', 8 | dataset_type='nsvf', 9 | inverse_y=True, 10 | white_bkgd=True, 11 | ) 12 | 13 | -------------------------------------------------------------------------------- /configs/nsvf/Robot.py: -------------------------------------------------------------------------------- 1 | _base_ = 
'../default.py' 2 | 3 | expname = 'dvgo_Robot' 4 | basedir = './logs/nsvf_synthetic' 5 | 6 | data = dict( 7 | datadir='./data/Synthetic_NSVF/Robot', 8 | dataset_type='nsvf', 9 | inverse_y=True, 10 | white_bkgd=True, 11 | ) 12 | 13 | -------------------------------------------------------------------------------- /configs/nsvf/Spaceship.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_Spaceship' 4 | basedir = './logs/nsvf_synthetic' 5 | 6 | data = dict( 7 | datadir='./data/Synthetic_NSVF/Spaceship', 8 | dataset_type='nsvf', 9 | inverse_y=True, 10 | white_bkgd=True, 11 | ) 12 | 13 | -------------------------------------------------------------------------------- /configs/nsvf/Steamtrain.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_Steamtrain' 4 | basedir = './logs/nsvf_synthetic' 5 | 6 | data = dict( 7 | datadir='./data/Synthetic_NSVF/Steamtrain', 8 | dataset_type='nsvf', 9 | inverse_y=True, 10 | white_bkgd=True, 11 | ) 12 | 13 | -------------------------------------------------------------------------------- /configs/nsvf/Toad.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_Toad' 4 | basedir = './logs/nsvf_synthetic' 5 | 6 | data = dict( 7 | datadir='./data/Synthetic_NSVF/Toad', 8 | dataset_type='nsvf', 9 | inverse_y=True, 10 | white_bkgd=True, 11 | ) 12 | 13 | -------------------------------------------------------------------------------- /configs/nsvf/Wineholder.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_Wineholder' 4 | basedir = './logs/nsvf_synthetic' 5 | 6 | data = dict( 7 | datadir='./data/Synthetic_NSVF/Wineholder', 8 | dataset_type='nsvf', 9 | inverse_y=True, 10 | white_bkgd=True, 11 | ) 12 | 13 | -------------------------------------------------------------------------------- /configs/tankstemple/Barn.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_Barn' 4 | basedir = './logs/tanks_and_temple' 5 | 6 | data = dict( 7 | datadir='./data/TanksAndTemple/Barn', 8 | dataset_type='tankstemple', 9 | inverse_y=True, 10 | load2gpu_on_the_fly=True, 11 | white_bkgd=True, 12 | ) 13 | 14 | coarse_train = dict( 15 | pervoxel_lr_downrate=2, 16 | ) 17 | 18 | -------------------------------------------------------------------------------- /configs/tankstemple/Barn_lg.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_Barn_lg' 4 | basedir = './logs/tanks_and_temple' 5 | 6 | data = dict( 7 | datadir='./data/TanksAndTemple/Barn', 8 | dataset_type='tankstemple', 9 | inverse_y=True, 10 | load2gpu_on_the_fly=True, 11 | white_bkgd=True, 12 | movie_render_kwargs={'flip_up_vec': True}, 13 | ) 14 | 15 | coarse_train = dict( 16 | pervoxel_lr_downrate=2, 17 | ) 18 | 19 | fine_train = dict(pg_scale=[1000,2000,3000,4000,5000,6000]) 20 | fine_model_and_render = dict(num_voxels=256**3) 21 | 22 | -------------------------------------------------------------------------------- /configs/tankstemple/Caterpillar.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_Caterpillar' 4 | basedir = 
'./logs/tanks_and_temple' 5 | 6 | data = dict( 7 | datadir='./data/TanksAndTemple/Caterpillar', 8 | dataset_type='tankstemple', 9 | inverse_y=True, 10 | load2gpu_on_the_fly=True, 11 | white_bkgd=True, 12 | ) 13 | 14 | coarse_train = dict( 15 | pervoxel_lr_downrate=2, 16 | ) 17 | 18 | -------------------------------------------------------------------------------- /configs/tankstemple/Caterpillar_lg.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_Caterpillar_lg' 4 | basedir = './logs/tanks_and_temple' 5 | 6 | data = dict( 7 | datadir='./data/TanksAndTemple/Caterpillar', 8 | dataset_type='tankstemple', 9 | inverse_y=True, 10 | load2gpu_on_the_fly=True, 11 | white_bkgd=True, 12 | ) 13 | 14 | coarse_train = dict( 15 | pervoxel_lr_downrate=2, 16 | ) 17 | 18 | fine_train = dict(pg_scale=[1000,2000,3000,4000,5000,6000]) 19 | fine_model_and_render = dict(num_voxels=256**3) 20 | 21 | -------------------------------------------------------------------------------- /configs/tankstemple/Family.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_Family' 4 | basedir = './logs/tanks_and_temple' 5 | 6 | data = dict( 7 | datadir='./data/TanksAndTemple/Family', 8 | dataset_type='tankstemple', 9 | inverse_y=True, 10 | load2gpu_on_the_fly=True, 11 | white_bkgd=True, 12 | ) 13 | 14 | coarse_train = dict( 15 | pervoxel_lr_downrate=2, 16 | ) 17 | 18 | -------------------------------------------------------------------------------- /configs/tankstemple/Family_lg.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_Family_lg' 4 | basedir = './logs/tanks_and_temple' 5 | 6 | data = dict( 7 | datadir='./data/TanksAndTemple/Family', 8 | dataset_type='tankstemple', 9 | inverse_y=True, 10 | load2gpu_on_the_fly=True, 11 | white_bkgd=True, 12 | movie_render_kwargs={'pitch_deg': 20}, 13 | ) 14 | 15 | coarse_train = dict( 16 | pervoxel_lr_downrate=2, 17 | ) 18 | 19 | fine_train = dict(pg_scale=[1000,2000,3000,4000,5000,6000]) 20 | fine_model_and_render = dict(num_voxels=256**3) 21 | 22 | -------------------------------------------------------------------------------- /configs/tankstemple/Ignatius.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_Ignatius' 4 | basedir = './logs/tanks_and_temple' 5 | 6 | data = dict( 7 | datadir='./data/TanksAndTemple/Ignatius', 8 | dataset_type='tankstemple', 9 | inverse_y=True, 10 | load2gpu_on_the_fly=True, 11 | white_bkgd=True, 12 | ) 13 | 14 | coarse_train = dict( 15 | pervoxel_lr_downrate=2, 16 | ) 17 | 18 | -------------------------------------------------------------------------------- /configs/tankstemple/Ignatius_lg.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_Ignatius_lg' 4 | basedir = './logs/tanks_and_temple' 5 | 6 | data = dict( 7 | datadir='./data/TanksAndTemple/Ignatius', 8 | dataset_type='tankstemple', 9 | inverse_y=True, 10 | load2gpu_on_the_fly=True, 11 | white_bkgd=True, 12 | ) 13 | 14 | coarse_train = dict( 15 | pervoxel_lr_downrate=2, 16 | ) 17 | 18 | fine_train = dict(pg_scale=[1000,2000,3000,4000,5000,6000]) 19 | fine_model_and_render = dict(num_voxels=256**3) 20 | 21 | 
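The `*_lg.py` configs above override only a handful of keys on top of `_base_`; every other setting is inherited from `../default.py`. A minimal sketch of the merge rule these configs rely on (mmcv-style recursive dict merge; `merge_cfg` and the sample values are illustrative, not the repo's actual loader):

    def merge_cfg(base, override):
        # dicts merge key-by-key; a '_delete_': True marker replaces the base
        # dict wholesale (see fast_color_thres in the unbounded defaults)
        out = dict(base)
        for k, v in override.items():
            if isinstance(v, dict) and isinstance(out.get(k), dict):
                v = dict(v)
                if v.pop('_delete_', False):
                    out[k] = v
                else:
                    out[k] = merge_cfg(out[k], v)
            else:
                out[k] = v
        return out

    # e.g. Ignatius_lg.py changes pg_scale and num_voxels but keeps every other
    # fine_train key from default.py (base values here are illustrative):
    base = dict(fine_train=dict(N_iters=20000, pg_scale=[1000, 2000, 3000, 4000]))
    child = dict(fine_train=dict(pg_scale=[1000, 2000, 3000, 4000, 5000, 6000]))
    assert merge_cfg(base, child)['fine_train']['N_iters'] == 20000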
-------------------------------------------------------------------------------- /configs/tankstemple/Truck.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_Truck' 4 | basedir = './logs/tanks_and_temple' 5 | 6 | data = dict( 7 | datadir='./data/TanksAndTemple/Truck', 8 | dataset_type='tankstemple', 9 | inverse_y=True, 10 | load2gpu_on_the_fly=True, 11 | white_bkgd=True, 12 | ) 13 | 14 | coarse_train = dict( 15 | pervoxel_lr_downrate=2, 16 | ) 17 | 18 | -------------------------------------------------------------------------------- /configs/tankstemple/Truck_lg.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | expname = 'dvgo_Truck_lg' 4 | basedir = './logs/tanks_and_temple' 5 | 6 | data = dict( 7 | datadir='./data/TanksAndTemple/Truck', 8 | dataset_type='tankstemple', 9 | inverse_y=True, 10 | load2gpu_on_the_fly=True, 11 | white_bkgd=True, 12 | movie_render_kwargs={'flip_up_vec': True, 'shift_y': -0.1}, 13 | ) 14 | 15 | coarse_train = dict( 16 | pervoxel_lr_downrate=2, 17 | ) 18 | 19 | fine_train = dict(pg_scale=[1000,2000,3000,4000,5000,6000]) 20 | fine_model_and_render = dict(num_voxels=256**3) 21 | 22 | -------------------------------------------------------------------------------- /configs/tankstemple_unbounded/M60.py: -------------------------------------------------------------------------------- 1 | _base_ = './tt_default.py' 2 | 3 | expname = 'dvgo_M60_unbounded' 4 | 5 | data = dict( 6 | datadir='./data/tanks_and_temples/tat_intermediate_M60', 7 | ) 8 | 9 | -------------------------------------------------------------------------------- /configs/tankstemple_unbounded/Playground.py: -------------------------------------------------------------------------------- 1 | _base_ = './tt_default.py' 2 | 3 | expname = 'dvgo_Playground_unbounded' 4 | 5 | data = dict( 6 | datadir='./data/tanks_and_temples/tat_intermediate_Playground', 7 | ) 8 | 9 | -------------------------------------------------------------------------------- /configs/tankstemple_unbounded/Train.py: -------------------------------------------------------------------------------- 1 | _base_ = './tt_default.py' 2 | 3 | expname = 'dvgo_Train_unbounded' 4 | 5 | data = dict( 6 | datadir='./data/tanks_and_temples/tat_intermediate_Train', 7 | ) 8 | 9 | -------------------------------------------------------------------------------- /configs/tankstemple_unbounded/Truck.py: -------------------------------------------------------------------------------- 1 | _base_ = './tt_default.py' 2 | 3 | expname = 'dvgo_Truck_unbounded' 4 | 5 | data = dict( 6 | datadir='./data/tanks_and_temples/tat_training_Truck', 7 | ) 8 | 9 | -------------------------------------------------------------------------------- /configs/tankstemple_unbounded/tt_default.py: -------------------------------------------------------------------------------- 1 | _base_ = '../default.py' 2 | 3 | basedir = './logs/tanks_and_temple_unbounded' 4 | 5 | data = dict( 6 | dataset_type='nerfpp', 7 | inverse_y=True, 8 | white_bkgd=True, 9 | rand_bkgd=True, 10 | unbounded_inward=True, 11 | load2gpu_on_the_fly=True, 12 | ) 13 | 14 | coarse_train = dict(N_iters=0) 15 | 16 | fine_train = dict( 17 | N_iters=30000, 18 | N_rand=4096, 19 | ray_sampler='flatten', 20 | weight_distortion=0.01, 21 | pg_scale=[1000,2000,3000,4000,5000,6000,7000], 22 | tv_before=1e9, 23 | tv_dense_before=10000, 24 | weight_tv_density=1e-6, 25 | 
weight_tv_k0=1e-7, 26 | ) 27 | 28 | alpha_init = 1e-4 29 | stepsize = 0.5 30 | 31 | fine_model_and_render = dict( 32 | num_voxels=320**3, 33 | num_voxels_base=320**3, 34 | alpha_init=alpha_init, 35 | stepsize=stepsize, 36 | fast_color_thres={ 37 | '_delete_': True, 38 | 0 : alpha_init*stepsize/10, 39 | 1500: min(alpha_init, 1e-4)*stepsize/5, 40 | 2500: min(alpha_init, 1e-4)*stepsize/2, 41 | 3500: min(alpha_init, 1e-4)*stepsize/1.5, 42 | 4500: min(alpha_init, 1e-4)*stepsize, 43 | 5500: min(alpha_init, 1e-4), 44 | 6500: 1e-4, 45 | }, 46 | world_bound_scale=1, 47 | contracted_norm='l2', 48 | ) 49 | 50 | -------------------------------------------------------------------------------- /figs/debug_cam_and_bbox.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunset1995/DirectVoxGO/341e1fc4e96efff146d42cd6f31b8199a3e536f7/figs/debug_cam_and_bbox.png -------------------------------------------------------------------------------- /figs/debug_coarse_volume.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunset1995/DirectVoxGO/341e1fc4e96efff146d42cd6f31b8199a3e536f7/figs/debug_coarse_volume.png -------------------------------------------------------------------------------- /lib/cuda/adam_upd.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/extension.h> 2 | 3 | #include <vector> 4 | 5 | // CUDA forward declarations 6 | 7 | void adam_upd_cuda( 8 | torch::Tensor param, 9 | torch::Tensor grad, 10 | torch::Tensor exp_avg, 11 | torch::Tensor exp_avg_sq, 12 | int step, float beta1, float beta2, float lr, float eps); 13 | 14 | void masked_adam_upd_cuda( 15 | torch::Tensor param, 16 | torch::Tensor grad, 17 | torch::Tensor exp_avg, 18 | torch::Tensor exp_avg_sq, 19 | int step, float beta1, float beta2, float lr, float eps); 20 | 21 | void adam_upd_with_perlr_cuda( 22 | torch::Tensor param, 23 | torch::Tensor grad, 24 | torch::Tensor exp_avg, 25 | torch::Tensor exp_avg_sq, 26 | torch::Tensor perlr, 27 | int step, float beta1, float beta2, float lr, float eps); 28 | 29 | 30 | // C++ interface 31 | 32 | #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor") 33 | #define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x " must be contiguous") 34 | #define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x) 35 | 36 | void adam_upd( 37 | torch::Tensor param, 38 | torch::Tensor grad, 39 | torch::Tensor exp_avg, 40 | torch::Tensor exp_avg_sq, 41 | int step, float beta1, float beta2, float lr, float eps) { 42 | CHECK_INPUT(param); 43 | CHECK_INPUT(grad); 44 | CHECK_INPUT(exp_avg); 45 | CHECK_INPUT(exp_avg_sq); 46 | adam_upd_cuda(param, grad, exp_avg, exp_avg_sq, 47 | step, beta1, beta2, lr, eps); 48 | } 49 | 50 | void masked_adam_upd( 51 | torch::Tensor param, 52 | torch::Tensor grad, 53 | torch::Tensor exp_avg, 54 | torch::Tensor exp_avg_sq, 55 | int step, float beta1, float beta2, float lr, float eps) { 56 | CHECK_INPUT(param); 57 | CHECK_INPUT(grad); 58 | CHECK_INPUT(exp_avg); 59 | CHECK_INPUT(exp_avg_sq); 60 | masked_adam_upd_cuda(param, grad, exp_avg, exp_avg_sq, 61 | step, beta1, beta2, lr, eps); 62 | } 63 | 64 | void adam_upd_with_perlr( 65 | torch::Tensor param, 66 | torch::Tensor grad, 67 | torch::Tensor exp_avg, 68 | torch::Tensor exp_avg_sq, 69 | torch::Tensor perlr, 70 | int step, float beta1, float beta2, float lr, float eps) { 71 | CHECK_INPUT(param); 72 | CHECK_INPUT(grad); 73 | CHECK_INPUT(exp_avg); 74 | CHECK_INPUT(exp_avg_sq); 75 | adam_upd_with_perlr_cuda(param, grad, exp_avg, exp_avg_sq, perlr, 76 | step, beta1, beta2, lr, eps); 77 | } 78 | 79 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 80 | m.def("adam_upd", &adam_upd, 81 | "Adam update"); 82 | m.def("masked_adam_upd", &masked_adam_upd, 83 | "Adam update ignoring zero grad"); 84 | m.def("adam_upd_with_perlr", &adam_upd_with_perlr, 85 | "Adam update ignoring zero grad with per-voxel lr"); 86 | } 87 | 88 |
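These bindings are compiled at import time and consumed by `lib/masked_adam.py`. A sketch of how the exposed functions can be driven from Python, assuming the same `torch.utils.cpp_extension.load` pattern used in `lib/grid.py` (the `fused_masked_adam_step` helper below is illustrative, not the repo's wrapper):

    import os
    import torch
    from torch.utils.cpp_extension import load

    parent_dir = os.path.dirname(os.path.abspath(__file__))
    adam_upd_cuda = load(
        name='adam_upd_cuda',
        sources=[os.path.join(parent_dir, path)
                 for path in ['cuda/adam_upd.cpp', 'cuda/adam_upd_kernel.cu']],
        verbose=True)

    @torch.no_grad()
    def fused_masked_adam_step(param, exp_avg, exp_avg_sq, step,
                               beta1=0.9, beta2=0.99, lr=1e-1, eps=1e-8):
        # one fused kernel launch; entries whose grad is exactly 0 (voxels no
        # sampled ray touched this iteration) keep params and moment buffers
        adam_upd_cuda.masked_adam_upd(
            param, param.grad, exp_avg, exp_avg_sq,
            step, beta1, beta2, lr, eps)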
-------------------------------------------------------------------------------- /lib/cuda/adam_upd_kernel.cu: -------------------------------------------------------------------------------- 1 | #include <torch/extension.h> 2 | 3 | #include <cuda.h> 4 | #include <cuda_runtime.h> 5 | 6 | #include <vector> 7 | 8 | template <typename scalar_t> 9 | __global__ void adam_upd_cuda_kernel( 10 | scalar_t* __restrict__ param, 11 | const scalar_t* __restrict__ grad, 12 | scalar_t* __restrict__ exp_avg, 13 | scalar_t* __restrict__ exp_avg_sq, 14 | const size_t N, 15 | const float step_size, const float beta1, const float beta2, const float eps) { 16 | 17 | const size_t index = blockIdx.x * blockDim.x + threadIdx.x; 18 | if(index<N) { 19 | exp_avg[index] = beta1 * exp_avg[index] + (1-beta1) * grad[index]; 20 | exp_avg_sq[index] = beta2 * exp_avg_sq[index] + (1-beta2) * grad[index] * grad[index]; 21 | param[index] -= step_size * exp_avg[index] / (sqrt(exp_avg_sq[index]) + eps); 22 | } 23 | } 24 | 25 | template <typename scalar_t> 26 | __global__ void masked_adam_upd_cuda_kernel( 27 | scalar_t* __restrict__ param, 28 | const scalar_t* __restrict__ grad, 29 | scalar_t* __restrict__ exp_avg, 30 | scalar_t* __restrict__ exp_avg_sq, 31 | const size_t N, 32 | const float step_size, const float beta1, const float beta2, const float eps) { 33 | 34 | const size_t index = blockIdx.x * blockDim.x + threadIdx.x; 35 | if(index<N && grad[index]!=0) { 36 | exp_avg[index] = beta1 * exp_avg[index] + (1-beta1) * grad[index]; 37 | exp_avg_sq[index] = beta2 * exp_avg_sq[index] + (1-beta2) * grad[index] * grad[index]; 38 | param[index] -= step_size * exp_avg[index] / (sqrt(exp_avg_sq[index]) + eps); 39 | } 40 | } 41 | 42 | template <typename scalar_t> 43 | __global__ void adam_upd_with_perlr_cuda_kernel( 44 | scalar_t* __restrict__ param, 45 | const scalar_t* __restrict__ grad, 46 | scalar_t* __restrict__ exp_avg, 47 | scalar_t* __restrict__ exp_avg_sq, 48 | scalar_t* __restrict__ perlr, 49 | const size_t N, 50 | const float step_size, const float beta1, const float beta2, const float eps) { 51 | 52 | const size_t index = blockIdx.x * blockDim.x + threadIdx.x; 53 | if(index<N) { 54 | exp_avg[index] = beta1 * exp_avg[index] + (1-beta1) * grad[index]; 55 | exp_avg_sq[index] = beta2 * exp_avg_sq[index] + (1-beta2) * grad[index] * grad[index]; 56 | param[index] -= step_size * perlr[index] * exp_avg[index] / (sqrt(exp_avg_sq[index]) + eps); 57 | } 58 | } 59 | 60 | void adam_upd_cuda( 61 | torch::Tensor param, 62 | torch::Tensor grad, 63 | torch::Tensor exp_avg, 64 | torch::Tensor exp_avg_sq, 65 | const int step, const float beta1, const float beta2, const float lr, const float eps) { 66 | 67 | const size_t N = param.numel(); 68 | 69 | const int threads = 256; 70 | const int blocks = (N + threads - 1) / threads; 71 | 72 | const float step_size = lr * sqrt(1 - pow(beta2, (float)step)) / (1 - pow(beta1, (float)step)); 73 | 74 | AT_DISPATCH_FLOATING_TYPES(param.type(), "adam_upd_cuda", ([&] { 75 | adam_upd_cuda_kernel<scalar_t><<<blocks, threads>>>( 76 | param.data<scalar_t>(), 77 | grad.data<scalar_t>(), 78 | exp_avg.data<scalar_t>(), 79 | exp_avg_sq.data<scalar_t>(), 80 | N, step_size, beta1, beta2, eps); 81 | })); 82 | } 83 | 84 | void masked_adam_upd_cuda( 85 | torch::Tensor param, 86 | torch::Tensor grad, 87 | torch::Tensor exp_avg, 88 | torch::Tensor exp_avg_sq, 89 | const int step, const float beta1, const float beta2, const float lr, const float eps) { 90 | 91 | const size_t N = param.numel(); 92 | 93 | const int threads = 256; 94 | const int blocks = (N + threads - 1) / threads; 95 | 96 | const float step_size = lr * sqrt(1 - pow(beta2, (float)step)) / (1 - pow(beta1, (float)step)); 97 | 98 | AT_DISPATCH_FLOATING_TYPES(param.type(), "masked_adam_upd_cuda", ([&] { 99 | masked_adam_upd_cuda_kernel<scalar_t><<<blocks, threads>>>( 100 | param.data<scalar_t>(), 101 | grad.data<scalar_t>(), 102 | exp_avg.data<scalar_t>(), 103 | exp_avg_sq.data<scalar_t>(), 104 | N, step_size, beta1, beta2, eps); 105 | })); 106 | } 107 | 108 | void adam_upd_with_perlr_cuda( 109 | torch::Tensor param, 110 | torch::Tensor grad, 111 | torch::Tensor exp_avg, 112 | torch::Tensor exp_avg_sq, 113 | torch::Tensor perlr, 114 | const int step, const float beta1, const float beta2, const float lr, const float eps) { 115 | 116 | const size_t N = param.numel(); 117 | 118 | const int threads = 256; 119 | const int blocks = (N + threads - 1) / threads; 120 | 121 | const float step_size = lr * sqrt(1 - pow(beta2, (float)step)) / (1 - pow(beta1, (float)step)); 122 | 123 | AT_DISPATCH_FLOATING_TYPES(param.type(), "adam_upd_with_perlr_cuda", ([&] { 124 | adam_upd_with_perlr_cuda_kernel<scalar_t><<<blocks, threads>>>( 125 | param.data<scalar_t>(), 126 | grad.data<scalar_t>(), 127 | exp_avg.data<scalar_t>(), 128 | exp_avg_sq.data<scalar_t>(), 129 | perlr.data<scalar_t>(), 130 | N, step_size, beta1, beta2, eps); 131 | })); 132 | } 133 | 134 |
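The three kernels differ only in their update mask and per-entry step scaling: the masked variant skips entries whose gradient is exactly zero, and the `perlr` variant multiplies the step by a per-voxel factor. A plain-PyTorch reference of the same update, with the bias correction folded into one scalar `step_size` as in the host functions above (a sketch for sanity-checking the CUDA path, not repo code):

    import torch

    def adam_upd_reference(param, grad, exp_avg, exp_avg_sq, step,
                           beta1=0.9, beta2=0.99, lr=1e-1, eps=1e-8,
                           perlr=None, masked=False):
        # bias correction folded into a single scalar, as on the host side
        step_size = lr * (1 - beta2 ** step) ** 0.5 / (1 - beta1 ** step)
        m = (grad != 0) if masked else torch.ones_like(grad, dtype=torch.bool)
        exp_avg[m] = beta1 * exp_avg[m] + (1 - beta1) * grad[m]
        exp_avg_sq[m] = beta2 * exp_avg_sq[m] + (1 - beta2) * grad[m] ** 2
        lr_m = step_size if perlr is None else step_size * perlr[m]
        param[m] -= lr_m * exp_avg[m] / (exp_avg_sq[m].sqrt() + eps)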
-------------------------------------------------------------------------------- /lib/cuda/render_utils.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/extension.h> 2 | 3 | #include <vector> 4 | 5 | // CUDA forward declarations 6 | 7 | std::vector<torch::Tensor> infer_t_minmax_cuda( 8 | torch::Tensor rays_o, torch::Tensor rays_d, torch::Tensor xyz_min, torch::Tensor xyz_max, 9 | const float near, const float far); 10 | 11 | torch::Tensor infer_n_samples_cuda(torch::Tensor rays_d, torch::Tensor t_min, torch::Tensor t_max, const float stepdist); 12 | 13 | std::vector<torch::Tensor> infer_ray_start_dir_cuda(torch::Tensor rays_o, torch::Tensor rays_d, torch::Tensor t_min); 14 | 15 | std::vector<torch::Tensor> sample_pts_on_rays_cuda( 16 | torch::Tensor rays_o, torch::Tensor rays_d, 17 | torch::Tensor xyz_min, torch::Tensor xyz_max, 18 | const float near, const float far, const float stepdist); 19 | 20 | std::vector<torch::Tensor> sample_ndc_pts_on_rays_cuda( 21 | torch::Tensor rays_o, torch::Tensor rays_d, 22 | torch::Tensor xyz_min, torch::Tensor xyz_max, 23 | const int N_samples); 24 | 25 | torch::Tensor sample_bg_pts_on_rays_cuda( 26 | torch::Tensor rays_o, torch::Tensor rays_d, torch::Tensor t_max, 27 | const float bg_preserve, const int N_samples); 28 | 29 | torch::Tensor maskcache_lookup_cuda(torch::Tensor world, torch::Tensor xyz, torch::Tensor xyz2ijk_scale, torch::Tensor xyz2ijk_shift); 30 | 31 | std::vector<torch::Tensor> raw2alpha_cuda(torch::Tensor density, const float shift, const float interval); 32 | std::vector<torch::Tensor> raw2alpha_nonuni_cuda(torch::Tensor density, const float shift, torch::Tensor interval); 33 | 34 | torch::Tensor raw2alpha_backward_cuda(torch::Tensor exp, torch::Tensor grad_back, const float interval); 35 | torch::Tensor raw2alpha_nonuni_backward_cuda(torch::Tensor exp, torch::Tensor grad_back, torch::Tensor interval); 36 | 37 | std::vector<torch::Tensor> alpha2weight_cuda(torch::Tensor alpha, torch::Tensor ray_id, const int n_rays); 38 | 39 | torch::Tensor alpha2weight_backward_cuda( 40 | torch::Tensor alpha, torch::Tensor weight, torch::Tensor T, torch::Tensor alphainv_last, 41 | torch::Tensor i_start, torch::Tensor i_end, const int n_rays, 42 | torch::Tensor grad_weights, torch::Tensor grad_last); 43 | 44 | // C++ interface 45 | 46 | #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor") 47 | #define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x " must be contiguous") 48 | #define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x) 49 | 50 | std::vector<torch::Tensor> infer_t_minmax( 51 | torch::Tensor rays_o, torch::Tensor rays_d, torch::Tensor xyz_min, torch::Tensor xyz_max, 52 | const float near, const float far) { 53 | CHECK_INPUT(rays_o); 54 | CHECK_INPUT(rays_d); 55 | CHECK_INPUT(xyz_min); 56 | CHECK_INPUT(xyz_max); 57 | return infer_t_minmax_cuda(rays_o, rays_d, xyz_min, xyz_max, near, far); 58 | } 59 | 60 | torch::Tensor infer_n_samples(torch::Tensor rays_d, torch::Tensor t_min, torch::Tensor t_max, const float stepdist) { 61 | CHECK_INPUT(rays_d); 62 | CHECK_INPUT(t_min); 63 | CHECK_INPUT(t_max); 64 | return infer_n_samples_cuda(rays_d, t_min, t_max, stepdist); 65 | } 66 | 67 | std::vector<torch::Tensor> infer_ray_start_dir(torch::Tensor rays_o, torch::Tensor rays_d, torch::Tensor t_min) { 68 | CHECK_INPUT(rays_o); 69 | CHECK_INPUT(rays_d); 70 | CHECK_INPUT(t_min); 71 | return infer_ray_start_dir_cuda(rays_o, rays_d, t_min); 72 | } 73 | 74 | std::vector<torch::Tensor> sample_pts_on_rays( 75 | torch::Tensor rays_o, torch::Tensor rays_d, 76 | torch::Tensor xyz_min, torch::Tensor xyz_max, 77 | const float near, const float far, const float stepdist) { 78 | CHECK_INPUT(rays_o); 79 | CHECK_INPUT(rays_d); 80 | CHECK_INPUT(xyz_min); 81 | CHECK_INPUT(xyz_max); 82 | assert(rays_o.dim()==2); 83 | assert(rays_o.size(1)==3); 84 | return sample_pts_on_rays_cuda(rays_o, rays_d, xyz_min, xyz_max, near, far, stepdist); 85 | } 86 | 87 | std::vector<torch::Tensor> sample_ndc_pts_on_rays( 88 | torch::Tensor rays_o, torch::Tensor rays_d, 89 | torch::Tensor xyz_min, torch::Tensor xyz_max, 90 | const int N_samples) { 91 | CHECK_INPUT(rays_o); 92 | CHECK_INPUT(rays_d); 93 | CHECK_INPUT(xyz_min); 94 | CHECK_INPUT(xyz_max); 95 | assert(rays_o.dim()==2); 96 | assert(rays_o.size(1)==3); 97 | return sample_ndc_pts_on_rays_cuda(rays_o, rays_d, xyz_min, xyz_max, N_samples); 98 | } 99 | 100 | torch::Tensor sample_bg_pts_on_rays( 101 | torch::Tensor rays_o, torch::Tensor rays_d, torch::Tensor t_max, 102 | const float bg_preserve, const int N_samples) { 103 | CHECK_INPUT(rays_o); 104 | CHECK_INPUT(rays_d); 105 | CHECK_INPUT(t_max); 106 | return sample_bg_pts_on_rays_cuda(rays_o, rays_d, t_max, bg_preserve, N_samples); 107 | } 108 | 109 | torch::Tensor maskcache_lookup(torch::Tensor world, torch::Tensor xyz, torch::Tensor xyz2ijk_scale, torch::Tensor xyz2ijk_shift) { 110 | CHECK_INPUT(world); 111 | CHECK_INPUT(xyz); 112 | CHECK_INPUT(xyz2ijk_scale); 113 | CHECK_INPUT(xyz2ijk_shift); 114 | assert(world.dim()==3); 115 | assert(xyz.dim()==2); 116 | assert(xyz.size(1)==3); 117 | return maskcache_lookup_cuda(world, xyz, xyz2ijk_scale, xyz2ijk_shift); 118 | } 119 | 120 | std::vector<torch::Tensor> raw2alpha(torch::Tensor density, const float shift, const float interval) { 121 | CHECK_INPUT(density); 122 | assert(density.dim()==1); 123 | return raw2alpha_cuda(density, shift, interval); 124 | } 125 | std::vector<torch::Tensor> raw2alpha_nonuni(torch::Tensor density, const float shift, torch::Tensor interval) { 126 | CHECK_INPUT(density); 127 | assert(density.dim()==1); 128 | return raw2alpha_nonuni_cuda(density, shift, interval); 129 | } 130 | 131 | torch::Tensor raw2alpha_backward(torch::Tensor exp, torch::Tensor grad_back, const float interval) { 132 | CHECK_INPUT(exp); 133 | CHECK_INPUT(grad_back); 134 | return raw2alpha_backward_cuda(exp, grad_back, interval); 135 | } 136 | torch::Tensor raw2alpha_nonuni_backward(torch::Tensor exp, torch::Tensor grad_back, torch::Tensor interval) { 137 | CHECK_INPUT(exp); 138 | CHECK_INPUT(grad_back); 139 | return raw2alpha_nonuni_backward_cuda(exp, grad_back, interval); 140 | } 141 | 142 | std::vector<torch::Tensor> alpha2weight(torch::Tensor alpha, torch::Tensor ray_id, const int n_rays) { 143 | CHECK_INPUT(alpha); 144 | CHECK_INPUT(ray_id); 145 | assert(alpha.dim()==1); 146 | assert(ray_id.dim()==1); 147 | assert(alpha.sizes()==ray_id.sizes()); 148 | return alpha2weight_cuda(alpha, ray_id, n_rays); 149 | } 150 | 151 | torch::Tensor alpha2weight_backward( 152 | torch::Tensor alpha, torch::Tensor weight, torch::Tensor T, torch::Tensor alphainv_last, 153 | torch::Tensor i_start, torch::Tensor i_end, const int n_rays, 154 | torch::Tensor grad_weights, torch::Tensor grad_last) { 155 | CHECK_INPUT(alpha); 156 | CHECK_INPUT(weight); 157 | CHECK_INPUT(T); 158 | CHECK_INPUT(alphainv_last); 159 | CHECK_INPUT(i_start); 160 | CHECK_INPUT(i_end); 161 | CHECK_INPUT(grad_weights); 162 | CHECK_INPUT(grad_last); 163 | return alpha2weight_backward_cuda( 164 | alpha, weight, T, alphainv_last, 165 | i_start, i_end, n_rays, 166 | grad_weights, grad_last); 167 | } 168 | 169 | 170 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 171 | m.def("infer_t_minmax", &infer_t_minmax, "Infer t_min and t_max of ray-bbox intersection"); 172 | m.def("infer_n_samples", &infer_n_samples, "Infer the number of points to sample on each ray"); 173 | m.def("infer_ray_start_dir", &infer_ray_start_dir, "Infer the starting point and shooting direction of each ray"); 174 | m.def("sample_pts_on_rays", &sample_pts_on_rays, "Sample points on rays"); 175 | m.def("sample_ndc_pts_on_rays", &sample_ndc_pts_on_rays, "Sample points on rays in NDC"); 176 | m.def("sample_bg_pts_on_rays", &sample_bg_pts_on_rays, "Sample points on bg"); 177 | m.def("maskcache_lookup", &maskcache_lookup, "Lookup to skip known free space."); 178 | m.def("raw2alpha", &raw2alpha, "Raw values [-inf, inf] to alpha [0, 1]."); 179 | m.def("raw2alpha_backward", &raw2alpha_backward, "Backward pass of the raw to alpha"); 180 | m.def("raw2alpha_nonuni", &raw2alpha_nonuni, "Raw values [-inf, inf] to alpha [0, 1]."); 181 | m.def("raw2alpha_nonuni_backward", &raw2alpha_nonuni_backward, "Backward pass of the raw to alpha"); 182 | m.def("alpha2weight", &alpha2weight, "Per-point alpha to accumulated blending weight"); 183 | m.def("alpha2weight_backward", &alpha2weight_backward, "Backward pass of alpha2weight"); 184 | } 185 | 186 | -------------------------------------------------------------------------------- /lib/cuda/total_variation.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/extension.h> 2 | 3 | #include <vector> 4 | 5 | // CUDA forward declarations 6 | 7 | void total_variation_add_grad_cuda(torch::Tensor param, torch::Tensor grad, float wx, float wy, float wz, bool dense_mode); 8 | 9 | 10 | // C++ interface 11 | 12 | #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor") 13 | #define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x " must be contiguous") 14 | #define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x) 15 | 16 | void total_variation_add_grad(torch::Tensor param, torch::Tensor grad, float wx, float wy, float wz, bool dense_mode) { 17 | CHECK_INPUT(param); 18 | CHECK_INPUT(grad); 19 | total_variation_add_grad_cuda(param, grad, wx, wy, wz, dense_mode); 20 | } 21 | 22 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 23 | m.def("total_variation_add_grad", &total_variation_add_grad, "Add total variation grad"); 24 | } 25 | 26 | -------------------------------------------------------------------------------- /lib/cuda/total_variation_kernel.cu: -------------------------------------------------------------------------------- 1 | #include <torch/extension.h> 2 | 3 | #include <cuda.h> 4 | #include <cuda_runtime.h> 5 | 6 | #include <vector> 7 | 8 | template <typename scalar_t, typename bound_t> 9 | __device__ __forceinline__ scalar_t clamp(const scalar_t v, const bound_t lo, const bound_t hi) { 10 | return min(max(v, lo), hi); 11 | } 12 | 13 | template <typename scalar_t, bool dense_mode> 14 | __global__ void total_variation_add_grad_cuda_kernel( 15 | const scalar_t* __restrict__ param, 16 | scalar_t* __restrict__ grad, 17 | float wx, float wy, float wz, 18 | const size_t sz_i, const size_t sz_j, const size_t sz_k, const size_t N) { 19 | 20 | const size_t index = blockIdx.x * blockDim.x + threadIdx.x; 21 | if(index<N && (dense_mode || grad[index]!=0)) { 22 | const size_t k = index % sz_k; 23 | const size_t j = index / sz_k % sz_j; 24 | const size_t i = index / sz_k / sz_j % sz_i; 25 | 26 | float grad_to_add = 0; 27 | grad_to_add += (k==0      ? 0 : wz * clamp(param[index]-param[index-1], -1.f, 1.f)); 28 | grad_to_add += (k==sz_k-1 ? 0 : wz * clamp(param[index]-param[index+1], -1.f, 1.f)); 29 | grad_to_add += (j==0      ? 0 : wy * clamp(param[index]-param[index-sz_k], -1.f, 1.f)); 30 | grad_to_add += (j==sz_j-1 ? 0 : wy * clamp(param[index]-param[index+sz_k], -1.f, 1.f)); 31 | grad_to_add += (i==0      ? 0 : wx * clamp(param[index]-param[index-sz_k*sz_j], -1.f, 1.f)); 32 | grad_to_add += (i==sz_i-1 ? 0 : wx * clamp(param[index]-param[index+sz_k*sz_j], -1.f, 1.f)); 33 | grad[index] += grad_to_add; 34 | } 35 | } 36 | 37 | void total_variation_add_grad_cuda(torch::Tensor param, torch::Tensor grad, float wx, float wy, float wz, bool dense_mode) { 38 | const size_t N = param.numel(); 39 | const size_t sz_i = param.size(2); 40 | const size_t sz_j = param.size(3); 41 | const size_t sz_k = param.size(4); 42 | const int threads = 256; 43 | const int blocks = (N + threads - 1) / threads; 44 | 45 | wx /= 6; 46 | wy /= 6; 47 | wz /= 6; 48 | 49 | if(dense_mode) { 50 | AT_DISPATCH_FLOATING_TYPES(param.type(), "total_variation_add_grad_cuda", ([&] { 51 | total_variation_add_grad_cuda_kernel<scalar_t,true><<<blocks, threads>>>( 52 | param.data<scalar_t>(), 53 | grad.data<scalar_t>(), 54 | wx, wy, wz, 55 | sz_i, sz_j, sz_k, N); 56 | })); 57 | } 58 | else { 59 | AT_DISPATCH_FLOATING_TYPES(param.type(), "total_variation_add_grad_cuda", ([&] { 60 | total_variation_add_grad_cuda_kernel<scalar_t,false><<<blocks, threads>>>( 61 | param.data<scalar_t>(), 62 | grad.data<scalar_t>(), 63 | wx, wy, wz, 64 | sz_i, sz_j, sz_k, N); 65 | })); 66 | } 67 | } 68 | 69 |
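The kernel above writes the gradient of a clamped (smooth-L1-style) total-variation penalty straight into `grad`, averaging the six axis-neighbor terms. An equivalent written with plain tensor ops; a reference sketch consistent with the kernel as shown, not repo code:

    import torch

    def tv_add_grad_reference(param, grad, wx, wy, wz, dense_mode=True):
        # param, grad: [1, C, X, Y, Z] as produced by DenseGrid
        g = torch.zeros_like(param)
        for w, dim in ((wx, 2), (wy, 3), (wz, 4)):
            n = param.size(dim) - 1
            d = (param.narrow(dim, 1, n) - param.narrow(dim, 0, n)).clamp(-1, 1)
            g.narrow(dim, 1, n).add_(d, alpha=w / 6)   # d|x_{i+1}-x_i| wrt x_{i+1}
            g.narrow(dim, 0, n).sub_(d, alpha=w / 6)   # d|x_{i+1}-x_i| wrt x_i
        if not dense_mode:
            g = g * (grad != 0)  # sparse mode: only voxels touched this step
        grad += g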
-------------------------------------------------------------------------------- /lib/cuda/ub360_utils.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/extension.h> 2 | 3 | #include <vector> 4 | 5 | // CUDA forward declarations 6 | 7 | torch::Tensor cumdist_thres_cuda(torch::Tensor dist, float thres); 8 | 9 | // C++ interface 10 | 11 | #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor") 12 | #define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x " must be contiguous") 13 | #define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x) 14 | 15 | torch::Tensor cumdist_thres(torch::Tensor dist, float thres) { 16 | CHECK_INPUT(dist); 17 | return cumdist_thres_cuda(dist, thres); 18 | } 19 | 20 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 21 | m.def("cumdist_thres", &cumdist_thres, "Generate mask for cumulative dist."); 22 | } 23 | 24 | -------------------------------------------------------------------------------- /lib/cuda/ub360_utils_kernel.cu: -------------------------------------------------------------------------------- 1 | #include <torch/extension.h> 2 | 3 | #include <cuda.h> 4 | #include <cuda_runtime.h> 5 | 6 | #include <vector> 7 | 8 | /* 9 | helper function to skip oversampled points, 10 | especially near the foreground scene bbox boundary 11 | */ 12 | template <typename scalar_t> 13 | __global__ void cumdist_thres_cuda_kernel( 14 | scalar_t* __restrict__ dist, 15 | const float thres, 16 | const int n_rays, 17 | const int n_pts, 18 | bool* __restrict__ mask) { 19 | const int i_ray = blockIdx.x * blockDim.x + threadIdx.x; 20 | if(i_ray<n_rays) { 21 | float cum_dist = 0; 22 | for(int i_pt=0; i_pt<n_pts; ++i_pt) { 23 | const int i = i_ray * n_pts + i_pt; 24 | cum_dist += dist[i]; 25 | // flag the point once the accumulated step distance passes thres, 26 | // then reset the accumulator and keep scanning along the ray 27 | const bool over = (cum_dist > thres); 28 | cum_dist *= float(!over); 29 | mask[i] = over; 30 | } 31 | } 32 | } 33 | 34 | torch::Tensor cumdist_thres_cuda(torch::Tensor dist, float thres) { 35 | const int n_rays = dist.size(0); 36 | const int n_pts = dist.size(1); 37 | const int threads = 256; 38 | const int blocks = (n_rays + threads - 1) / threads; 39 | auto mask = torch::zeros({n_rays, n_pts}, torch::dtype(torch::kBool).device(torch::kCUDA)); 40 | AT_DISPATCH_FLOATING_TYPES(dist.type(), "cumdist_thres_cuda", ([&] { 41 | cumdist_thres_cuda_kernel<scalar_t><<<blocks, threads>>>( 42 | dist.data<scalar_t>(), thres, 43 | n_rays, n_pts, 44 | mask.data<bool>()); 45 | })); 46 | return mask; 47 | } 48 | 49 | -------------------------------------------------------------------------------- /lib/dbvgo.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import functools 4 | import numpy as np 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | 10 | from torch_scatter import segment_coo 11 | 12 | from . 
import grid 13 | from .dvgo import Raw2Alpha, Alphas2Weights, render_utils_cuda 14 | from .dmpigo import create_full_step_id 15 | 16 | 17 | '''Model''' 18 | class DirectBiVoxGO(nn.Module): 19 | def __init__(self, xyz_min, xyz_max, 20 | num_voxels=0, num_voxels_base=0, 21 | alpha_init=None, 22 | mask_cache_world_size=None, 23 | fast_color_thres=0, bg_preserve=0.5, 24 | density_type='DenseGrid', k0_type='DenseGrid', 25 | density_config={}, k0_config={}, 26 | rgbnet_dim=0, bg_use_mlp=True, 27 | rgbnet_depth=3, rgbnet_width=128, 28 | viewbase_pe=4, 29 | **kwargs): 30 | super(DirectBiVoxGO, self).__init__() 31 | xyz_min = torch.Tensor(xyz_min) 32 | xyz_max = torch.Tensor(xyz_max) 33 | assert len(((xyz_max - xyz_min) * 100000).long().unique()), 'scene bbox must be a cube in DirectBiVoxGO' 34 | self.register_buffer('scene_center', (xyz_min + xyz_max) * 0.5) 35 | self.register_buffer('scene_radius', (xyz_max - xyz_min) * 0.5) 36 | self.register_buffer('xyz_min', torch.Tensor([-1,-1,-1])) 37 | self.register_buffer('xyz_max', torch.Tensor([1,1,1])) 38 | self.fast_color_thres = fast_color_thres 39 | self.bg_preserve = bg_preserve 40 | 41 | # determine based grid resolution 42 | self.num_voxels_base = num_voxels_base 43 | self.voxel_size_base = ((self.xyz_max - self.xyz_min).prod() / self.num_voxels_base).pow(1/3) 44 | 45 | # determine the density bias shift 46 | self.alpha_init = alpha_init 47 | self.register_buffer('act_shift', torch.FloatTensor([np.log(1/(1-alpha_init) - 1)])) 48 | print('dvgo: set density bias shift to', self.act_shift) 49 | 50 | # determine init grid resolution 51 | self._set_grid_resolution(num_voxels) 52 | 53 | # init density voxel grid 54 | self.density_type = density_type 55 | self.density_config = density_config 56 | self.density = nn.ModuleList([ 57 | grid.create_grid( 58 | density_type, channels=1, world_size=self.world_size, 59 | xyz_min=self.xyz_min, xyz_max=self.xyz_max, 60 | config=self.density_config) 61 | for _ in range(2) 62 | ]) 63 | 64 | # init color representation 65 | self.rgbnet_kwargs = { 66 | 'rgbnet_dim': rgbnet_dim, 67 | 'rgbnet_depth': rgbnet_depth, 'rgbnet_width': rgbnet_width, 68 | 'viewbase_pe': viewbase_pe, 69 | } 70 | self.k0_type = k0_type 71 | self.k0_config = k0_config 72 | if rgbnet_dim <= 0: 73 | # color voxel grid (coarse stage) 74 | self.k0_dim = 3 75 | self.k0 = nn.ModuleList([ 76 | grid.create_grid( 77 | k0_type, channels=self.k0_dim, world_size=self.world_size, 78 | xyz_min=self.xyz_min, xyz_max=self.xyz_max, 79 | config=self.k0_config) 80 | for _ in range(2) 81 | ]) 82 | self.rgbnet = None 83 | else: 84 | # feature voxel grid + shallow MLP (fine stage) 85 | self.k0_dim = rgbnet_dim 86 | self.k0 = nn.ModuleList([ 87 | grid.create_grid( 88 | k0_type, channels=self.k0_dim, world_size=self.world_size, 89 | xyz_min=self.xyz_min, xyz_max=self.xyz_max, 90 | config=self.k0_config) 91 | for _ in range(2) 92 | ]) 93 | self.register_buffer('viewfreq', torch.FloatTensor([(2**i) for i in range(viewbase_pe)])) 94 | dim0 = (3+3*viewbase_pe*2) 95 | dim0 += self.k0_dim 96 | self.rgbnet = nn.ModuleList([ 97 | nn.Sequential( 98 | nn.Linear(dim0, rgbnet_width), nn.ReLU(inplace=True), 99 | *[ 100 | nn.Sequential(nn.Linear(rgbnet_width, rgbnet_width), nn.ReLU(inplace=True)) 101 | for _ in range(rgbnet_depth-2) 102 | ], 103 | nn.Linear(rgbnet_width, 3), 104 | ) 105 | for _ in range(2) 106 | ]) 107 | nn.init.constant_(self.rgbnet[0][-1].bias, 0) 108 | nn.init.constant_(self.rgbnet[1][-1].bias, 0) 109 | if not bg_use_mlp: 110 | self.k0[1] = grid.create_grid( 111 | 
k0_type, channels=3, world_size=self.world_size, 112 | xyz_min=self.xyz_min, xyz_max=self.xyz_max, 113 | config=self.k0_config) 114 | self.rgbnet[1] = None 115 | print('dvgo: feature voxel grid', self.k0) 116 | print('dvgo: mlp', self.rgbnet) 117 | 118 | # Using the coarse geometry if provided (used to determine known free space and unknown space) 119 | # Re-implement as occupancy grid (2021/1/31) 120 | if mask_cache_world_size is None: 121 | mask_cache_world_size = self.world_size 122 | mask = torch.ones(list(mask_cache_world_size), dtype=torch.bool) 123 | self.mask_cache = nn.ModuleList([ 124 | grid.MaskGrid( 125 | path=None, mask=mask, 126 | xyz_min=self.xyz_min, xyz_max=self.xyz_max) 127 | for _ in range(2) 128 | ]) 129 | 130 | def _set_grid_resolution(self, num_voxels): 131 | # Determine grid resolution 132 | self.num_voxels = num_voxels 133 | self.voxel_size = ((self.xyz_max - self.xyz_min).prod() / num_voxels).pow(1/3) 134 | self.world_size = ((self.xyz_max - self.xyz_min) / self.voxel_size).long() 135 | self.voxel_size_ratio = self.voxel_size / self.voxel_size_base 136 | print('dvgo: voxel_size ', self.voxel_size) 137 | print('dvgo: world_size ', self.world_size) 138 | print('dvgo: voxel_size_base ', self.voxel_size_base) 139 | print('dvgo: voxel_size_ratio', self.voxel_size_ratio) 140 | 141 | def get_kwargs(self): 142 | return { 143 | 'xyz_min': self.xyz_min.cpu().numpy(), 144 | 'xyz_max': self.xyz_max.cpu().numpy(), 145 | 'num_voxels': self.num_voxels, 146 | 'num_voxels_base': self.num_voxels_base, 147 | 'alpha_init': self.alpha_init, 148 | 'voxel_size_ratio': self.voxel_size_ratio, 149 | 'mask_cache_world_size': list(self.mask_cache[0].mask.shape), 150 | 'fast_color_thres': self.fast_color_thres, 151 | 'density_type': self.density_type, 152 | 'k0_type': self.k0_type, 153 | 'density_config': self.density_config, 154 | 'k0_config': self.k0_config, 155 | **self.rgbnet_kwargs, 156 | } 157 | 158 | @torch.no_grad() 159 | def scale_volume_grid(self, num_voxels): 160 | print('dvgo: scale_volume_grid start') 161 | ori_world_size = self.world_size 162 | self._set_grid_resolution(num_voxels) 163 | print('dvgo: scale_volume_grid scale world_size from', ori_world_size.tolist(), 'to', self.world_size.tolist()) 164 | 165 | self.density[0].scale_volume_grid(self.world_size) 166 | self.density[1].scale_volume_grid(self.world_size) 167 | self.k0[0].scale_volume_grid(self.world_size) 168 | self.k0[1].scale_volume_grid(self.world_size) 169 | 170 | if np.prod(list(self.world_size)) <= 256**3: 171 | self_grid_xyz = torch.stack(torch.meshgrid( 172 | torch.linspace(self.xyz_min[0], self.xyz_max[0], self.world_size[0]), 173 | torch.linspace(self.xyz_min[1], self.xyz_max[1], self.world_size[1]), 174 | torch.linspace(self.xyz_min[2], self.xyz_max[2], self.world_size[2]), 175 | ), -1) 176 | self_alpha = [ 177 | F.max_pool3d(self.activate_density(self.density[0].get_dense_grid()), kernel_size=3, padding=1, stride=1)[0,0], 178 | F.max_pool3d(self.activate_density(self.density[1].get_dense_grid()), kernel_size=3, padding=1, stride=1)[0,0], 179 | ] 180 | self.mask_cache = nn.ModuleList([ 181 | grid.MaskGrid( 182 | path=None, mask=(self_alpha[i]>self.fast_color_thres), 183 | xyz_min=self.xyz_min, xyz_max=self.xyz_max) 184 | for i in range(2) 185 | ]) 186 | 187 | print('dvgo: scale_volume_grid finish') 188 | 189 | @torch.no_grad() 190 | def update_occupancy_cache(self): 191 | cache_grid_xyz = torch.stack(torch.meshgrid( 192 | torch.linspace(self.xyz_min[0], self.xyz_max[0], self.mask_cache[0].mask.shape[0]), 193 
| torch.linspace(self.xyz_min[1], self.xyz_max[1], self.mask_cache[0].mask.shape[1]), 194 | torch.linspace(self.xyz_min[2], self.xyz_max[2], self.mask_cache[0].mask.shape[2]), 195 | ), -1) 196 | for i in range(2): 197 | cache_grid_density = self.density[i](cache_grid_xyz)[None,None] 198 | cache_grid_alpha = self.activate_density(cache_grid_density) 199 | cache_grid_alpha = F.max_pool3d(cache_grid_alpha, kernel_size=3, padding=1, stride=1)[0,0] 200 | self.mask_cache[i].mask &= (cache_grid_alpha > self.fast_color_thres) 201 | 202 | def density_total_variation_add_grad(self, weight, dense_mode): 203 | w = weight * self.world_size.max() / 128 204 | self.density[0].total_variation_add_grad(w, w, w, dense_mode) 205 | self.density[1].total_variation_add_grad(w, w, w, dense_mode) 206 | 207 | def k0_total_variation_add_grad(self, weight, dense_mode): 208 | w = weight * self.world_size.max() / 128 209 | self.k0[0].total_variation_add_grad(w, w, w, dense_mode) 210 | self.k0[1].total_variation_add_grad(w, w, w, dense_mode) 211 | 212 | def activate_density(self, density, interval=None): 213 | interval = interval if interval is not None else self.voxel_size_ratio 214 | shape = density.shape 215 | return Raw2Alpha.apply(density.flatten(), self.act_shift, interval).reshape(shape) 216 | 217 | def sample_ray(self, ori_rays_o, ori_rays_d, stepsize, is_train=False, **render_kwargs): 218 | '''Sample query points on rays. 219 | All the output points are sorted from near to far. 220 | Input: 221 | rays_o, rayd_d: both in [N, 3] indicating ray configurations. 222 | near, far: the near and far distance of the rays. 223 | stepsize: the number of voxels of each sample step. 224 | Output: 225 | ray_pts: [M, 3] storing all the sampled points. 226 | ray_id: [M] the index of the ray of each point. 227 | step_id: [M] the i'th step on a ray of each point. 
228 | ''' 229 | rays_o = (ori_rays_o - self.scene_center) / self.scene_radius 230 | rays_d = ori_rays_d / ori_rays_d.norm(dim=-1, keepdim=True) 231 | # sample query points in inter scene 232 | near = 0 233 | far = 2 * np.sqrt(3) 234 | stepdist = stepsize * self.voxel_size 235 | ray_pts, mask_outbbox, ray_id, step_id, N_steps, t_min, t_max = render_utils_cuda.sample_pts_on_rays( 236 | rays_o, rays_d, self.xyz_min, self.xyz_max, near, far, stepdist) 237 | mask_inbbox = ~mask_outbbox 238 | ray_pts = ray_pts[mask_inbbox] 239 | ray_id = ray_id[mask_inbbox] 240 | step_id = step_id[mask_inbbox] 241 | # sample query points in outer scene 242 | N_outer = int(np.sqrt(3) / stepdist.item() * (1-self.bg_preserve)) + 1 243 | ray_pts_outer = render_utils_cuda.sample_bg_pts_on_rays( 244 | rays_o, rays_d, t_max, self.bg_preserve, N_outer) 245 | return ray_pts, ray_id, step_id, ray_pts_outer 246 | 247 | def _forward(self, ray_pts, viewdirs, interval, N, 248 | mask_grid, density_grid, k0_grid, rgbnet=None, 249 | ray_id=None, step_id=None, prev_alphainv_last=None): 250 | # preprocess for bg queries 251 | if ray_id is None: 252 | # ray_pts is [N, M, 3] in bg query 253 | assert len(ray_pts.shape) == 3 254 | ray_id, step_id = create_full_step_id(ray_pts.shape[:2]) 255 | ray_pts = ray_pts.reshape(-1, 3) 256 | 257 | # skip ray which is already occluded by fg 258 | if prev_alphainv_last is not None: 259 | mask = (prev_alphainv_last > self.fast_color_thres) 260 | ray_id = ray_id.view(N,-1)[mask].view(-1) 261 | step_id = step_id.view(N,-1)[mask].view(-1) 262 | ray_pts = ray_pts.view(N,-1,3)[mask].view(-1,3) 263 | 264 | # skip known free space 265 | mask = mask_grid(ray_pts) 266 | ray_pts = ray_pts[mask] 267 | ray_id = ray_id[mask] 268 | step_id = step_id[mask] 269 | 270 | # query for alpha w/ post-activation 271 | density = density_grid(ray_pts) 272 | alpha = self.activate_density(density, interval) 273 | if self.fast_color_thres > 0: 274 | mask = (alpha > self.fast_color_thres) 275 | ray_pts = ray_pts[mask] 276 | ray_id = ray_id[mask] 277 | step_id = step_id[mask] 278 | density = density[mask] 279 | alpha = alpha[mask] 280 | 281 | # compute accumulated transmittance 282 | weights, alphainv_last = Alphas2Weights.apply(alpha, ray_id, N) 283 | if self.fast_color_thres > 0: 284 | mask = (weights > self.fast_color_thres) 285 | weights = weights[mask] 286 | alpha = alpha[mask] 287 | ray_pts = ray_pts[mask] 288 | ray_id = ray_id[mask] 289 | step_id = step_id[mask] 290 | 291 | # query for color 292 | k0 = k0_grid(ray_pts) 293 | if rgbnet is None: 294 | # no view-depend effect 295 | rgb = torch.sigmoid(k0) 296 | else: 297 | # view-dependent color emission 298 | k0_view = k0 299 | viewdirs_emb = (viewdirs.unsqueeze(-1) * self.viewfreq).flatten(-2) 300 | viewdirs_emb = torch.cat([viewdirs, viewdirs_emb.sin(), viewdirs_emb.cos()], -1) 301 | viewdirs_emb = viewdirs_emb.flatten(0,-2)[ray_id] 302 | rgb_feat = torch.cat([k0_view, viewdirs_emb], -1) 303 | rgb_logit = rgbnet(rgb_feat) 304 | rgb = torch.sigmoid(rgb_logit) 305 | 306 | return dict( 307 | rgb=rgb, alpha=alpha, weights=weights, alphainv_last=alphainv_last, 308 | ray_id=ray_id, step_id=step_id) 309 | 310 | def forward(self, rays_o, rays_d, viewdirs, global_step=None, **render_kwargs): 311 | '''Volume rendering 312 | @rays_o: [N, 3] the starting point of the N shooting rays. 313 | @rays_d: [N, 3] the shooting direction of the N rays. 314 | @viewdirs: [N, 3] viewing direction to compute positional embedding for MLP. 
315 | ''' 316 | assert len(rays_o.shape)==2 and rays_o.shape[-1]==3, 'Only suuport point queries in [N, 3] format' 317 | 318 | ret_dict = {} 319 | N = len(rays_o) 320 | 321 | # sample points on rays 322 | ray_pts, ray_id, step_id, ray_pts_outer = self.sample_ray( 323 | ori_rays_o=rays_o, ori_rays_d=rays_d, is_train=global_step is not None, **render_kwargs) 324 | interval = render_kwargs['stepsize'] * self.voxel_size_ratio 325 | 326 | # query for foreground 327 | fg = self._forward( 328 | ray_pts=ray_pts, viewdirs=viewdirs, 329 | interval=interval, N=N, 330 | mask_grid=self.mask_cache[0], 331 | density_grid=self.density[0], 332 | k0_grid=self.k0[0], 333 | rgbnet=self.rgbnet[0], 334 | ray_id=ray_id, step_id=step_id) 335 | 336 | # query for background 337 | bg = self._forward( 338 | ray_pts=ray_pts_outer, viewdirs=viewdirs, 339 | interval=interval, N=N, 340 | mask_grid=self.mask_cache[1], 341 | density_grid=self.density[1], 342 | k0_grid=self.k0[1], 343 | rgbnet=self.rgbnet[1], 344 | prev_alphainv_last=fg['alphainv_last']) 345 | 346 | # Ray marching 347 | rgb_marched_fg = segment_coo( 348 | src=(fg['weights'].unsqueeze(-1) * fg['rgb']), 349 | index=fg['ray_id'], 350 | out=torch.zeros([N, 3]), 351 | reduce='sum') 352 | rgb_marched_bg = segment_coo( 353 | src=(bg['weights'].unsqueeze(-1) * bg['rgb']), 354 | index=bg['ray_id'], 355 | out=torch.zeros([N, 3]), 356 | reduce='sum') 357 | rgb_marched = rgb_marched_fg + \ 358 | fg['alphainv_last'].unsqueeze(-1) * rgb_marched_bg + \ 359 | (fg['alphainv_last'] * bg['alphainv_last']).unsqueeze(-1) * render_kwargs['bg'] 360 | ret_dict.update({ 361 | 'rgb_marched': rgb_marched, 362 | 'alphainv_last': torch.cat([fg['alphainv_last'], bg['alphainv_last']]), 363 | 'weights': torch.cat([fg['weights'], bg['weights']]), 364 | 'raw_alpha': torch.cat([fg['alpha'], bg['alpha']]), 365 | 'raw_rgb': torch.cat([fg['rgb'], bg['rgb']]), 366 | 'ray_id': torch.cat([fg['ray_id'], bg['ray_id']]), 367 | }) 368 | 369 | if render_kwargs.get('render_depth', False): 370 | # TODO: add bg 371 | with torch.no_grad(): 372 | depth_fg = segment_coo( 373 | src=(fg['weights'] * fg['step_id']), 374 | index=fg['ray_id'], 375 | out=torch.zeros([N]), 376 | reduce='sum') 377 | depth_bg = segment_coo( 378 | src=(bg['weights'] * bg['step_id']), 379 | index=bg['ray_id'], 380 | out=torch.zeros([N]), 381 | reduce='sum') 382 | depth_fg_last = segment_coo( 383 | src=fg['step_id'].float(), 384 | index=fg['ray_id'], 385 | out=torch.zeros([N]), 386 | reduce='max') 387 | depth_bg_last = segment_coo( 388 | src=bg['step_id'].float(), 389 | index=bg['ray_id'], 390 | out=depth_fg_last.clone(), 391 | reduce='max') 392 | depth = depth_fg + \ 393 | fg['alphainv_last'] * (1 + depth_fg_last + depth_bg) + \ 394 | fg['alphainv_last'] * bg['alphainv_last'] * (2 + depth_fg_last + depth_bg_last) 395 | ret_dict.update({'depth': depth}) 396 | 397 | return ret_dict 398 | 399 | -------------------------------------------------------------------------------- /lib/dmpigo.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import functools 4 | import numpy as np 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | from torch import Tensor 10 | from einops import rearrange 11 | from torch_scatter import scatter_add, segment_coo 12 | 13 | from . 
import grid 14 | from .dvgo import Raw2Alpha, Alphas2Weights, render_utils_cuda 15 | 16 | 17 | '''Model''' 18 | class DirectMPIGO(torch.nn.Module): 19 | def __init__(self, xyz_min, xyz_max, 20 | num_voxels=0, mpi_depth=0, 21 | mask_cache_path=None, mask_cache_thres=1e-3, mask_cache_world_size=None, 22 | fast_color_thres=0, 23 | density_type='DenseGrid', k0_type='DenseGrid', 24 | density_config={}, k0_config={}, 25 | rgbnet_dim=0, 26 | rgbnet_depth=3, rgbnet_width=128, 27 | viewbase_pe=0, 28 | **kwargs): 29 | super(DirectMPIGO, self).__init__() 30 | self.register_buffer('xyz_min', torch.Tensor(xyz_min)) 31 | self.register_buffer('xyz_max', torch.Tensor(xyz_max)) 32 | self.fast_color_thres = fast_color_thres 33 | 34 | # determine init grid resolution 35 | self._set_grid_resolution(num_voxels, mpi_depth) 36 | 37 | # init density voxel grid 38 | self.density_type = density_type 39 | self.density_config = density_config 40 | self.density = grid.create_grid( 41 | density_type, channels=1, world_size=self.world_size, 42 | xyz_min=self.xyz_min, xyz_max=self.xyz_max, 43 | config=self.density_config) 44 | 45 | # init density bias so that the initial contribution (the alpha values) 46 | # of each query points on a ray is equal 47 | self.act_shift = grid.DenseGrid( 48 | channels=1, world_size=[1,1,mpi_depth], 49 | xyz_min=xyz_min, xyz_max=xyz_max) 50 | self.act_shift.grid.requires_grad = False 51 | with torch.no_grad(): 52 | g = np.full([mpi_depth], 1./mpi_depth - 1e-6) 53 | p = [1-g[0]] 54 | for i in range(1, len(g)): 55 | p.append((1-g[:i+1].sum())/(1-g[:i].sum())) 56 | for i in range(len(p)): 57 | self.act_shift.grid[..., i].fill_(np.log(p[i] ** (-1/self.voxel_size_ratio) - 1)) 58 | 59 | # init color representation 60 | # feature voxel grid + shallow MLP (fine stage) 61 | self.rgbnet_kwargs = { 62 | 'rgbnet_dim': rgbnet_dim, 63 | 'rgbnet_depth': rgbnet_depth, 'rgbnet_width': rgbnet_width, 64 | 'viewbase_pe': viewbase_pe, 65 | } 66 | self.k0_type = k0_type 67 | self.k0_config = k0_config 68 | if rgbnet_dim <= 0: 69 | # color voxel grid (coarse stage) 70 | self.k0_dim = 3 71 | self.k0 = grid.create_grid( 72 | k0_type, channels=self.k0_dim, world_size=self.world_size, 73 | xyz_min=self.xyz_min, xyz_max=self.xyz_max, 74 | config=self.k0_config) 75 | self.rgbnet = None 76 | else: 77 | self.k0_dim = rgbnet_dim 78 | self.k0 = grid.create_grid( 79 | k0_type, channels=self.k0_dim, world_size=self.world_size, 80 | xyz_min=self.xyz_min, xyz_max=self.xyz_max, 81 | config=self.k0_config) 82 | self.register_buffer('viewfreq', torch.FloatTensor([(2**i) for i in range(viewbase_pe)])) 83 | dim0 = (3+3*viewbase_pe*2) + self.k0_dim 84 | self.rgbnet = nn.Sequential( 85 | nn.Linear(dim0, rgbnet_width), nn.ReLU(inplace=True), 86 | *[ 87 | nn.Sequential(nn.Linear(rgbnet_width, rgbnet_width), nn.ReLU(inplace=True)) 88 | for _ in range(rgbnet_depth-2) 89 | ], 90 | nn.Linear(rgbnet_width, 3), 91 | ) 92 | nn.init.constant_(self.rgbnet[-1].bias, 0) 93 | 94 | print('dmpigo: densitye grid', self.density) 95 | print('dmpigo: feature grid', self.k0) 96 | print('dmpigo: mlp', self.rgbnet) 97 | 98 | # Using the coarse geometry if provided (used to determine known free space and unknown space) 99 | # Re-implement as occupancy grid (2021/1/31) 100 | self.mask_cache_path = mask_cache_path 101 | self.mask_cache_thres = mask_cache_thres 102 | if mask_cache_world_size is None: 103 | mask_cache_world_size = self.world_size 104 | if mask_cache_path is not None and mask_cache_path: 105 | mask_cache = grid.MaskGrid( 106 | 
path=mask_cache_path, 107 | mask_cache_thres=mask_cache_thres).to(self.xyz_min.device) 108 | self_grid_xyz = torch.stack(torch.meshgrid( 109 | torch.linspace(self.xyz_min[0], self.xyz_max[0], mask_cache_world_size[0]), 110 | torch.linspace(self.xyz_min[1], self.xyz_max[1], mask_cache_world_size[1]), 111 | torch.linspace(self.xyz_min[2], self.xyz_max[2], mask_cache_world_size[2]), 112 | ), -1) 113 | mask = mask_cache(self_grid_xyz) 114 | else: 115 | mask = torch.ones(list(mask_cache_world_size), dtype=torch.bool) 116 | self.mask_cache = grid.MaskGrid( 117 | path=None, mask=mask, 118 | xyz_min=self.xyz_min, xyz_max=self.xyz_max) 119 | 120 | def _set_grid_resolution(self, num_voxels, mpi_depth): 121 | # Determine grid resolution 122 | self.num_voxels = num_voxels 123 | self.mpi_depth = mpi_depth 124 | r = (num_voxels / self.mpi_depth / (self.xyz_max - self.xyz_min)[:2].prod()).sqrt() 125 | self.world_size = torch.zeros(3, dtype=torch.long) 126 | self.world_size[:2] = (self.xyz_max - self.xyz_min)[:2] * r 127 | self.world_size[2] = self.mpi_depth 128 | self.voxel_size_ratio = 256. / mpi_depth 129 | print('dmpigo: world_size ', self.world_size) 130 | print('dmpigo: voxel_size_ratio', self.voxel_size_ratio) 131 | 132 | def get_kwargs(self): 133 | return { 134 | 'xyz_min': self.xyz_min.cpu().numpy(), 135 | 'xyz_max': self.xyz_max.cpu().numpy(), 136 | 'num_voxels': self.num_voxels, 137 | 'mpi_depth': self.mpi_depth, 138 | 'voxel_size_ratio': self.voxel_size_ratio, 139 | 'mask_cache_path': self.mask_cache_path, 140 | 'mask_cache_thres': self.mask_cache_thres, 141 | 'mask_cache_world_size': list(self.mask_cache.mask.shape), 142 | 'fast_color_thres': self.fast_color_thres, 143 | 'density_type': self.density_type, 144 | 'k0_type': self.k0_type, 145 | 'density_config': self.density_config, 146 | 'k0_config': self.k0_config, 147 | **self.rgbnet_kwargs, 148 | } 149 | 150 | @torch.no_grad() 151 | def scale_volume_grid(self, num_voxels, mpi_depth): 152 | print('dmpigo: scale_volume_grid start') 153 | ori_world_size = self.world_size 154 | self._set_grid_resolution(num_voxels, mpi_depth) 155 | print('dmpigo: scale_volume_grid scale world_size from', ori_world_size.tolist(), 'to', self.world_size.tolist()) 156 | 157 | self.density.scale_volume_grid(self.world_size) 158 | self.k0.scale_volume_grid(self.world_size) 159 | 160 | if np.prod(self.world_size.tolist()) <= 256**3: 161 | self_grid_xyz = torch.stack(torch.meshgrid( 162 | torch.linspace(self.xyz_min[0], self.xyz_max[0], self.world_size[0]), 163 | torch.linspace(self.xyz_min[1], self.xyz_max[1], self.world_size[1]), 164 | torch.linspace(self.xyz_min[2], self.xyz_max[2], self.world_size[2]), 165 | ), -1) 166 | dens = self.density.get_dense_grid() + self.act_shift.grid 167 | self_alpha = F.max_pool3d(self.activate_density(dens), kernel_size=3, padding=1, stride=1)[0,0] 168 | self.mask_cache = grid.MaskGrid( 169 | path=None, mask=self.mask_cache(self_grid_xyz) & (self_alpha>self.fast_color_thres), 170 | xyz_min=self.xyz_min, xyz_max=self.xyz_max) 171 | 172 | print('dmpigo: scale_volume_grid finish') 173 | 174 | @torch.no_grad() 175 | def update_occupancy_cache(self): 176 | ori_p = self.mask_cache.mask.float().mean().item() 177 | cache_grid_xyz = torch.stack(torch.meshgrid( 178 | torch.linspace(self.xyz_min[0], self.xyz_max[0], self.mask_cache.mask.shape[0]), 179 | torch.linspace(self.xyz_min[1], self.xyz_max[1], self.mask_cache.mask.shape[1]), 180 | torch.linspace(self.xyz_min[2], self.xyz_max[2], self.mask_cache.mask.shape[2]), 181 | ), -1) 182 | 
cache_grid_density = self.density(cache_grid_xyz)[None,None] 183 | cache_grid_alpha = self.activate_density(cache_grid_density) 184 | cache_grid_alpha = F.max_pool3d(cache_grid_alpha, kernel_size=3, padding=1, stride=1)[0,0] 185 | self.mask_cache.mask &= (cache_grid_alpha > self.fast_color_thres) 186 | new_p = self.mask_cache.mask.float().mean().item() 187 | print(f'dmpigo: update mask_cache {ori_p:.4f} => {new_p:.4f}') 188 | 189 | def update_occupancy_cache_lt_nviews(self, rays_o_tr, rays_d_tr, imsz, render_kwargs, maskout_lt_nviews): 190 | print('dmpigo: update mask_cache lt_nviews start') 191 | eps_time = time.time() 192 | count = torch.zeros_like(self.density.get_dense_grid()).long() 193 | device = count.device 194 | for rays_o_, rays_d_ in zip(rays_o_tr.split(imsz), rays_d_tr.split(imsz)): 195 | ones = grid.DenseGrid(1, self.world_size, self.xyz_min, self.xyz_max) 196 | for rays_o, rays_d in zip(rays_o_.split(8192), rays_d_.split(8192)): 197 | ray_pts, ray_id, step_id, N_samples = self.sample_ray( 198 | rays_o=rays_o.to(device), rays_d=rays_d.to(device), **render_kwargs) 199 | ones(ray_pts).sum().backward() 200 | count.data += (ones.grid.grad > 1) 201 | ori_p = self.mask_cache.mask.float().mean().item() 202 | self.mask_cache.mask &= (count >= maskout_lt_nviews)[0,0] 203 | new_p = self.mask_cache.mask.float().mean().item() 204 | print(f'dmpigo: update mask_cache {ori_p:.4f} => {new_p:.4f}') 205 | torch.cuda.empty_cache() 206 | eps_time = time.time() - eps_time 207 | print(f'dmpigo: update mask_cache lt_nviews finish (eps time:', eps_time, 'sec)') 208 | 209 | def density_total_variation_add_grad(self, weight, dense_mode): 210 | wxy = weight * self.world_size[:2].max() / 128 211 | wz = weight * self.mpi_depth / 128 212 | self.density.total_variation_add_grad(wxy, wxy, wz, dense_mode) 213 | 214 | def k0_total_variation_add_grad(self, weight, dense_mode): 215 | wxy = weight * self.world_size[:2].max() / 128 216 | wz = weight * self.mpi_depth / 128 217 | self.k0.total_variation_add_grad(wxy, wxy, wz, dense_mode) 218 | 219 | def activate_density(self, density, interval=None): 220 | interval = interval if interval is not None else self.voxel_size_ratio 221 | shape = density.shape 222 | return Raw2Alpha.apply(density.flatten(), 0, interval).reshape(shape) 223 | 224 | def sample_ray(self, rays_o, rays_d, near, far, stepsize, **render_kwargs): 225 | '''Sample query points on rays. 226 | All the output points are sorted from near to far. 227 | Input: 228 | rays_o, rayd_d: both in [N, 3] indicating ray configurations. 229 | near, far: the near and far distance of the rays. 230 | stepsize: the number of voxels of each sample step. 231 | Output: 232 | ray_pts: [M, 3] storing all the sampled points. 233 | ray_id: [M] the index of the ray of each point. 234 | step_id: [M] the i'th step on a ray of each point. 
235 | ''' 236 | assert near==0 and far==1 237 | rays_o = rays_o.contiguous() 238 | rays_d = rays_d.contiguous() 239 | N_samples = int((self.mpi_depth-1)/stepsize) + 1 240 | ray_pts, mask_outbbox = render_utils_cuda.sample_ndc_pts_on_rays( 241 | rays_o, rays_d, self.xyz_min, self.xyz_max, N_samples) 242 | mask_inbbox = ~mask_outbbox 243 | ray_pts = ray_pts[mask_inbbox] 244 | if mask_inbbox.all(): 245 | ray_id, step_id = create_full_step_id(mask_inbbox.shape) 246 | else: 247 | ray_id = torch.arange(mask_inbbox.shape[0]).view(-1,1).expand_as(mask_inbbox)[mask_inbbox] 248 | step_id = torch.arange(mask_inbbox.shape[1]).view(1,-1).expand_as(mask_inbbox)[mask_inbbox] 249 | return ray_pts, ray_id, step_id, N_samples 250 | 251 | def forward(self, rays_o, rays_d, viewdirs, global_step=None, **render_kwargs): 252 | '''Volume rendering 253 | @rays_o: [N, 3] the starting point of the N shooting rays. 254 | @rays_d: [N, 3] the shooting direction of the N rays. 255 | @viewdirs: [N, 3] viewing direction to compute positional embedding for MLP. 256 | ''' 257 | assert len(rays_o.shape)==2 and rays_o.shape[-1]==3, 'Only suuport point queries in [N, 3] format' 258 | 259 | ret_dict = {} 260 | N = len(rays_o) 261 | 262 | # sample points on rays 263 | ray_pts, ray_id, step_id, N_samples = self.sample_ray( 264 | rays_o=rays_o, rays_d=rays_d, **render_kwargs) 265 | interval = render_kwargs['stepsize'] * self.voxel_size_ratio 266 | 267 | # skip known free space 268 | if self.mask_cache is not None: 269 | mask = self.mask_cache(ray_pts) 270 | ray_pts = ray_pts[mask] 271 | ray_id = ray_id[mask] 272 | step_id = step_id[mask] 273 | 274 | # query for alpha w/ post-activation 275 | density = self.density(ray_pts) + self.act_shift(ray_pts) 276 | alpha = self.activate_density(density, interval) 277 | if self.fast_color_thres > 0: 278 | mask = (alpha > self.fast_color_thres) 279 | ray_pts = ray_pts[mask] 280 | ray_id = ray_id[mask] 281 | step_id = step_id[mask] 282 | alpha = alpha[mask] 283 | 284 | # compute accumulated transmittance 285 | weights, alphainv_last = Alphas2Weights.apply(alpha, ray_id, N) 286 | if self.fast_color_thres > 0: 287 | mask = (weights > self.fast_color_thres) 288 | ray_pts = ray_pts[mask] 289 | ray_id = ray_id[mask] 290 | step_id = step_id[mask] 291 | alpha = alpha[mask] 292 | weights = weights[mask] 293 | 294 | # query for color 295 | vox_emb = self.k0(ray_pts) 296 | 297 | if self.rgbnet is None: 298 | # no view-depend effect 299 | rgb = torch.sigmoid(vox_emb) 300 | else: 301 | # view-dependent color emission 302 | viewdirs_emb = (viewdirs.unsqueeze(-1) * self.viewfreq).flatten(-2) 303 | viewdirs_emb = torch.cat([viewdirs, viewdirs_emb.sin(), viewdirs_emb.cos()], -1) 304 | viewdirs_emb = viewdirs_emb[ray_id] 305 | rgb_feat = torch.cat([vox_emb, viewdirs_emb], -1) 306 | rgb_logit = self.rgbnet(rgb_feat) 307 | rgb = torch.sigmoid(rgb_logit) 308 | 309 | # Ray marching 310 | rgb_marched = segment_coo( 311 | src=(weights.unsqueeze(-1) * rgb), 312 | index=ray_id, 313 | out=torch.zeros([N, 3]), 314 | reduce='sum') 315 | if render_kwargs.get('rand_bkgd', False) and global_step is not None: 316 | rgb_marched += (alphainv_last.unsqueeze(-1) * torch.rand_like(rgb_marched)) 317 | else: 318 | rgb_marched += (alphainv_last.unsqueeze(-1) * render_kwargs['bg']) 319 | s = (step_id+0.5) / N_samples 320 | ret_dict.update({ 321 | 'alphainv_last': alphainv_last, 322 | 'weights': weights, 323 | 'rgb_marched': rgb_marched, 324 | 'raw_alpha': alpha, 325 | 'raw_rgb': rgb, 326 | 'ray_id': ray_id, 327 | 'n_max': N_samples, 
328 | 's': s, 329 | }) 330 | 331 | if render_kwargs.get('render_depth', False): 332 | with torch.no_grad(): 333 | depth = segment_coo( 334 | src=(weights * s), 335 | index=ray_id, 336 | out=torch.zeros([N]), 337 | reduce='sum') 338 | ret_dict.update({'depth': depth}) 339 | 340 | return ret_dict 341 | 342 | 343 | @functools.lru_cache(maxsize=128) 344 | def create_full_step_id(shape): 345 | ray_id = torch.arange(shape[0]).view(-1,1).expand(shape).flatten() 346 | step_id = torch.arange(shape[1]).view(1,-1).expand(shape).flatten() 347 | return ray_id, step_id 348 | 349 | -------------------------------------------------------------------------------- /lib/grid.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import functools 4 | import numpy as np 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | 10 | from torch.utils.cpp_extension import load 11 | parent_dir = os.path.dirname(os.path.abspath(__file__)) 12 | render_utils_cuda = load( 13 | name='render_utils_cuda', 14 | sources=[ 15 | os.path.join(parent_dir, path) 16 | for path in ['cuda/render_utils.cpp', 'cuda/render_utils_kernel.cu']], 17 | verbose=True) 18 | 19 | total_variation_cuda = load( 20 | name='total_variation_cuda', 21 | sources=[ 22 | os.path.join(parent_dir, path) 23 | for path in ['cuda/total_variation.cpp', 'cuda/total_variation_kernel.cu']], 24 | verbose=True) 25 | 26 | 27 | def create_grid(type, **kwargs): 28 | if type == 'DenseGrid': 29 | return DenseGrid(**kwargs) 30 | elif type == 'TensoRFGrid': 31 | return TensoRFGrid(**kwargs) 32 | else: 33 | raise NotImplementedError 34 | 35 | 36 | ''' Dense 3D grid 37 | ''' 38 | class DenseGrid(nn.Module): 39 | def __init__(self, channels, world_size, xyz_min, xyz_max, **kwargs): 40 | super(DenseGrid, self).__init__() 41 | self.channels = channels 42 | self.world_size = world_size 43 | self.register_buffer('xyz_min', torch.Tensor(xyz_min)) 44 | self.register_buffer('xyz_max', torch.Tensor(xyz_max)) 45 | self.grid = nn.Parameter(torch.zeros([1, channels, *world_size])) 46 | 47 | def forward(self, xyz): 48 | ''' 49 | xyz: global coordinates to query 50 | ''' 51 | shape = xyz.shape[:-1] 52 | xyz = xyz.reshape(1,1,1,-1,3) 53 | ind_norm = ((xyz - self.xyz_min) / (self.xyz_max - self.xyz_min)).flip((-1,)) * 2 - 1 54 | out = F.grid_sample(self.grid, ind_norm, mode='bilinear', align_corners=True) 55 | out = out.reshape(self.channels,-1).T.reshape(*shape,self.channels) 56 | if self.channels == 1: 57 | out = out.squeeze(-1) 58 | return out 59 | 60 | def scale_volume_grid(self, new_world_size): 61 | if self.channels == 0: 62 | self.grid = nn.Parameter(torch.zeros([1, self.channels, *new_world_size])) 63 | else: 64 | self.grid = nn.Parameter( 65 | F.interpolate(self.grid.data, size=tuple(new_world_size), mode='trilinear', align_corners=True)) 66 | 67 | def total_variation_add_grad(self, wx, wy, wz, dense_mode): 68 | '''Add gradients by total variation loss in-place''' 69 | total_variation_cuda.total_variation_add_grad( 70 | self.grid, self.grid.grad, wx, wy, wz, dense_mode) 71 | 72 | def get_dense_grid(self): 73 | return self.grid 74 | 75 | @torch.no_grad() 76 | def __isub__(self, val): 77 | self.grid.data -= val 78 | return self 79 | 80 | def extra_repr(self): 81 | return f'channels={self.channels}, world_size={self.world_size.tolist()}' 82 | 83 | 84 | ''' Vector-Matrix decomposited grid 85 | See TensoRF: Tensorial Radiance Fields (https://arxiv.org/abs/2203.09517) 86 | ''' 87 | class 
TensoRFGrid(nn.Module): 88 | def __init__(self, channels, world_size, xyz_min, xyz_max, config): 89 | super(TensoRFGrid, self).__init__() 90 | self.channels = channels 91 | self.world_size = world_size 92 | self.config = config 93 | self.register_buffer('xyz_min', torch.Tensor(xyz_min)) 94 | self.register_buffer('xyz_max', torch.Tensor(xyz_max)) 95 | X, Y, Z = world_size 96 | R = config['n_comp'] 97 | Rxy = config.get('n_comp_xy', R) 98 | self.xy_plane = nn.Parameter(torch.randn([1, Rxy, X, Y]) * 0.1) 99 | self.xz_plane = nn.Parameter(torch.randn([1, R, X, Z]) * 0.1) 100 | self.yz_plane = nn.Parameter(torch.randn([1, R, Y, Z]) * 0.1) 101 | self.x_vec = nn.Parameter(torch.randn([1, R, X, 1]) * 0.1) 102 | self.y_vec = nn.Parameter(torch.randn([1, R, Y, 1]) * 0.1) 103 | self.z_vec = nn.Parameter(torch.randn([1, Rxy, Z, 1]) * 0.1) 104 | if self.channels > 1: 105 | self.f_vec = nn.Parameter(torch.ones([R+R+Rxy, channels])) 106 | nn.init.kaiming_uniform_(self.f_vec, a=np.sqrt(5)) 107 | 108 | def forward(self, xyz): 109 | ''' 110 | xyz: global coordinates to query 111 | ''' 112 | shape = xyz.shape[:-1] 113 | xyz = xyz.reshape(1,1,-1,3) 114 | ind_norm = (xyz - self.xyz_min) / (self.xyz_max - self.xyz_min) * 2 - 1 115 | ind_norm = torch.cat([ind_norm, torch.zeros_like(ind_norm[...,[0]])], dim=-1) 116 | if self.channels > 1: 117 | out = compute_tensorf_feat( 118 | self.xy_plane, self.xz_plane, self.yz_plane, 119 | self.x_vec, self.y_vec, self.z_vec, self.f_vec, ind_norm) 120 | out = out.reshape(*shape,self.channels) 121 | else: 122 | out = compute_tensorf_val( 123 | self.xy_plane, self.xz_plane, self.yz_plane, 124 | self.x_vec, self.y_vec, self.z_vec, ind_norm) 125 | out = out.reshape(*shape) 126 | return out 127 | 128 | def scale_volume_grid(self, new_world_size): 129 | if self.channels == 0: 130 | return 131 | X, Y, Z = new_world_size 132 | self.xy_plane = nn.Parameter(F.interpolate(self.xy_plane.data, size=[X,Y], mode='bilinear', align_corners=True)) 133 | self.xz_plane = nn.Parameter(F.interpolate(self.xz_plane.data, size=[X,Z], mode='bilinear', align_corners=True)) 134 | self.yz_plane = nn.Parameter(F.interpolate(self.yz_plane.data, size=[Y,Z], mode='bilinear', align_corners=True)) 135 | self.x_vec = nn.Parameter(F.interpolate(self.x_vec.data, size=[X,1], mode='bilinear', align_corners=True)) 136 | self.y_vec = nn.Parameter(F.interpolate(self.y_vec.data, size=[Y,1], mode='bilinear', align_corners=True)) 137 | self.z_vec = nn.Parameter(F.interpolate(self.z_vec.data, size=[Z,1], mode='bilinear', align_corners=True)) 138 | 139 | def total_variation_add_grad(self, wx, wy, wz, dense_mode): 140 | '''Add gradients by total variation loss in-place''' 141 | loss = wx * F.smooth_l1_loss(self.xy_plane[:,:,1:], self.xy_plane[:,:,:-1], reduction='sum') +\ 142 | wy * F.smooth_l1_loss(self.xy_plane[:,:,:,1:], self.xy_plane[:,:,:,:-1], reduction='sum') +\ 143 | wx * F.smooth_l1_loss(self.xz_plane[:,:,1:], self.xz_plane[:,:,:-1], reduction='sum') +\ 144 | wz * F.smooth_l1_loss(self.xz_plane[:,:,:,1:], self.xz_plane[:,:,:,:-1], reduction='sum') +\ 145 | wy * F.smooth_l1_loss(self.yz_plane[:,:,1:], self.yz_plane[:,:,:-1], reduction='sum') +\ 146 | wz * F.smooth_l1_loss(self.yz_plane[:,:,:,1:], self.yz_plane[:,:,:,:-1], reduction='sum') +\ 147 | wx * F.smooth_l1_loss(self.x_vec[:,:,1:], self.x_vec[:,:,:-1], reduction='sum') +\ 148 | wy * F.smooth_l1_loss(self.y_vec[:,:,1:], self.y_vec[:,:,:-1], reduction='sum') +\ 149 | wz * F.smooth_l1_loss(self.z_vec[:,:,1:], self.z_vec[:,:,:-1], reduction='sum') 150 | loss /= 6 
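# The sum above is a finite-difference total-variation penalty: two axes for each of the
# three planes plus one axis for each vector (nine smooth-L1 terms), weighted by wx/wy/wz
# and rescaled by 1/6. A minimal sketch of one such term on a hypothetical plane tensor:
#   P = torch.randn(1, 8, 16, 16)
#   tv_along_x = F.smooth_l1_loss(P[:, :, 1:], P[:, :, :-1], reduction='sum')
# i.e. neighboring grid entries are pulled toward each other along that axis.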
151 | loss.backward() 152 | 153 | def get_dense_grid(self): 154 | if self.channels > 1: 155 | feat = torch.cat([ 156 | torch.einsum('rxy,rz->rxyz', self.xy_plane[0], self.z_vec[0,:,:,0]), 157 | torch.einsum('rxz,ry->rxyz', self.xz_plane[0], self.y_vec[0,:,:,0]), 158 | torch.einsum('ryz,rx->rxyz', self.yz_plane[0], self.x_vec[0,:,:,0]), 159 | ]) 160 | grid = torch.einsum('rxyz,rc->cxyz', feat, self.f_vec)[None] 161 | else: 162 | grid = torch.einsum('rxy,rz->xyz', self.xy_plane[0], self.z_vec[0,:,:,0]) + \ 163 | torch.einsum('rxz,ry->xyz', self.xz_plane[0], self.y_vec[0,:,:,0]) + \ 164 | torch.einsum('ryz,rx->xyz', self.yz_plane[0], self.x_vec[0,:,:,0]) 165 | grid = grid[None,None] 166 | return grid 167 | 168 | def extra_repr(self): 169 | return f'channels={self.channels}, world_size={self.world_size.tolist()}, n_comp={self.config["n_comp"]}' 170 | 171 | def compute_tensorf_feat(xy_plane, xz_plane, yz_plane, x_vec, y_vec, z_vec, f_vec, ind_norm): 172 | # Interp feature (feat shape: [n_pts, n_comp]) 173 | xy_feat = F.grid_sample(xy_plane, ind_norm[:,:,:,[1,0]], mode='bilinear', align_corners=True).flatten(0,2).T 174 | xz_feat = F.grid_sample(xz_plane, ind_norm[:,:,:,[2,0]], mode='bilinear', align_corners=True).flatten(0,2).T 175 | yz_feat = F.grid_sample(yz_plane, ind_norm[:,:,:,[2,1]], mode='bilinear', align_corners=True).flatten(0,2).T 176 | x_feat = F.grid_sample(x_vec, ind_norm[:,:,:,[3,0]], mode='bilinear', align_corners=True).flatten(0,2).T 177 | y_feat = F.grid_sample(y_vec, ind_norm[:,:,:,[3,1]], mode='bilinear', align_corners=True).flatten(0,2).T 178 | z_feat = F.grid_sample(z_vec, ind_norm[:,:,:,[3,2]], mode='bilinear', align_corners=True).flatten(0,2).T 179 | # Aggregate components 180 | feat = torch.cat([ 181 | xy_feat * z_feat, 182 | xz_feat * y_feat, 183 | yz_feat * x_feat, 184 | ], dim=-1) 185 | feat = torch.mm(feat, f_vec) 186 | return feat 187 | 188 | def compute_tensorf_val(xy_plane, xz_plane, yz_plane, x_vec, y_vec, z_vec, ind_norm): 189 | # Interp feature (feat shape: [n_pts, n_comp]) 190 | xy_feat = F.grid_sample(xy_plane, ind_norm[:,:,:,[1,0]], mode='bilinear', align_corners=True).flatten(0,2).T 191 | xz_feat = F.grid_sample(xz_plane, ind_norm[:,:,:,[2,0]], mode='bilinear', align_corners=True).flatten(0,2).T 192 | yz_feat = F.grid_sample(yz_plane, ind_norm[:,:,:,[2,1]], mode='bilinear', align_corners=True).flatten(0,2).T 193 | x_feat = F.grid_sample(x_vec, ind_norm[:,:,:,[3,0]], mode='bilinear', align_corners=True).flatten(0,2).T 194 | y_feat = F.grid_sample(y_vec, ind_norm[:,:,:,[3,1]], mode='bilinear', align_corners=True).flatten(0,2).T 195 | z_feat = F.grid_sample(z_vec, ind_norm[:,:,:,[3,2]], mode='bilinear', align_corners=True).flatten(0,2).T 196 | # Aggregate components 197 | feat = (xy_feat * z_feat).sum(-1) + (xz_feat * y_feat).sum(-1) + (yz_feat * x_feat).sum(-1) 198 | return feat 199 | 200 | 201 | ''' Mask grid 202 | It supports queries for the known free space and unknown space. 
203 | ''' 204 | class MaskGrid(nn.Module): 205 | def __init__(self, path=None, mask_cache_thres=None, mask=None, xyz_min=None, xyz_max=None): 206 | super(MaskGrid, self).__init__() 207 | if path is not None: 208 | st = torch.load(path) 209 | self.mask_cache_thres = mask_cache_thres 210 | density = F.max_pool3d(st['model_state_dict']['density.grid'], kernel_size=3, padding=1, stride=1) 211 | alpha = 1 - torch.exp(-F.softplus(density + st['model_state_dict']['act_shift']) * st['model_kwargs']['voxel_size_ratio']) 212 | mask = (alpha >= self.mask_cache_thres).squeeze(0).squeeze(0) 213 | xyz_min = torch.Tensor(st['model_kwargs']['xyz_min']) 214 | xyz_max = torch.Tensor(st['model_kwargs']['xyz_max']) 215 | else: 216 | mask = mask.bool() 217 | xyz_min = torch.Tensor(xyz_min) 218 | xyz_max = torch.Tensor(xyz_max) 219 | 220 | self.register_buffer('mask', mask) 221 | xyz_len = xyz_max - xyz_min 222 | self.register_buffer('xyz2ijk_scale', (torch.Tensor(list(mask.shape)) - 1) / xyz_len) 223 | self.register_buffer('xyz2ijk_shift', -xyz_min * self.xyz2ijk_scale) 224 | 225 | @torch.no_grad() 226 | def forward(self, xyz): 227 | '''Skip known free space 228 | @xyz: [..., 3] the xyz in global coordinate. 229 | ''' 230 | shape = xyz.shape[:-1] 231 | xyz = xyz.reshape(-1, 3) 232 | mask = render_utils_cuda.maskcache_lookup(self.mask, xyz, self.xyz2ijk_scale, self.xyz2ijk_shift) 233 | mask = mask.reshape(shape) 234 | return mask 235 | 236 | def extra_repr(self): 237 | return f'mask.shape={list(self.mask.shape)}' 238 | 239 | -------------------------------------------------------------------------------- /lib/load_blendedmvs.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import torch 4 | import numpy as np 5 | import imageio 6 | import json 7 | import torch.nn.functional as F 8 | import cv2 9 | 10 | 11 | def load_blendedmvs_data(basedir): 12 | pose_paths = sorted(glob.glob(os.path.join(basedir, 'pose', '*txt'))) 13 | rgb_paths = sorted(glob.glob(os.path.join(basedir, 'rgb', '*png'))) 14 | 15 | all_poses = [] 16 | all_imgs = [] 17 | i_split = [[], []] 18 | for i, (pose_path, rgb_path) in enumerate(zip(pose_paths, rgb_paths)): 19 | i_set = int(os.path.split(rgb_path)[-1][0]) 20 | all_imgs.append((imageio.imread(rgb_path) / 255.).astype(np.float32)) 21 | all_poses.append(np.loadtxt(pose_path).astype(np.float32)) 22 | i_split[i_set].append(i) 23 | 24 | imgs = np.stack(all_imgs, 0) 25 | poses = np.stack(all_poses, 0) 26 | i_split.append(i_split[-1]) 27 | 28 | path_intrinsics = os.path.join(basedir, 'intrinsics.txt') 29 | H, W = imgs[0].shape[:2] 30 | K = np.loadtxt(path_intrinsics) 31 | focal = float(K[0,0]) 32 | 33 | render_poses = torch.Tensor(np.loadtxt(os.path.join(basedir, 'test_traj.txt')).reshape(-1,4,4).astype(np.float32)) 34 | 35 | return imgs, poses, render_poses, [H, W, focal], K, i_split 36 | 37 | -------------------------------------------------------------------------------- /lib/load_blender.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import numpy as np 4 | import imageio 5 | import json 6 | import torch.nn.functional as F 7 | import cv2 8 | 9 | 10 | trans_t = lambda t : torch.Tensor([ 11 | [1,0,0,0], 12 | [0,1,0,0], 13 | [0,0,1,t], 14 | [0,0,0,1]]).float() 15 | 16 | rot_phi = lambda phi : torch.Tensor([ 17 | [1,0,0,0], 18 | [0,np.cos(phi),-np.sin(phi),0], 19 | [0,np.sin(phi), np.cos(phi),0], 20 | [0,0,0,1]]).float() 21 | 22 | rot_theta = lambda th : 
torch.Tensor([ 23 | [np.cos(th),0,-np.sin(th),0], 24 | [0,1,0,0], 25 | [np.sin(th),0, np.cos(th),0], 26 | [0,0,0,1]]).float() 27 | 28 | 29 | def pose_spherical(theta, phi, radius): 30 | c2w = trans_t(radius) 31 | c2w = rot_phi(phi/180.*np.pi) @ c2w 32 | c2w = rot_theta(theta/180.*np.pi) @ c2w 33 | c2w = torch.Tensor(np.array([[-1,0,0,0],[0,0,1,0],[0,1,0,0],[0,0,0,1]])) @ c2w 34 | return c2w 35 | 36 | 37 | def load_blender_data(basedir, half_res=False, testskip=1): 38 | splits = ['train', 'val', 'test'] 39 | metas = {} 40 | for s in splits: 41 | with open(os.path.join(basedir, 'transforms_{}.json'.format(s)), 'r') as fp: 42 | metas[s] = json.load(fp) 43 | 44 | all_imgs = [] 45 | all_poses = [] 46 | counts = [0] 47 | for s in splits: 48 | meta = metas[s] 49 | imgs = [] 50 | poses = [] 51 | if s=='train' or testskip==0: 52 | skip = 1 53 | else: 54 | skip = testskip 55 | 56 | for frame in meta['frames'][::skip]: 57 | fname = os.path.join(basedir, frame['file_path'] + '.png') 58 | imgs.append(imageio.imread(fname)) 59 | poses.append(np.array(frame['transform_matrix'])) 60 | imgs = (np.array(imgs) / 255.).astype(np.float32) # keep all 4 channels (RGBA) 61 | poses = np.array(poses).astype(np.float32) 62 | counts.append(counts[-1] + imgs.shape[0]) 63 | all_imgs.append(imgs) 64 | all_poses.append(poses) 65 | 66 | i_split = [np.arange(counts[i], counts[i+1]) for i in range(3)] 67 | 68 | imgs = np.concatenate(all_imgs, 0) 69 | poses = np.concatenate(all_poses, 0) 70 | 71 | H, W = imgs[0].shape[:2] 72 | camera_angle_x = float(meta['camera_angle_x']) 73 | focal = .5 * W / np.tan(.5 * camera_angle_x) 74 | 75 | render_poses = torch.stack([pose_spherical(angle, -30.0, 4.0) for angle in np.linspace(-180,180,160+1)[:-1]], 0) 76 | 77 | if half_res: 78 | H = H//2 79 | W = W//2 80 | focal = focal/2. 
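# The focal computed above is the pinhole relation focal = 0.5 * W / tan(0.5 * camera_angle_x);
# halving H, W, and focal together keeps the field of view unchanged. A worked example, assuming
# the common 800x800 synthetic setup (camera_angle_x ~ 0.6911 rad; illustrative numbers only):
#   focal = 0.5 * 800 / np.tan(0.5 * 0.6911)   # ~ 1111.1 px
#   # after half_res: H = W = 400, focal ~ 555.6 px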
81 | 82 | imgs_half_res = np.zeros((imgs.shape[0], H, W, 4)) 83 | for i, img in enumerate(imgs): 84 | imgs_half_res[i] = cv2.resize(img, (W, H), interpolation=cv2.INTER_AREA) 85 | imgs = imgs_half_res 86 | # imgs = tf.image.resize_area(imgs, [400, 400]).numpy() 87 | 88 | return imgs, poses, render_poses, [H, W, focal], i_split 89 | 90 | 91 | -------------------------------------------------------------------------------- /lib/load_co3d.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import gzip 4 | import glob 5 | import torch 6 | import numpy as np 7 | import imageio 8 | import torch.nn.functional as F 9 | import cv2 10 | 11 | 12 | def load_co3d_data(cfg): 13 | 14 | # load meta 15 | with gzip.open(cfg.annot_path, 'rt', encoding='utf8') as zipfile: 16 | annot = [v for v in json.load(zipfile) if v['sequence_name'] == cfg.sequence_name] 17 | with open(cfg.split_path) as f: 18 | split = json.load(f) 19 | train_im_path = set() 20 | test_im_path = set() 21 | for k, lst in split.items(): 22 | for v in lst: 23 | if v[0] == cfg.sequence_name: 24 | if 'known' in k: 25 | train_im_path.add(v[-1]) 26 | else: 27 | test_im_path.add(v[-1]) 28 | assert len(annot) == len(train_im_path) + len(test_im_path), 'Mismatch: '\ 29 | f'{len(annot)} == {len(train_im_path) + len(test_im_path)}' 30 | 31 | # load data 32 | imgs = [] 33 | masks = [] 34 | poses = [] 35 | Ks = [] 36 | i_split = [[], []] 37 | remove_empty_masks_cnt = [0, 0] 38 | for i, meta in enumerate(annot): 39 | im_fname = meta['image']['path'] 40 | assert im_fname in train_im_path or im_fname in test_im_path 41 | sid = 0 if im_fname in train_im_path else 1 42 | if meta['mask']['mass'] == 0: 43 | remove_empty_masks_cnt[sid] += 1 44 | continue 45 | im_path = os.path.join(cfg.datadir, im_fname) 46 | mask_path = os.path.join(cfg.datadir, meta['mask']['path']) 47 | mask = imageio.imread(mask_path) / 255. 48 | if mask.max() < 0.5: 49 | remove_empty_masks_cnt[sid] += 1 50 | continue 51 | Rt = np.concatenate([meta['viewpoint']['R'], np.array(meta['viewpoint']['T'])[:,None]], 1) 52 | pose = np.linalg.inv(np.concatenate([Rt, [[0,0,0,1]]])) 53 | imgs.append(imageio.imread(im_path) / 255.)
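# The pose built above stacks the annotation's R (3x3) and T (3,) into [R|T], appends the
# homogeneous row [0, 0, 0, 1], and inverts: pose = inv([[R, T], [0, 1]]). This treats the
# stored R, T as a world-to-camera transform (an assumption consistent with how `pose` is
# consumed as camera-to-world elsewhere in this repo).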
54 | masks.append(mask) 55 | poses.append(pose) 56 | assert imgs[-1].shape[:2] == tuple(meta['image']['size']) 57 | half_image_size_wh = np.float32(meta['image']['size'][::-1]) * 0.5 58 | principal_point = np.float32(meta['viewpoint']['principal_point']) 59 | focal_length = np.float32(meta['viewpoint']['focal_length']) 60 | principal_point_px = -1.0 * (principal_point - 1.0) * half_image_size_wh 61 | focal_length_px = focal_length * half_image_size_wh 62 | Ks.append(np.array([ 63 | [focal_length_px[0], 0, principal_point_px[0]], 64 | [0, focal_length_px[1], principal_point_px[1]], 65 | [0, 0, 1], 66 | ])) 67 | i_split[sid].append(len(imgs)-1) 68 | 69 | if sum(remove_empty_masks_cnt) > 0: 70 | print('load_co3d_data: removed %d train / %d test due to empty mask' % tuple(remove_empty_masks_cnt)) 71 | print(f'load_co3d_data: num images {len(i_split[0])} train / {len(i_split[1])} test') 72 | 73 | imgs = np.array(imgs) 74 | masks = np.array(masks) 75 | poses = np.stack(poses, 0) 76 | Ks = np.stack(Ks, 0) 77 | render_poses = poses[i_split[-1]] 78 | i_split.append(i_split[-1]) 79 | 80 | # visualization hwf 81 | H, W = np.array([im.shape[:2] for im in imgs]).mean(0).astype(int) 82 | focal = Ks[:,[0,1],[0,1]].mean() 83 | 84 | return imgs, masks, poses, render_poses, [H, W, focal], Ks, i_split 85 | 86 | -------------------------------------------------------------------------------- /lib/load_data.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from .load_llff import load_llff_data 4 | from .load_blender import load_blender_data 5 | from .load_nsvf import load_nsvf_data 6 | from .load_blendedmvs import load_blendedmvs_data 7 | from .load_tankstemple import load_tankstemple_data 8 | from .load_deepvoxels import load_dv_data 9 | from .load_co3d import load_co3d_data 10 | from .load_nerfpp import load_nerfpp_data 11 | 12 | 13 | def load_data(args): 14 | 15 | K, depths = None, None 16 | near_clip = None 17 | 18 | if args.dataset_type == 'llff': 19 | images, depths, poses, bds, render_poses, i_test = load_llff_data( 20 | args.datadir, args.factor, args.width, args.height, 21 | recenter=True, bd_factor=args.bd_factor, 22 | spherify=args.spherify, 23 | load_depths=args.load_depths, 24 | movie_render_kwargs=args.movie_render_kwargs) 25 | hwf = poses[0,:3,-1] 26 | poses = poses[:,:3,:4] 27 | print('Loaded llff', images.shape, render_poses.shape, hwf, args.datadir) 28 | if not isinstance(i_test, list): 29 | i_test = [i_test] 30 | 31 | if args.llffhold > 0: 32 | print('Auto LLFF holdout,', args.llffhold) 33 | i_test = np.arange(images.shape[0])[::args.llffhold] 34 | 35 | i_val = i_test 36 | i_train = np.array([i for i in np.arange(int(images.shape[0])) if 37 | (i not in i_test and i not in i_val)]) 38 | 39 | print('DEFINING BOUNDS') 40 | if args.ndc: 41 | near = 0. 42 | far = 1. 43 | else: 44 | near_clip = max(np.ndarray.min(bds) * .9, 0) 45 | _far = max(np.ndarray.max(bds) * 1., 0) 46 | near = 0 47 | far = inward_nearfar_heuristic(poses[i_train, :3, 3])[1] 48 | print('near_clip', near_clip) 49 | print('original far', _far) 50 | print('NEAR FAR', near, far) 51 | 52 | elif args.dataset_type == 'blender': 53 | images, poses, render_poses, hwf, i_split = load_blender_data(args.datadir, args.half_res, args.testskip) 54 | print('Loaded blender', images.shape, render_poses.shape, hwf, args.datadir) 55 | i_train, i_val, i_test = i_split 56 | 57 | near, far = 2., 6. 
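# The fixed [2, 6] bounds follow the original NeRF convention for the synthetic scenes:
# cameras sit on a sphere of radius ~4 around an object of roughly unit scale, so all
# visible content lies within [2, 6] along each ray. The RGBA handling below premultiplies
# by alpha; with white_bkgd it composites onto white, out = rgb * a + (1 - a),
# e.g. a half-transparent red pixel (1, 0, 0, 0.5) becomes (1.0, 0.5, 0.5).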
58 | 59 | if images.shape[-1] == 4: 60 | if args.white_bkgd: 61 | images = images[...,:3]*images[...,-1:] + (1.-images[...,-1:]) 62 | else: 63 | images = images[...,:3]*images[...,-1:] 64 | 65 | elif args.dataset_type == 'blendedmvs': 66 | images, poses, render_poses, hwf, K, i_split = load_blendedmvs_data(args.datadir) 67 | print('Loaded blendedmvs', images.shape, render_poses.shape, hwf, args.datadir) 68 | i_train, i_val, i_test = i_split 69 | 70 | near, far = inward_nearfar_heuristic(poses[i_train, :3, 3]) 71 | 72 | assert images.shape[-1] == 3 73 | 74 | elif args.dataset_type == 'tankstemple': 75 | images, poses, render_poses, hwf, K, i_split = load_tankstemple_data( 76 | args.datadir, movie_render_kwargs=args.movie_render_kwargs) 77 | print('Loaded tankstemple', images.shape, render_poses.shape, hwf, args.datadir) 78 | i_train, i_val, i_test = i_split 79 | 80 | near, far = inward_nearfar_heuristic(poses[i_train, :3, 3], ratio=0) 81 | 82 | if images.shape[-1] == 4: 83 | if args.white_bkgd: 84 | images = images[...,:3]*images[...,-1:] + (1.-images[...,-1:]) 85 | else: 86 | images = images[...,:3]*images[...,-1:] 87 | 88 | elif args.dataset_type == 'nsvf': 89 | images, poses, render_poses, hwf, i_split = load_nsvf_data(args.datadir) 90 | print('Loaded nsvf', images.shape, render_poses.shape, hwf, args.datadir) 91 | i_train, i_val, i_test = i_split 92 | 93 | near, far = inward_nearfar_heuristic(poses[i_train, :3, 3]) 94 | 95 | if images.shape[-1] == 4: 96 | if args.white_bkgd: 97 | images = images[...,:3]*images[...,-1:] + (1.-images[...,-1:]) 98 | else: 99 | images = images[...,:3]*images[...,-1:] 100 | 101 | elif args.dataset_type == 'deepvoxels': 102 | images, poses, render_poses, hwf, i_split = load_dv_data(scene=args.scene, basedir=args.datadir, testskip=args.testskip) 103 | print('Loaded deepvoxels', images.shape, render_poses.shape, hwf, args.datadir) 104 | i_train, i_val, i_test = i_split 105 | 106 | hemi_R = np.mean(np.linalg.norm(poses[:,:3,-1], axis=-1)) 107 | near = hemi_R - 1 108 | far = hemi_R + 1 109 | assert args.white_bkgd 110 | assert images.shape[-1] == 3 111 | 112 | elif args.dataset_type == 'co3d': 113 | # each image can be in different shapes and intrinsics 114 | images, masks, poses, render_poses, hwf, K, i_split = load_co3d_data(args) 115 | print('Loaded co3d', args.datadir, args.annot_path, args.sequence_name) 116 | i_train, i_val, i_test = i_split 117 | 118 | near, far = inward_nearfar_heuristic(poses[i_train, :3, 3], ratio=0) 119 | 120 | for i in range(len(images)): 121 | if args.white_bkgd: 122 | images[i] = images[i] * masks[i][...,None] + (1.-masks[i][...,None]) 123 | else: 124 | images[i] = images[i] * masks[i][...,None] 125 | 126 | elif args.dataset_type == 'nerfpp': 127 | images, poses, render_poses, hwf, K, i_split = load_nerfpp_data(args.datadir) 128 | print('Loaded nerf_pp', images.shape, hwf, args.datadir) 129 | i_train, i_val, i_test = i_split 130 | 131 | near_clip, far = inward_nearfar_heuristic(poses[i_train, :3, 3], ratio=0.02) 132 | near = 0 133 | 134 | else: 135 | raise NotImplementedError(f'Unknown dataset type {args.dataset_type} exiting') 136 | 137 | # Cast intrinsics to right types 138 | H, W, focal = hwf 139 | H, W = int(H), int(W) 140 | hwf = [H, W, focal] 141 | HW = np.array([im.shape[:2] for im in images]) 142 | irregular_shape = (images.dtype is np.dtype('object')) 143 | 144 | if K is None: 145 | K = np.array([ 146 | [focal, 0, 0.5*W], 147 | [0, focal, 0.5*H], 148 | [0, 0, 1] 149 | ]) 150 | 151 | if len(K.shape) == 2: 152 | Ks = 
K[None].repeat(len(poses), axis=0) 153 | else: 154 | Ks = K 155 | 156 | render_poses = render_poses[...,:4] 157 | 158 | data_dict = dict( 159 | hwf=hwf, HW=HW, Ks=Ks, 160 | near=near, far=far, near_clip=near_clip, 161 | i_train=i_train, i_val=i_val, i_test=i_test, 162 | poses=poses, render_poses=render_poses, 163 | images=images, depths=depths, 164 | irregular_shape=irregular_shape, 165 | ) 166 | return data_dict 167 | 168 | 169 | def inward_nearfar_heuristic(cam_o, ratio=0.05): 170 | dist = np.linalg.norm(cam_o[:,None] - cam_o, axis=-1) 171 | far = dist.max() # could be too small to cover the scene bbox; 172 | # it is only used to determine the scene bbox 173 | # (lib/dvgo uses 1e9 as far) 174 | near = far * ratio 175 | return near, far 176 | 177 | -------------------------------------------------------------------------------- /lib/load_deepvoxels.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import imageio 4 | 5 | 6 | def load_dv_data(scene='cube', basedir='/data/deepvoxels', testskip=1): 7 | 8 | def parse_intrinsics(filepath, trgt_sidelength, invert_y=False): 9 | # Get camera intrinsics 10 | with open(filepath, 'r') as file: 11 | f, cx, cy = list(map(float, file.readline().split()))[:3] 12 | grid_barycenter = np.array(list(map(float, file.readline().split()))) 13 | near_plane = float(file.readline()) 14 | scale = float(file.readline()) 15 | height, width = map(float, file.readline().split()) 16 | 17 | try: 18 | world2cam_poses = int(file.readline()) 19 | except ValueError: 20 | world2cam_poses = None 21 | 22 | if world2cam_poses is None: 23 | world2cam_poses = False 24 | 25 | world2cam_poses = bool(world2cam_poses) 26 | 27 | print(cx,cy,f,height,width) 28 | 29 | cx = cx / width * trgt_sidelength 30 | cy = cy / height * trgt_sidelength 31 | f = trgt_sidelength / height * f 32 | 33 | fx = f 34 | if invert_y: 35 | fy = -f 36 | else: 37 | fy = f 38 | 39 | # Build the intrinsic matrices 40 | full_intrinsic = np.array([[fx, 0., cx, 0.], 41 | [0., fy, cy, 0], 42 | [0., 0, 1, 0], 43 | [0, 0, 0, 1]]) 44 | 45 | return full_intrinsic, grid_barycenter, scale, near_plane, world2cam_poses 46 | 47 | 48 | def load_pose(filename): 49 | assert os.path.isfile(filename) 50 | nums = open(filename).read().split() 51 | return np.array([float(x) for x in nums]).reshape([4,4]).astype(np.float32) 52 | 53 | 54 | H = 512 55 | W = 512 56 | deepvoxels_base = '{}/train/{}/'.format(basedir, scene) 57 | 58 | full_intrinsic, grid_barycenter, scale, near_plane, world2cam_poses = parse_intrinsics(os.path.join(deepvoxels_base, 'intrinsics.txt'), H) 59 | print(full_intrinsic, grid_barycenter, scale, near_plane, world2cam_poses) 60 | focal = full_intrinsic[0,0] 61 | print(H, W, focal) 62 | 63 | def dir2poses(posedir): 64 | poses = np.stack([load_pose(os.path.join(posedir, f)) for f in sorted(os.listdir(posedir)) if f.endswith('txt')], 0) 65 | transf = np.array([ 66 | [1,0,0,0], 67 | [0,-1,0,0], 68 | [0,0,-1,0], 69 | [0,0,0,1.], 70 | ]) 71 | poses = poses @ transf 72 | poses = poses[:,:3,:4].astype(np.float32) 73 | return poses 74 | 75 | posedir = os.path.join(deepvoxels_base, 'pose') 76 | poses = dir2poses(posedir) 77 | testposes = dir2poses('{}/test/{}/pose'.format(basedir, scene)) 78 | testposes = testposes[::testskip] 79 | valposes = dir2poses('{}/validation/{}/pose'.format(basedir, scene)) 80 | valposes = valposes[::testskip] 81 | 82 | imgfiles = [f for f in sorted(os.listdir(os.path.join(deepvoxels_base, 'rgb'))) if f.endswith('png')] 83 | imgs = 
np.stack([imageio.imread(os.path.join(deepvoxels_base, 'rgb', f))/255. for f in imgfiles], 0).astype(np.float32) 84 | 85 | testimgd = '{}/test/{}/rgb'.format(basedir, scene) 86 | imgfiles = [f for f in sorted(os.listdir(testimgd)) if f.endswith('png')] 87 | testimgs = np.stack([imageio.imread(os.path.join(testimgd, f))/255. for f in imgfiles[::testskip]], 0).astype(np.float32) 88 | 89 | valimgd = '{}/validation/{}/rgb'.format(basedir, scene) 90 | imgfiles = [f for f in sorted(os.listdir(valimgd)) if f.endswith('png')] 91 | valimgs = np.stack([imageio.imread(os.path.join(valimgd, f))/255. for f in imgfiles[::testskip]], 0).astype(np.float32) 92 | 93 | all_imgs = [imgs, valimgs, testimgs] 94 | counts = [0] + [x.shape[0] for x in all_imgs] 95 | counts = np.cumsum(counts) 96 | i_split = [np.arange(counts[i], counts[i+1]) for i in range(3)] 97 | 98 | imgs = np.concatenate(all_imgs, 0) 99 | poses = np.concatenate([poses, valposes, testposes], 0) 100 | 101 | render_poses = testposes 102 | 103 | print(poses.shape, imgs.shape) 104 | 105 | return imgs, poses, render_poses, [H, W, focal], i_split 106 | 107 | 108 | -------------------------------------------------------------------------------- /lib/load_llff.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os, imageio 3 | import torch 4 | import scipy 5 | 6 | ########## Slightly modified version of LLFF data loading code 7 | ########## see https://github.com/Fyusion/LLFF for original 8 | def imread(f): 9 | if f.endswith('png'): 10 | return imageio.imread(f, ignoregamma=True) 11 | else: 12 | return imageio.imread(f) 13 | 14 | def depthread(path): 15 | with open(path, "rb") as fid: 16 | width, height, channels = np.genfromtxt(fid, delimiter="&", max_rows=1, 17 | usecols=(0, 1, 2), dtype=int) 18 | fid.seek(0) 19 | num_delimiter = 0 20 | byte = fid.read(1) 21 | while True: 22 | if byte == b"&": 23 | num_delimiter += 1 24 | if num_delimiter >= 3: 25 | break 26 | byte = fid.read(1) 27 | array = np.fromfile(fid, np.float32) 28 | array = array.reshape((width, height, channels), order="F") 29 | return np.transpose(array, (1, 0, 2)).squeeze() 30 | 31 | 32 | def _minify(basedir, factors=[], resolutions=[]): 33 | needtoload = False 34 | for r in factors: 35 | imgdir = os.path.join(basedir, 'images_{}'.format(r)) 36 | if not os.path.exists(imgdir): 37 | needtoload = True 38 | for r in resolutions: 39 | imgdir = os.path.join(basedir, 'images_{}x{}'.format(r[1], r[0])) 40 | if not os.path.exists(imgdir): 41 | needtoload = True 42 | if not needtoload: 43 | return 44 | 45 | from shutil import copy 46 | from subprocess import check_output 47 | 48 | imgdir = os.path.join(basedir, 'images') 49 | imgs = [os.path.join(imgdir, f) for f in sorted(os.listdir(imgdir))] 50 | imgs = [f for f in imgs if any([f.endswith(ex) for ex in ['JPG', 'jpg', 'png', 'jpeg', 'PNG']])] 51 | imgdir_orig = imgdir 52 | 53 | wd = os.getcwd() 54 | 55 | for r in factors + resolutions: 56 | if isinstance(r, int): 57 | name = 'images_{}'.format(r) 58 | resizearg = '{}%'.format(100./r) 59 | else: 60 | name = 'images_{}x{}'.format(r[1], r[0]) 61 | resizearg = '{}x{}'.format(r[1], r[0]) 62 | imgdir = os.path.join(basedir, name) 63 | if os.path.exists(imgdir): 64 | continue 65 | 66 | print('Minifying', r, basedir) 67 | 68 | os.makedirs(imgdir) 69 | check_output('cp {}/* {}'.format(imgdir_orig, imgdir), shell=True) 70 | 71 | ext = imgs[0].split('.')[-1] 72 | args = ' '.join(['mogrify', '-resize', resizearg, '-format', 'png', 
'*.{}'.format(ext)]) 73 | print(args) 74 | os.chdir(imgdir) 75 | check_output(args, shell=True) 76 | os.chdir(wd) 77 | 78 | if ext != 'png': 79 | check_output('rm {}/*.{}'.format(imgdir, ext), shell=True) 80 | print('Removed duplicates') 81 | print('Done') 82 | 83 | 84 | def _load_data(basedir, factor=None, width=None, height=None, load_imgs=True, load_depths=False): 85 | 86 | poses_arr = np.load(os.path.join(basedir, 'poses_bounds.npy')) 87 | if poses_arr.shape[1] == 17: 88 | poses = poses_arr[:, :-2].reshape([-1, 3, 5]).transpose([1,2,0]) 89 | elif poses_arr.shape[1] == 14: 90 | poses = poses_arr[:, :-2].reshape([-1, 3, 4]).transpose([1,2,0]) 91 | else: 92 | raise NotImplementedError 93 | bds = poses_arr[:, -2:].transpose([1,0]) 94 | 95 | img0 = [os.path.join(basedir, 'images', f) for f in sorted(os.listdir(os.path.join(basedir, 'images'))) \ 96 | if f.endswith('JPG') or f.endswith('jpg') or f.endswith('png')][0] 97 | sh = imageio.imread(img0).shape 98 | 99 | sfx = '' 100 | 101 | if height is not None and width is not None: 102 | _minify(basedir, resolutions=[[height, width]]) 103 | sfx = '_{}x{}'.format(width, height) 104 | elif factor is not None and factor != 1: 105 | sfx = '_{}'.format(factor) 106 | _minify(basedir, factors=[factor]) 107 | factor = factor 108 | elif height is not None: 109 | factor = sh[0] / float(height) 110 | width = int(sh[1] / factor) 111 | _minify(basedir, resolutions=[[height, width]]) 112 | sfx = '_{}x{}'.format(width, height) 113 | elif width is not None: 114 | factor = sh[1] / float(width) 115 | height = int(sh[0] / factor) 116 | _minify(basedir, resolutions=[[height, width]]) 117 | sfx = '_{}x{}'.format(width, height) 118 | else: 119 | factor = 1 120 | 121 | imgdir = os.path.join(basedir, 'images' + sfx) 122 | print(f'Loading images from {imgdir}') 123 | if not os.path.exists(imgdir): 124 | print( imgdir, 'does not exist, returning' ) 125 | return 126 | 127 | imgfiles = [os.path.join(imgdir, f) for f in sorted(os.listdir(imgdir)) if f.endswith('JPG') or f.endswith('jpg') or f.endswith('png')] 128 | if poses.shape[-1] != len(imgfiles): 129 | print() 130 | print( 'Mismatch between imgs {} and poses {} !!!!'.format(len(imgfiles), poses.shape[-1]) ) 131 | names = set(name[:-4] for name in np.load(os.path.join(basedir, 'poses_names.npy'))) 132 | assert len(names) == poses.shape[-1] 133 | print('Below files are skipped due to SfM failure:') 134 | new_imgfiles = [] 135 | for i in imgfiles: 136 | fname = os.path.split(i)[1][:-4] 137 | if fname in names: 138 | new_imgfiles.append(i) 139 | else: 140 | print('==>', i) 141 | imgfiles = new_imgfiles 142 | 143 | if len(imgfiles) < 3: 144 | print('Too few images...') 145 | import sys; sys.exit() 146 | 147 | sh = imageio.imread(imgfiles[0]).shape 148 | if poses.shape[1] == 4: 149 | poses = np.concatenate([poses, np.zeros_like(poses[:,[0]])], 1) 150 | poses[2, 4, :] = np.load(os.path.join(basedir, 'hwf_cxcy.npy'))[2] 151 | poses[:2, 4, :] = np.array(sh[:2]).reshape([2, 1]) 152 | poses[2, 4, :] = poses[2, 4, :] * 1./factor 153 | 154 | if not load_imgs: 155 | return poses, bds 156 | 157 | 158 | imgs = [imread(f)[...,:3]/255. 
for f in imgfiles] 159 | imgs = np.stack(imgs, -1) 160 | 161 | print('Loaded image data', imgs.shape, poses[:,-1,0]) 162 | 163 | if not load_depths: 164 | return poses, bds, imgs 165 | 166 | depthdir = os.path.join(basedir, 'stereo', 'depth_maps') 167 | assert os.path.exists(depthdir), f'Dir not found: {depthdir}' 168 | 169 | depthfiles = [os.path.join(depthdir, f) for f in sorted(os.listdir(depthdir)) if f.endswith('.geometric.bin')] 170 | assert poses.shape[-1] == len(depthfiles), 'Mismatch between imgs {} and poses {} !!!!'.format(len(depthfiles), poses.shape[-1]) 171 | 172 | depths = [depthread(f) for f in depthfiles] 173 | depths = np.stack(depths, -1) 174 | print('Loaded depth data', depths.shape) 175 | return poses, bds, imgs, depths 176 | 177 | 178 | def normalize(x): 179 | return x / np.linalg.norm(x) 180 | 181 | def viewmatrix(z, up, pos): 182 | vec2 = normalize(z) 183 | vec1_avg = up 184 | vec0 = normalize(np.cross(vec1_avg, vec2)) 185 | vec1 = normalize(np.cross(vec2, vec0)) 186 | m = np.stack([vec0, vec1, vec2, pos], 1) 187 | return m 188 | 189 | def ptstocam(pts, c2w): 190 | tt = np.matmul(c2w[:3,:3].T, (pts-c2w[:3,3])[...,np.newaxis])[...,0] 191 | return tt 192 | 193 | def poses_avg(poses): 194 | 195 | hwf = poses[0, :3, -1:] 196 | 197 | center = poses[:, :3, 3].mean(0) 198 | vec2 = normalize(poses[:, :3, 2].sum(0)) 199 | up = poses[:, :3, 1].sum(0) 200 | c2w = np.concatenate([viewmatrix(vec2, up, center), hwf], 1) 201 | 202 | return c2w 203 | 204 | 205 | 206 | def render_path_spiral(c2w, up, rads, focal, zdelta, zrate, rots, N): 207 | render_poses = [] 208 | rads = np.array(list(rads) + [1.]) 209 | hwf = c2w[:,4:5] 210 | 211 | for theta in np.linspace(0., 2. * np.pi * rots, N+1)[:-1]: 212 | c = np.dot(c2w[:3,:4], np.array([np.cos(theta), -np.sin(theta), -np.sin(theta*zrate)*zdelta, 1.]) * rads) 213 | z = normalize(c - np.dot(c2w[:3,:4], np.array([0,0,-focal, 1.]))) 214 | render_poses.append(np.concatenate([viewmatrix(z, up, c), hwf], 1)) 215 | return render_poses 216 | 217 | 218 | 219 | def recenter_poses(poses): 220 | 221 | poses_ = poses+0 222 | bottom = np.reshape([0,0,0,1.], [1,4]) 223 | c2w = poses_avg(poses) 224 | c2w = np.concatenate([c2w[:3,:4], bottom], -2) 225 | bottom = np.tile(np.reshape(bottom, [1,1,4]), [poses.shape[0],1,1]) 226 | poses = np.concatenate([poses[:,:3,:4], bottom], -2) 227 | 228 | poses = np.linalg.inv(c2w) @ poses 229 | poses_[:,:3,:4] = poses[:,:3,:4] 230 | poses = poses_ 231 | return poses 232 | 233 | 234 | def rerotate_poses(poses): 235 | poses = np.copy(poses) 236 | centroid = poses[:,:3,3].mean(0) 237 | 238 | poses[:,:3,3] = poses[:,:3,3] - centroid 239 | 240 | # Find the minimum pca vector with minimum eigen value 241 | x = poses[:,:,3] 242 | mu = x.mean(0) 243 | cov = np.cov((x-mu).T) 244 | ev , eig = np.linalg.eig(cov) 245 | cams_up = eig[:,np.argmin(ev)] 246 | if cams_up[1] < 0: 247 | cams_up = -cams_up 248 | 249 | # Find rotation matrix that align cams_up with [0,1,0] 250 | R = scipy.spatial.transform.Rotation.align_vectors( 251 | [[0,1,0]], cams_up[None])[0].as_matrix() 252 | 253 | # Apply rotation and add back the centroid position 254 | poses[:,:3,:3] = R @ poses[:,:3,:3] 255 | poses[:,:3,[3]] = R @ poses[:,:3,[3]] 256 | poses[:,:3,3] = poses[:,:3,3] + centroid 257 | return poses 258 | 259 | ##################### 260 | 261 | 262 | def spherify_poses(poses, bds, depths): 263 | 264 | p34_to_44 = lambda p : np.concatenate([p, np.tile(np.reshape(np.eye(4)[-1,:], [1,1,4]), [p.shape[0], 1,1])], 1) 265 | 266 | rays_d = poses[:,:3,2:3] 267 
| rays_o = poses[:,:3,3:4] 268 | 269 | def min_line_dist(rays_o, rays_d): 270 | A_i = np.eye(3) - rays_d * np.transpose(rays_d, [0,2,1]) 271 | b_i = -A_i @ rays_o 272 | pt_mindist = np.squeeze(-np.linalg.inv((np.transpose(A_i, [0,2,1]) @ A_i).mean(0)) @ (b_i).mean(0)) 273 | return pt_mindist 274 | 275 | pt_mindist = min_line_dist(rays_o, rays_d) 276 | 277 | center = pt_mindist 278 | up = (poses[:,:3,3] - center).mean(0) 279 | 280 | vec0 = normalize(up) 281 | vec1 = normalize(np.cross([.1,.2,.3], vec0)) 282 | vec2 = normalize(np.cross(vec0, vec1)) 283 | pos = center 284 | c2w = np.stack([vec1, vec2, vec0, pos], 1) 285 | 286 | poses_reset = np.linalg.inv(p34_to_44(c2w[None])) @ p34_to_44(poses[:,:3,:4]) 287 | 288 | radius = np.sqrt(np.mean(np.sum(np.square(poses_reset[:,:3,3]), -1))) 289 | 290 | sc = 1./radius 291 | poses_reset[:,:3,3] *= sc 292 | bds *= sc 293 | radius *= sc 294 | depths *= sc 295 | 296 | poses_reset = np.concatenate([poses_reset[:,:3,:4], np.broadcast_to(poses[0,:3,-1:], poses_reset[:,:3,-1:].shape)], -1) 297 | 298 | return poses_reset, radius, bds, depths 299 | 300 | 301 | def load_llff_data(basedir, factor=8, width=None, height=None, 302 | recenter=True, rerotate=True, 303 | bd_factor=.75, spherify=False, path_zflat=False, load_depths=False, 304 | movie_render_kwargs={}): 305 | 306 | poses, bds, imgs, *depths = _load_data(basedir, factor=factor, width=width, height=height, 307 | load_depths=load_depths) # factor=8 downsamples original imgs by 8x 308 | print('Loaded', basedir, bds.min(), bds.max()) 309 | if load_depths: 310 | depths = depths[0] 311 | else: 312 | depths = 0 313 | 314 | # Correct rotation matrix ordering and move variable dim to axis 0 315 | poses = np.concatenate([poses[:, 1:2, :], -poses[:, 0:1, :], poses[:, 2:, :]], 1) 316 | poses = np.moveaxis(poses, -1, 0).astype(np.float32) 317 | imgs = np.moveaxis(imgs, -1, 0).astype(np.float32) 318 | images = imgs 319 | bds = np.moveaxis(bds, -1, 0).astype(np.float32) 320 | 321 | # Rescale if bd_factor is provided 322 | if bds.min() < 0 and bd_factor is not None: 323 | print('Found negative z values from SfM sparse points!?') 324 | print('Please try bd_factor=None') 325 | import sys; sys.exit() 326 | sc = 1. 
if bd_factor is None else 1./(bds.min() * bd_factor) 327 | poses[:,:3,3] *= sc 328 | bds *= sc 329 | depths *= sc 330 | 331 | if recenter: 332 | poses = recenter_poses(poses) 333 | 334 | if spherify: 335 | poses, radius, bds, depths = spherify_poses(poses, bds, depths) 336 | if rerotate: 337 | poses = rerotate_poses(poses) 338 | 339 | ### generate spiral poses for rendering fly-through movie 340 | centroid = poses[:,:3,3].mean(0) 341 | radcircle = movie_render_kwargs.get('scale_r', 1) * np.linalg.norm(poses[:,:3,3] - centroid, axis=-1).mean() 342 | centroid[0] += movie_render_kwargs.get('shift_x', 0) 343 | centroid[1] += movie_render_kwargs.get('shift_y', 0) 344 | centroid[2] += movie_render_kwargs.get('shift_z', 0) 345 | new_up_rad = movie_render_kwargs.get('pitch_deg', 0) * np.pi / 180 346 | target_y = radcircle * np.tan(new_up_rad) 347 | 348 | render_poses = [] 349 | 350 | for th in np.linspace(0., 2.*np.pi, 200): 351 | camorigin = np.array([radcircle * np.cos(th), 0, radcircle * np.sin(th)]) 352 | if movie_render_kwargs.get('flip_up', False): 353 | up = np.array([0,1.,0]) 354 | else: 355 | up = np.array([0,-1.,0]) 356 | vec2 = normalize(camorigin) 357 | vec0 = normalize(np.cross(vec2, up)) 358 | vec1 = normalize(np.cross(vec2, vec0)) 359 | pos = camorigin + centroid 360 | # rotate to align with new pitch rotation 361 | lookat = -vec2 362 | lookat[1] = target_y 363 | lookat = normalize(lookat) 364 | vec2 = -lookat 365 | vec1 = normalize(np.cross(vec2, vec0)) 366 | 367 | p = np.stack([vec0, vec1, vec2, pos], 1) 368 | 369 | render_poses.append(p) 370 | 371 | render_poses = np.stack(render_poses, 0) 372 | render_poses = np.concatenate([render_poses, np.broadcast_to(poses[0,:3,-1:], render_poses[:,:3,-1:].shape)], -1) 373 | 374 | else: 375 | 376 | c2w = poses_avg(poses) 377 | print('recentered', c2w.shape) 378 | print(c2w[:3,:4]) 379 | 380 | ## Get spiral 381 | # Get average pose 382 | up = normalize(poses[:, :3, 1].sum(0)) 383 | 384 | # Find a reasonable "focus depth" for this dataset 385 | close_depth, inf_depth = bds.min()*.9, bds.max()*5. 386 | dt = .75 387 | mean_dz = 1./(((1.-dt)/close_depth + dt/inf_depth)) 388 | focal = mean_dz * movie_render_kwargs.get('scale_f', 1) 389 | 390 | # Get radii for spiral path 391 | zdelta = movie_render_kwargs.get('zdelta', 0.5) 392 | zrate = movie_render_kwargs.get('zrate', 1.0) 393 | tt = poses[:,:3,3] # ptstocam(poses[:3,3,:].T, c2w).T 394 | rads = np.percentile(np.abs(tt), 90, 0) * movie_render_kwargs.get('scale_r', 1) 395 | c2w_path = c2w 396 | N_views = 120 397 | N_rots = movie_render_kwargs.get('N_rots', 1) 398 | if path_zflat: 399 | # zloc = np.percentile(tt, 10, 0)[2] 400 | zloc = -close_depth * .1 401 | c2w_path[:3,3] = c2w_path[:3,3] + zloc * c2w_path[:3,2] 402 | rads[2] = 0. 
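# path_zflat flattens the spiral: the path center is nudged along the view axis (zloc)
# and the z-radius zeroed above, then a single rotation with half the views is rendered
# below. The "focus depth" further up is a harmonic blend,
#   mean_dz = 1 / ((1 - dt)/close_depth + dt/inf_depth), with dt = 0.75;
# e.g. close_depth = 2, inf_depth = 100 gives 1 / (0.125 + 0.0075) ~ 7.55.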
403 | N_rots = 1 404 | N_views/=2 405 | 406 | # Generate poses for spiral path 407 | render_poses = render_path_spiral(c2w_path, up, rads, focal, zdelta, zrate=zrate, rots=N_rots, N=N_views) 408 | 409 | render_poses = torch.Tensor(render_poses) 410 | 411 | c2w = poses_avg(poses) 412 | print('Data:') 413 | print(poses.shape, images.shape, bds.shape) 414 | 415 | dists = np.sum(np.square(c2w[:3,3] - poses[:,:3,3]), -1) 416 | i_test = np.argmin(dists) 417 | print('HOLDOUT view is', i_test) 418 | 419 | images = images.astype(np.float32) 420 | poses = poses.astype(np.float32) 421 | 422 | return images, depths, poses, bds, render_poses, i_test 423 | 424 | -------------------------------------------------------------------------------- /lib/load_nerfpp.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Modify from 3 | https://github.com/Kai-46/nerfplusplus/blob/master/data_loader_split.py 4 | ''' 5 | import os 6 | import glob 7 | import scipy 8 | import imageio 9 | import numpy as np 10 | import torch 11 | 12 | ######################################################################################################################## 13 | # camera coordinate system: x-->right, y-->down, z-->scene (opencv/colmap convention) 14 | # poses is camera-to-world 15 | ######################################################################################################################## 16 | def find_files(dir, exts): 17 | if os.path.isdir(dir): 18 | files_grabbed = [] 19 | for ext in exts: 20 | files_grabbed.extend(glob.glob(os.path.join(dir, ext))) 21 | if len(files_grabbed) > 0: 22 | files_grabbed = sorted(files_grabbed) 23 | return files_grabbed 24 | else: 25 | return [] 26 | 27 | 28 | def load_data_split(split_dir, skip=1, try_load_min_depth=True, only_img_files=False): 29 | 30 | def parse_txt(filename): 31 | assert os.path.isfile(filename) 32 | nums = open(filename).read().split() 33 | return np.array([float(x) for x in nums]).reshape([4, 4]).astype(np.float32) 34 | 35 | if only_img_files: 36 | img_files = find_files('{}/rgb'.format(split_dir), exts=['*.png', '*.jpg']) 37 | return img_files 38 | 39 | # camera parameters files 40 | intrinsics_files = find_files('{}/intrinsics'.format(split_dir), exts=['*.txt']) 41 | pose_files = find_files('{}/pose'.format(split_dir), exts=['*.txt']) 42 | 43 | intrinsics_files = intrinsics_files[::skip] 44 | pose_files = pose_files[::skip] 45 | cam_cnt = len(pose_files) 46 | 47 | # img files 48 | img_files = find_files('{}/rgb'.format(split_dir), exts=['*.png', '*.jpg']) 49 | if len(img_files) > 0: 50 | img_files = img_files[::skip] 51 | assert(len(img_files) == cam_cnt) 52 | else: 53 | img_files = [None, ] * cam_cnt 54 | 55 | # mask files 56 | mask_files = find_files('{}/mask'.format(split_dir), exts=['*.png', '*.jpg']) 57 | if len(mask_files) > 0: 58 | mask_files = mask_files[::skip] 59 | assert(len(mask_files) == cam_cnt) 60 | else: 61 | mask_files = [None, ] * cam_cnt 62 | 63 | # min depth files 64 | mindepth_files = find_files('{}/min_depth'.format(split_dir), exts=['*.png', '*.jpg']) 65 | if try_load_min_depth and len(mindepth_files) > 0: 66 | mindepth_files = mindepth_files[::skip] 67 | assert(len(mindepth_files) == cam_cnt) 68 | else: 69 | mindepth_files = [None, ] * cam_cnt 70 | 71 | return intrinsics_files, pose_files, img_files, mask_files, mindepth_files 72 | 73 | 74 | def rerotate_poses(poses, render_poses): 75 | poses = np.copy(poses) 76 | centroid = poses[:,:3,3].mean(0) 77 | 78 | poses[:,:3,3] = poses[:,:3,3] - 
centroid 79 | 80 | # Find the principal direction with the minimum eigenvalue 81 | x = poses[:,:3,3] 82 | mu = x.mean(0) 83 | cov = np.cov((x-mu).T) 84 | ev , eig = np.linalg.eig(cov) 85 | cams_up = eig[:,np.argmin(ev)] 86 | if cams_up[1] < 0: 87 | cams_up = -cams_up 88 | 89 | # Find rotation matrix that aligns cams_up with [0,-1,0] 90 | R = scipy.spatial.transform.Rotation.align_vectors( 91 | [[0,-1,0]], cams_up[None])[0].as_matrix() 92 | 93 | # Apply rotation and add back the centroid position 94 | poses[:,:3,:3] = R @ poses[:,:3,:3] 95 | poses[:,:3,[3]] = R @ poses[:,:3,[3]] 96 | poses[:,:3,3] = poses[:,:3,3] + centroid 97 | render_poses = np.copy(render_poses) 98 | render_poses[:,:3,3] = render_poses[:,:3,3] - centroid 99 | render_poses[:,:3,:3] = R @ render_poses[:,:3,:3] 100 | render_poses[:,:3,[3]] = R @ render_poses[:,:3,[3]] 101 | render_poses[:,:3,3] = render_poses[:,:3,3] + centroid 102 | return poses, render_poses 103 | 104 | 105 | def load_nerfpp_data(basedir, rerotate=True): 106 | tr_K, tr_c2w, tr_im_path = load_data_split(os.path.join(basedir, 'train'))[:3] 107 | te_K, te_c2w, te_im_path = load_data_split(os.path.join(basedir, 'test'))[:3] 108 | assert len(tr_K) == len(tr_c2w) and len(tr_K) == len(tr_im_path) 109 | assert len(te_K) == len(te_c2w) and len(te_K) == len(te_im_path) 110 | 111 | # Determine split id list 112 | i_split = [[], []] 113 | i = 0 114 | for _ in tr_c2w: 115 | i_split[0].append(i) 116 | i += 1 117 | for _ in te_c2w: 118 | i_split[1].append(i) 119 | i += 1 120 | 121 | # Load camera intrinsics. Assume all images share the same intrinsics. 122 | K_flatten = np.loadtxt(tr_K[0]) 123 | for path in tr_K: 124 | assert np.allclose(np.loadtxt(path), K_flatten) 125 | for path in te_K: 126 | assert np.allclose(np.loadtxt(path), K_flatten) 127 | K = K_flatten.reshape(4,4)[:3,:3] 128 | 129 | # Load camera poses 130 | poses = [] 131 | for path in tr_c2w: 132 | poses.append(np.loadtxt(path).reshape(4,4)) 133 | for path in te_c2w: 134 | poses.append(np.loadtxt(path).reshape(4,4)) 135 | 136 | # Load images 137 | imgs = [] 138 | for path in tr_im_path: 139 | imgs.append(imageio.imread(path) / 255.) 140 | for path in te_im_path: 141 | imgs.append(imageio.imread(path) / 255.) 
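# rerotate_poses above picks the scene "up" axis by PCA: the eigenvector of the camera-center
# covariance with the smallest eigenvalue is the direction the (roughly planar) camera ring
# varies least along. A minimal sketch of that step:
#   x = poses[:, :3, 3]
#   ev, eig = np.linalg.eig(np.cov((x - x.mean(0)).T))
#   up = eig[:, np.argmin(ev)]
# align_vectors then yields the rotation taking `up` to [0, -1, 0], which is applied to
# both the data poses and the render trajectory.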
142 | 143 | # Bundle all data 144 | imgs = np.stack(imgs, 0) 145 | poses = np.stack(poses, 0) 146 | i_split.append(i_split[1]) 147 | H, W = imgs.shape[1:3] 148 | focal = K[[0,1], [0,1]].mean() 149 | 150 | # Generate movie trajectory 151 | render_poses_path = sorted(glob.glob(os.path.join(basedir, 'camera_path', 'pose', '*txt'))) 152 | render_poses = [] 153 | for path in render_poses_path: 154 | render_poses.append(np.loadtxt(path).reshape(4,4)) 155 | render_poses = np.array(render_poses) 156 | render_K = np.loadtxt(glob.glob(os.path.join(basedir, 'camera_path', 'intrinsics', '*txt'))[0]).reshape(4,4)[:3,:3] 157 | render_poses[:,:,0] *= K[0,0] / render_K[0,0] 158 | render_poses[:,:,1] *= K[1,1] / render_K[1,1] 159 | if rerotate: 160 | poses, render_poses = rerotate_poses(poses, render_poses) 161 | 162 | render_poses = torch.Tensor(render_poses) 163 | 164 | return imgs, poses, render_poses, [H, W, focal], K, i_split 165 | 166 | -------------------------------------------------------------------------------- /lib/load_nsvf.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import torch 4 | import numpy as np 5 | import imageio 6 | import json 7 | import torch.nn.functional as F 8 | import cv2 9 | 10 | 11 | trans_t = lambda t : torch.Tensor([ 12 | [1,0,0,0], 13 | [0,1,0,0], 14 | [0,0,1,t], 15 | [0,0,0,1]]).float() 16 | 17 | rot_phi = lambda phi : torch.Tensor([ 18 | [1,0,0,0], 19 | [0,np.cos(phi),-np.sin(phi),0], 20 | [0,np.sin(phi), np.cos(phi),0], 21 | [0,0,0,1]]).float() 22 | 23 | rot_theta = lambda th : torch.Tensor([ 24 | [np.cos(th),0,-np.sin(th),0], 25 | [0,1,0,0], 26 | [np.sin(th),0, np.cos(th),0], 27 | [0,0,0,1]]).float() 28 | 29 | 30 | def pose_spherical(theta, phi, radius): 31 | c2w = trans_t(radius) 32 | c2w = rot_phi(phi/180.*np.pi) @ c2w 33 | c2w = rot_theta(theta/180.*np.pi) @ c2w 34 | c2w = torch.Tensor(np.array([[-1,0,0,0],[0,0,1,0],[0,1,0,0],[0,0,0,1]])) @ c2w 35 | c2w[:,[1,2]] *= -1 36 | return c2w 37 | 38 | 39 | def load_nsvf_data(basedir): 40 | pose_paths = sorted(glob.glob(os.path.join(basedir, 'pose', '*txt'))) 41 | rgb_paths = sorted(glob.glob(os.path.join(basedir, 'rgb', '*png'))) 42 | 43 | all_poses = [] 44 | all_imgs = [] 45 | i_split = [[], [], []] 46 | for i, (pose_path, rgb_path) in enumerate(zip(pose_paths, rgb_paths)): 47 | i_set = int(os.path.split(rgb_path)[-1][0]) 48 | all_imgs.append((imageio.imread(rgb_path) / 255.).astype(np.float32)) 49 | all_poses.append(np.loadtxt(pose_path).astype(np.float32)) 50 | i_split[i_set].append(i) 51 | 52 | imgs = np.stack(all_imgs, 0) 53 | poses = np.stack(all_poses, 0) 54 | 55 | H, W = imgs[0].shape[:2] 56 | with open(os.path.join(basedir, 'intrinsics.txt')) as f: 57 | focal = float(f.readline().split()[0]) 58 | 59 | R = np.sqrt((poses[...,:3,3]**2).sum(-1)).mean() 60 | render_poses = torch.stack([pose_spherical(angle, -30.0, R) for angle in np.linspace(-180,180,200+1)[:-1]], 0) 61 | 62 | return imgs, poses, render_poses, [H, W, focal], i_split 63 | 64 | -------------------------------------------------------------------------------- /lib/load_tankstemple.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import torch 4 | import numpy as np 5 | import imageio 6 | import json 7 | import torch.nn.functional as F 8 | import cv2 9 | 10 | 11 | def normalize(x): 12 | return x / np.linalg.norm(x) 13 | 14 | def load_tankstemple_data(basedir, movie_render_kwargs={}): 15 | pose_paths = 
sorted(glob.glob(os.path.join(basedir, 'pose', '*txt'))) 16 | rgb_paths = sorted(glob.glob(os.path.join(basedir, 'rgb', '*png'))) 17 | 18 | all_poses = [] 19 | all_imgs = [] 20 | i_split = [[], []] 21 | for i, (pose_path, rgb_path) in enumerate(zip(pose_paths, rgb_paths)): 22 | i_set = int(os.path.split(rgb_path)[-1][0]) 23 | all_poses.append(np.loadtxt(pose_path).astype(np.float32)) 24 | all_imgs.append((imageio.imread(rgb_path) / 255.).astype(np.float32)) 25 | i_split[i_set].append(i) 26 | 27 | imgs = np.stack(all_imgs, 0) 28 | poses = np.stack(all_poses, 0) 29 | i_split.append(i_split[-1]) 30 | 31 | path_intrinsics = os.path.join(basedir, 'intrinsics.txt') 32 | H, W = imgs[0].shape[:2] 33 | K = np.loadtxt(path_intrinsics) 34 | focal = float(K[0,0]) 35 | 36 | ### generate spiral poses for rendering fly-through movie 37 | centroid = poses[:,:3,3].mean(0) 38 | radcircle = movie_render_kwargs.get('scale_r', 1.0) * np.linalg.norm(poses[:,:3,3] - centroid, axis=-1).mean() 39 | centroid[0] += movie_render_kwargs.get('shift_x', 0) 40 | centroid[1] += movie_render_kwargs.get('shift_y', 0) 41 | centroid[2] += movie_render_kwargs.get('shift_z', 0) 42 | new_up_rad = movie_render_kwargs.get('pitch_deg', 0) * np.pi / 180 43 | target_y = radcircle * np.tan(new_up_rad) 44 | 45 | render_poses = [] 46 | 47 | for th in np.linspace(0., 2.*np.pi, 200): 48 | camorigin = np.array([radcircle * np.cos(th), 0, radcircle * np.sin(th)]) 49 | if movie_render_kwargs.get('flip_up_vec', False): 50 | up = np.array([0,-1.,0]) 51 | else: 52 | up = np.array([0,1.,0]) 53 | vec2 = normalize(camorigin) 54 | vec0 = normalize(np.cross(vec2, up)) 55 | vec1 = normalize(np.cross(vec2, vec0)) 56 | pos = camorigin + centroid 57 | # rotate to align with new pitch rotation 58 | lookat = -vec2 59 | lookat[1] = target_y 60 | lookat = normalize(lookat) 61 | lookat *= -1 62 | vec2 = -lookat 63 | vec1 = normalize(np.cross(vec2, vec0)) 64 | 65 | p = np.stack([vec0, vec1, vec2, pos], 1) 66 | 67 | render_poses.append(p) 68 | 69 | render_poses = np.stack(render_poses, 0) 70 | render_poses = np.concatenate([render_poses, np.broadcast_to(poses[0,:3,-1:], render_poses[:,:3,-1:].shape)], -1) 71 | 72 | return imgs, poses, render_poses, [H, W, focal], K, i_split 73 | 74 | -------------------------------------------------------------------------------- /lib/masked_adam.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.cpp_extension import load 4 | 5 | parent_dir = os.path.dirname(os.path.abspath(__file__)) 6 | sources=['cuda/adam_upd.cpp', 'cuda/adam_upd_kernel.cu'] 7 | adam_upd_cuda = load( 8 | name='adam_upd_cuda', 9 | sources=[os.path.join(parent_dir, path) for path in sources], 10 | verbose=True) 11 | 12 | 13 | ''' Extend Adam optimizer 14 | 1. support per-voxel learning rate 15 | 2. 
masked update (ignore zero grad) which speeds up training 16 | ''' 17 | class MaskedAdam(torch.optim.Optimizer): 18 | 19 | def __init__(self, params, lr=1e-3, betas=(0.9, 0.99), eps=1e-8): 20 | if not 0.0 <= lr: 21 | raise ValueError("Invalid learning rate: {}".format(lr)) 22 | if not 0.0 <= eps: 23 | raise ValueError("Invalid epsilon value: {}".format(eps)) 24 | if not 0.0 <= betas[0] < 1.0: 25 | raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0])) 26 | if not 0.0 <= betas[1] < 1.0: 27 | raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1])) 28 | defaults = dict(lr=lr, betas=betas, eps=eps) 29 | self.per_lr = None 30 | super(MaskedAdam, self).__init__(params, defaults) 31 | 32 | def __setstate__(self, state): 33 | super(MaskedAdam, self).__setstate__(state) 34 | 35 | def set_pervoxel_lr(self, count): 36 | assert self.param_groups[0]['params'][0].shape == count.shape 37 | self.per_lr = count.float() / count.max() 38 | 39 | @torch.no_grad() 40 | def step(self): 41 | for group in self.param_groups: 42 | lr = group['lr'] 43 | beta1, beta2 = group['betas'] 44 | eps = group['eps'] 45 | skip_zero_grad = group['skip_zero_grad'] 46 | 47 | for param in group['params']: 48 | if param.grad is not None: 49 | state = self.state[param] 50 | # Lazy state initialization 51 | if len(state) == 0: 52 | state['step'] = 0 53 | # Exponential moving average of gradient values 54 | state['exp_avg'] = torch.zeros_like(param, memory_format=torch.preserve_format) 55 | # Exponential moving average of squared gradient values 56 | state['exp_avg_sq'] = torch.zeros_like(param, memory_format=torch.preserve_format) 57 | 58 | state['step'] += 1 59 | 60 | if self.per_lr is not None and param.shape == self.per_lr.shape: 61 | adam_upd_cuda.adam_upd_with_perlr( 62 | param, param.grad, state['exp_avg'], state['exp_avg_sq'], self.per_lr, 63 | state['step'], beta1, beta2, lr, eps) 64 | elif skip_zero_grad: 65 | adam_upd_cuda.masked_adam_upd( 66 | param, param.grad, state['exp_avg'], state['exp_avg_sq'], 67 | state['step'], beta1, beta2, lr, eps) 68 | else: 69 | adam_upd_cuda.adam_upd( 70 | param, param.grad, state['exp_avg'], state['exp_avg_sq'], 71 | state['step'], beta1, beta2, lr, eps) 72 | 73 | -------------------------------------------------------------------------------- /lib/utils.py: -------------------------------------------------------------------------------- 1 | import os, math 2 | import numpy as np 3 | import scipy.signal 4 | from typing import List, Optional 5 | 6 | from torch import Tensor 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | 11 | from .masked_adam import MaskedAdam 12 | 13 | 14 | ''' Misc 15 | ''' 16 | mse2psnr = lambda x : -10. 
* torch.log10(x) 17 | to8b = lambda x : (255*np.clip(x,0,1)).astype(np.uint8) 18 | 19 | def create_optimizer_or_freeze_model(model, cfg_train, global_step): 20 | decay_steps = cfg_train.lrate_decay * 1000 21 | decay_factor = 0.1 ** (global_step/decay_steps) 22 | 23 | param_group = [] 24 | for k in cfg_train.keys(): 25 | if not k.startswith('lrate_'): 26 | continue 27 | k = k[len('lrate_'):] 28 | 29 | if not hasattr(model, k): 30 | continue 31 | 32 | param = getattr(model, k) 33 | if param is None: 34 | print(f'create_optimizer_or_freeze_model: param {k} not exist') 35 | continue 36 | 37 | lr = getattr(cfg_train, f'lrate_{k}') * decay_factor 38 | if lr > 0: 39 | print(f'create_optimizer_or_freeze_model: param {k} lr {lr}') 40 | if isinstance(param, nn.Module): 41 | param = param.parameters() 42 | param_group.append({'params': param, 'lr': lr, 'skip_zero_grad': (k in cfg_train.skip_zero_grad_fields)}) 43 | else: 44 | print(f'create_optimizer_or_freeze_model: param {k} freeze') 45 | param.requires_grad = False 46 | return MaskedAdam(param_group) 47 | 48 | 49 | ''' Checkpoint utils 50 | ''' 51 | def load_checkpoint(model, optimizer, ckpt_path, no_reload_optimizer): 52 | ckpt = torch.load(ckpt_path) 53 | start = ckpt['global_step'] 54 | model.load_state_dict(ckpt['model_state_dict']) 55 | if not no_reload_optimizer: 56 | optimizer.load_state_dict(ckpt['optimizer_state_dict']) 57 | return model, optimizer, start 58 | 59 | 60 | def load_model(model_class, ckpt_path): 61 | ckpt = torch.load(ckpt_path) 62 | model = model_class(**ckpt['model_kwargs']) 63 | model.load_state_dict(ckpt['model_state_dict']) 64 | return model 65 | 66 | 67 | ''' Evaluation metrics (ssim, lpips) 68 | ''' 69 | def rgb_ssim(img0, img1, max_val, 70 | filter_size=11, 71 | filter_sigma=1.5, 72 | k1=0.01, 73 | k2=0.03, 74 | return_map=False): 75 | # Modified from https://github.com/google/mipnerf/blob/16e73dfdb52044dcceb47cda5243a686391a6e0f/internal/math.py#L58 76 | assert len(img0.shape) == 3 77 | assert img0.shape[-1] == 3 78 | assert img0.shape == img1.shape 79 | 80 | # Construct a 1D Gaussian blur filter. 81 | hw = filter_size // 2 82 | shift = (2 * hw - filter_size + 1) / 2 83 | f_i = ((np.arange(filter_size) - hw + shift) / filter_sigma)**2 84 | filt = np.exp(-0.5 * f_i) 85 | filt /= np.sum(filt) 86 | 87 | # Blur in x and y (faster than the 2D convolution). 88 | def convolve2d(z, f): 89 | return scipy.signal.convolve2d(z, f, mode='valid') 90 | 91 | filt_fn = lambda z: np.stack([ 92 | convolve2d(convolve2d(z[...,i], filt[:, None]), filt[None, :]) 93 | for i in range(z.shape[-1])], -1) 94 | mu0 = filt_fn(img0) 95 | mu1 = filt_fn(img1) 96 | mu00 = mu0 * mu0 97 | mu11 = mu1 * mu1 98 | mu01 = mu0 * mu1 99 | sigma00 = filt_fn(img0**2) - mu00 100 | sigma11 = filt_fn(img1**2) - mu11 101 | sigma01 = filt_fn(img0 * img1) - mu01 102 | 103 | # Clip the variances and covariances to valid values. 
104 | # Variance must be non-negative: 105 | sigma00 = np.maximum(0., sigma00) 106 | sigma11 = np.maximum(0., sigma11) 107 | sigma01 = np.sign(sigma01) * np.minimum( 108 | np.sqrt(sigma00 * sigma11), np.abs(sigma01)) 109 | c1 = (k1 * max_val)**2 110 | c2 = (k2 * max_val)**2 111 | numer = (2 * mu01 + c1) * (2 * sigma01 + c2) 112 | denom = (mu00 + mu11 + c1) * (sigma00 + sigma11 + c2) 113 | ssim_map = numer / denom 114 | ssim = np.mean(ssim_map) 115 | return ssim_map if return_map else ssim 116 | 117 | 118 | __LPIPS__ = {} 119 | def init_lpips(net_name, device): 120 | assert net_name in ['alex', 'vgg'] 121 | import lpips 122 | print(f'init_lpips: lpips_{net_name}') 123 | return lpips.LPIPS(net=net_name, version='0.1').eval().to(device) 124 | 125 | def rgb_lpips(np_gt, np_im, net_name, device): 126 | if net_name not in __LPIPS__: 127 | __LPIPS__[net_name] = init_lpips(net_name, device) 128 | gt = torch.from_numpy(np_gt).permute([2, 0, 1]).contiguous().to(device) 129 | im = torch.from_numpy(np_im).permute([2, 0, 1]).contiguous().to(device) 130 | return __LPIPS__[net_name](gt, im, normalize=True).item() 131 | 132 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | scipy 3 | tqdm 4 | lpips 5 | mmcv 6 | imageio 7 | imageio-ffmpeg 8 | opencv-python 9 | torch_efficient_distloss 10 | 11 | -------------------------------------------------------------------------------- /tools/colmap_utils/colmap_read_model.py: -------------------------------------------------------------------------------- 1 | # Source: https://github.com/Fyusion/LLFF 2 | # Copyright (c) 2018, ETH Zurich and UNC Chapel Hill. 3 | # All rights reserved. 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # * Redistributions of source code must retain the above copyright 9 | # notice, this list of conditions and the following disclaimer. 10 | # 11 | # * Redistributions in binary form must reproduce the above copyright 12 | # notice, this list of conditions and the following disclaimer in the 13 | # documentation and/or other materials provided with the distribution. 14 | # 15 | # * Neither the name of ETH Zurich and UNC Chapel Hill nor the names of 16 | # its contributors may be used to endorse or promote products derived 17 | # from this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 | # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE 23 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 | # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 | # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 | # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 | # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 | # POSSIBILITY OF SUCH DAMAGE. 30 | # 31 | # Author: Johannes L. 
Schoenberger (jsch at inf.ethz.ch) 32 | 33 | import os 34 | import sys 35 | import collections 36 | import numpy as np 37 | import struct 38 | 39 | 40 | CameraModel = collections.namedtuple( 41 | "CameraModel", ["model_id", "model_name", "num_params"]) 42 | Camera = collections.namedtuple( 43 | "Camera", ["id", "model", "width", "height", "params"]) 44 | BaseImage = collections.namedtuple( 45 | "Image", ["id", "qvec", "tvec", "camera_id", "name", "xys", "point3D_ids"]) 46 | Point3D = collections.namedtuple( 47 | "Point3D", ["id", "xyz", "rgb", "error", "image_ids", "point2D_idxs"]) 48 | 49 | class Image(BaseImage): 50 | def qvec2rotmat(self): 51 | return qvec2rotmat(self.qvec) 52 | 53 | 54 | CAMERA_MODELS = { 55 | CameraModel(model_id=0, model_name="SIMPLE_PINHOLE", num_params=3), 56 | CameraModel(model_id=1, model_name="PINHOLE", num_params=4), 57 | CameraModel(model_id=2, model_name="SIMPLE_RADIAL", num_params=4), 58 | CameraModel(model_id=3, model_name="RADIAL", num_params=5), 59 | CameraModel(model_id=4, model_name="OPENCV", num_params=8), 60 | CameraModel(model_id=5, model_name="OPENCV_FISHEYE", num_params=8), 61 | CameraModel(model_id=6, model_name="FULL_OPENCV", num_params=12), 62 | CameraModel(model_id=7, model_name="FOV", num_params=5), 63 | CameraModel(model_id=8, model_name="SIMPLE_RADIAL_FISHEYE", num_params=4), 64 | CameraModel(model_id=9, model_name="RADIAL_FISHEYE", num_params=5), 65 | CameraModel(model_id=10, model_name="THIN_PRISM_FISHEYE", num_params=12) 66 | } 67 | CAMERA_MODEL_IDS = dict([(camera_model.model_id, camera_model) \ 68 | for camera_model in CAMERA_MODELS]) 69 | 70 | 71 | def read_next_bytes(fid, num_bytes, format_char_sequence, endian_character="<"): 72 | """Read and unpack the next bytes from a binary file. 73 | :param fid: 74 | :param num_bytes: Sum of combination of {2, 4, 8}, e.g. 2, 6, 16, 30, etc. 75 | :param format_char_sequence: List of {c, e, f, d, h, H, i, I, l, L, q, Q}. 76 | :param endian_character: Any of {@, =, <, >, !} 77 | :return: Tuple of read and unpacked values. 
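Example: read_next_bytes(fid, num_bytes=24, format_char_sequence="ddq") reads 8 + 8 + 8 = 24 bytes and unpacks two doubles followed by one signed 8-byte integer (the per-point record layout used by read_images_binary below).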
78 | """ 79 | data = fid.read(num_bytes) 80 | return struct.unpack(endian_character + format_char_sequence, data) 81 | 82 | 83 | def read_cameras_text(path): 84 | """ 85 | see: src/base/reconstruction.cc 86 | void Reconstruction::WriteCamerasText(const std::string& path) 87 | void Reconstruction::ReadCamerasText(const std::string& path) 88 | """ 89 | cameras = {} 90 | with open(path, "r") as fid: 91 | while True: 92 | line = fid.readline() 93 | if not line: 94 | break 95 | line = line.strip() 96 | if len(line) > 0 and line[0] != "#": 97 | elems = line.split() 98 | camera_id = int(elems[0]) 99 | model = elems[1] 100 | width = int(elems[2]) 101 | height = int(elems[3]) 102 | params = np.array(tuple(map(float, elems[4:]))) 103 | cameras[camera_id] = Camera(id=camera_id, model=model, 104 | width=width, height=height, 105 | params=params) 106 | return cameras 107 | 108 | 109 | def read_cameras_binary(path_to_model_file): 110 | """ 111 | see: src/base/reconstruction.cc 112 | void Reconstruction::WriteCamerasBinary(const std::string& path) 113 | void Reconstruction::ReadCamerasBinary(const std::string& path) 114 | """ 115 | cameras = {} 116 | with open(path_to_model_file, "rb") as fid: 117 | num_cameras = read_next_bytes(fid, 8, "Q")[0] 118 | for camera_line_index in range(num_cameras): 119 | camera_properties = read_next_bytes( 120 | fid, num_bytes=24, format_char_sequence="iiQQ") 121 | camera_id = camera_properties[0] 122 | model_id = camera_properties[1] 123 | model_name = CAMERA_MODEL_IDS[camera_properties[1]].model_name 124 | width = camera_properties[2] 125 | height = camera_properties[3] 126 | num_params = CAMERA_MODEL_IDS[model_id].num_params 127 | params = read_next_bytes(fid, num_bytes=8*num_params, 128 | format_char_sequence="d"*num_params) 129 | cameras[camera_id] = Camera(id=camera_id, 130 | model=model_name, 131 | width=width, 132 | height=height, 133 | params=np.array(params)) 134 | assert len(cameras) == num_cameras 135 | return cameras 136 | 137 | 138 | def read_images_text(path): 139 | """ 140 | see: src/base/reconstruction.cc 141 | void Reconstruction::ReadImagesText(const std::string& path) 142 | void Reconstruction::WriteImagesText(const std::string& path) 143 | """ 144 | images = {} 145 | with open(path, "r") as fid: 146 | while True: 147 | line = fid.readline() 148 | if not line: 149 | break 150 | line = line.strip() 151 | if len(line) > 0 and line[0] != "#": 152 | elems = line.split() 153 | image_id = int(elems[0]) 154 | qvec = np.array(tuple(map(float, elems[1:5]))) 155 | tvec = np.array(tuple(map(float, elems[5:8]))) 156 | camera_id = int(elems[8]) 157 | image_name = elems[9] 158 | elems = fid.readline().split() 159 | xys = np.column_stack([tuple(map(float, elems[0::3])), 160 | tuple(map(float, elems[1::3]))]) 161 | point3D_ids = np.array(tuple(map(int, elems[2::3]))) 162 | images[image_id] = Image( 163 | id=image_id, qvec=qvec, tvec=tvec, 164 | camera_id=camera_id, name=image_name, 165 | xys=xys, point3D_ids=point3D_ids) 166 | return images 167 | 168 | 169 | def read_images_binary(path_to_model_file): 170 | """ 171 | see: src/base/reconstruction.cc 172 | void Reconstruction::ReadImagesBinary(const std::string& path) 173 | void Reconstruction::WriteImagesBinary(const std::string& path) 174 | """ 175 | images = {} 176 | with open(path_to_model_file, "rb") as fid: 177 | num_reg_images = read_next_bytes(fid, 8, "Q")[0] 178 | for image_index in range(num_reg_images): 179 | binary_image_properties = read_next_bytes( 180 | fid, num_bytes=64, format_char_sequence="idddddddi") 
181 | image_id = binary_image_properties[0] 182 | qvec = np.array(binary_image_properties[1:5]) 183 | tvec = np.array(binary_image_properties[5:8]) 184 | camera_id = binary_image_properties[8] 185 | image_name = "" 186 | current_char = read_next_bytes(fid, 1, "c")[0] 187 | while current_char != b"\x00": # look for the ASCII 0 entry 188 | image_name += current_char.decode("utf-8") 189 | current_char = read_next_bytes(fid, 1, "c")[0] 190 | num_points2D = read_next_bytes(fid, num_bytes=8, 191 | format_char_sequence="Q")[0] 192 | x_y_id_s = read_next_bytes(fid, num_bytes=24*num_points2D, 193 | format_char_sequence="ddq"*num_points2D) 194 | xys = np.column_stack([tuple(map(float, x_y_id_s[0::3])), 195 | tuple(map(float, x_y_id_s[1::3]))]) 196 | point3D_ids = np.array(tuple(map(int, x_y_id_s[2::3]))) 197 | images[image_id] = Image( 198 | id=image_id, qvec=qvec, tvec=tvec, 199 | camera_id=camera_id, name=image_name, 200 | xys=xys, point3D_ids=point3D_ids) 201 | return images 202 | 203 | 204 | def read_points3D_text(path): 205 | """ 206 | see: src/base/reconstruction.cc 207 | void Reconstruction::ReadPoints3DText(const std::string& path) 208 | void Reconstruction::WritePoints3DText(const std::string& path) 209 | """ 210 | points3D = {} 211 | with open(path, "r") as fid: 212 | while True: 213 | line = fid.readline() 214 | if not line: 215 | break 216 | line = line.strip() 217 | if len(line) > 0 and line[0] != "#": 218 | elems = line.split() 219 | point3D_id = int(elems[0]) 220 | xyz = np.array(tuple(map(float, elems[1:4]))) 221 | rgb = np.array(tuple(map(int, elems[4:7]))) 222 | error = float(elems[7]) 223 | image_ids = np.array(tuple(map(int, elems[8::2]))) 224 | point2D_idxs = np.array(tuple(map(int, elems[9::2]))) 225 | points3D[point3D_id] = Point3D(id=point3D_id, xyz=xyz, rgb=rgb, 226 | error=error, image_ids=image_ids, 227 | point2D_idxs=point2D_idxs) 228 | return points3D 229 | 230 | 231 | def read_points3d_binary(path_to_model_file): 232 | """ 233 | see: src/base/reconstruction.cc 234 | void Reconstruction::ReadPoints3DBinary(const std::string& path) 235 | void Reconstruction::WritePoints3DBinary(const std::string& path) 236 | """ 237 | points3D = {} 238 | with open(path_to_model_file, "rb") as fid: 239 | num_points = read_next_bytes(fid, 8, "Q")[0] 240 | for point_line_index in range(num_points): 241 | binary_point_line_properties = read_next_bytes( 242 | fid, num_bytes=43, format_char_sequence="QdddBBBd") 243 | point3D_id = binary_point_line_properties[0] 244 | xyz = np.array(binary_point_line_properties[1:4]) 245 | rgb = np.array(binary_point_line_properties[4:7]) 246 | error = np.array(binary_point_line_properties[7]) 247 | track_length = read_next_bytes( 248 | fid, num_bytes=8, format_char_sequence="Q")[0] 249 | track_elems = read_next_bytes( 250 | fid, num_bytes=8*track_length, 251 | format_char_sequence="ii"*track_length) 252 | image_ids = np.array(tuple(map(int, track_elems[0::2]))) 253 | point2D_idxs = np.array(tuple(map(int, track_elems[1::2]))) 254 | points3D[point3D_id] = Point3D( 255 | id=point3D_id, xyz=xyz, rgb=rgb, 256 | error=error, image_ids=image_ids, 257 | point2D_idxs=point2D_idxs) 258 | return points3D 259 | 260 | 261 | def read_model(path, ext): 262 | if ext == ".txt": 263 | cameras = read_cameras_text(os.path.join(path, "cameras" + ext)) 264 | images = read_images_text(os.path.join(path, "images" + ext)) 265 | points3D = read_points3D_text(os.path.join(path, "points3D") + ext) 266 | else: 267 | cameras = read_cameras_binary(os.path.join(path, "cameras" + ext)) 268 | 
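# The .bin files are what COLMAP's mapper writes by default (e.g. sparse/0/cameras.bin); the ".txt" branch above covers models exported as text.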
images = read_images_binary(os.path.join(path, "images" + ext)) 269 | points3D = read_points3d_binary(os.path.join(path, "points3D") + ext) 270 | return cameras, images, points3D 271 | 272 | 273 | def qvec2rotmat(qvec): 274 | return np.array([ 275 | [1 - 2 * qvec[2]**2 - 2 * qvec[3]**2, 276 | 2 * qvec[1] * qvec[2] - 2 * qvec[0] * qvec[3], 277 | 2 * qvec[3] * qvec[1] + 2 * qvec[0] * qvec[2]], 278 | [2 * qvec[1] * qvec[2] + 2 * qvec[0] * qvec[3], 279 | 1 - 2 * qvec[1]**2 - 2 * qvec[3]**2, 280 | 2 * qvec[2] * qvec[3] - 2 * qvec[0] * qvec[1]], 281 | [2 * qvec[3] * qvec[1] - 2 * qvec[0] * qvec[2], 282 | 2 * qvec[2] * qvec[3] + 2 * qvec[0] * qvec[1], 283 | 1 - 2 * qvec[1]**2 - 2 * qvec[2]**2]]) 284 | 285 | 286 | def rotmat2qvec(R): 287 | Rxx, Ryx, Rzx, Rxy, Ryy, Rzy, Rxz, Ryz, Rzz = R.flat 288 | K = np.array([ 289 | [Rxx - Ryy - Rzz, 0, 0, 0], 290 | [Ryx + Rxy, Ryy - Rxx - Rzz, 0, 0], 291 | [Rzx + Rxz, Rzy + Ryz, Rzz - Rxx - Ryy, 0], 292 | [Ryz - Rzy, Rzx - Rxz, Rxy - Ryx, Rxx + Ryy + Rzz]]) / 3.0 293 | eigvals, eigvecs = np.linalg.eigh(K) 294 | qvec = eigvecs[[3, 0, 1, 2], np.argmax(eigvals)] 295 | if qvec[0] < 0: 296 | qvec *= -1 297 | return qvec 298 | 299 | 300 | def main(): 301 | if len(sys.argv) != 3: 302 | print("Usage: python read_model.py path/to/model/folder [.txt,.bin]") 303 | return 304 | 305 | cameras, images, points3D = read_model(path=sys.argv[1], ext=sys.argv[2]) 306 | 307 | print("num_cameras:", len(cameras)) 308 | print("num_images:", len(images)) 309 | print("num_points3D:", len(points3D)) 310 | 311 | 312 | if __name__ == "__main__": 313 | main() 314 | -------------------------------------------------------------------------------- /tools/colmap_utils/colmap_wrapper.py: -------------------------------------------------------------------------------- 1 | # Source: https://github.com/Fyusion/LLFF 2 | import os 3 | import subprocess 4 | 5 | 6 | 7 | # $ DATASET_PATH=/path/to/dataset 8 | 9 | # $ colmap feature_extractor \ 10 | # --database_path $DATASET_PATH/database.db \ 11 | # --image_path $DATASET_PATH/images 12 | 13 | # $ colmap exhaustive_matcher \ 14 | # --database_path $DATASET_PATH/database.db 15 | 16 | # $ mkdir $DATASET_PATH/sparse 17 | 18 | # $ colmap mapper \ 19 | # --database_path $DATASET_PATH/database.db \ 20 | # --image_path $DATASET_PATH/images \ 21 | # --output_path $DATASET_PATH/sparse 22 | 23 | # $ mkdir $DATASET_PATH/dense 24 | def run_colmap(basedir, match_type): 25 | 26 | logfile_name = os.path.join(basedir, 'colmap_output.txt') 27 | logfile = open(logfile_name, 'w') 28 | 29 | feature_extractor_args = [ 30 | 'colmap', 'feature_extractor', 31 | '--database_path', os.path.join(basedir, 'database.db'), 32 | '--image_path', os.path.join(basedir, 'source'), 33 | '--ImageReader.single_camera', '1', 34 | # '--SiftExtraction.use_gpu', '0', 35 | ] 36 | feat_output = ( subprocess.check_output(feature_extractor_args, universal_newlines=True) ) 37 | logfile.write(feat_output) 38 | print('Features extracted') 39 | 40 | exhaustive_matcher_args = [ 41 | 'colmap', match_type, 42 | '--database_path', os.path.join(basedir, 'database.db'), 43 | ] 44 | 45 | match_output = ( subprocess.check_output(exhaustive_matcher_args, universal_newlines=True) ) 46 | logfile.write(match_output) 47 | print('Features matched') 48 | 49 | p = os.path.join(basedir, 'sparse') 50 | if not os.path.exists(p): 51 | os.makedirs(p) 52 | 53 | # mapper_args = [ 54 | # 'colmap', 'mapper', 55 | # '--database_path', os.path.join(basedir, 'database.db'), 56 | # '--image_path', os.path.join(basedir, 'images'), 
57 | # '--output_path', os.path.join(basedir, 'sparse'), 58 | # '--Mapper.num_threads', '16', 59 | # '--Mapper.init_min_tri_angle', '4', 60 | # ] 61 | mapper_args = [ 62 | 'colmap', 'mapper', 63 | '--database_path', os.path.join(basedir, 'database.db'), 64 | '--image_path', os.path.join(basedir, 'source'), 65 | '--output_path', os.path.join(basedir, 'sparse'), # --export_path changed to --output_path in colmap 3.6 66 | '--Mapper.num_threads', '16', 67 | '--Mapper.init_min_tri_angle', '4', 68 | '--Mapper.multiple_models', '0', 69 | '--Mapper.extract_colors', '0', 70 | ] 71 | 72 | map_output = ( subprocess.check_output(mapper_args, universal_newlines=True) ) 73 | logfile.write(map_output) 74 | print('Sparse map created') 75 | 76 | undistorter = [ 77 | 'colmap', 'image_undistorter', 78 | '--image_path', os.path.join(basedir, 'source'), 79 | '--input_path', os.path.join(basedir, 'sparse', '0'), 80 | '--output_path', os.path.join(basedir, 'dense'), 81 | '--output_type', 'COLMAP', 82 | ] 83 | undistort_output = subprocess.check_output(undistorter, universal_newlines=True) 84 | logfile.write(undistort_output) 85 | print('Undistorted images') 86 | 87 | logfile.close() 88 | print( 'Finished running COLMAP, see {} for logs'.format(logfile_name) ) 89 | 90 | 91 | -------------------------------------------------------------------------------- /tools/colmap_utils/pose_utils.py: -------------------------------------------------------------------------------- 1 | # Source: https://github.com/Fyusion/LLFF 2 | import numpy as np 3 | import os 4 | import sys 5 | import imageio 6 | import skimage.transform 7 | 8 | from .colmap_wrapper import run_colmap 9 | from . import colmap_read_model as read_model 10 | 11 | 12 | def load_colmap_data(realdir): 13 | 14 | #camerasfile = os.path.join(realdir, 'sparse/0/cameras.bin') 15 | camerasfile = os.path.join(realdir, 'dense/sparse/cameras.bin') 16 | camdata = read_model.read_cameras_binary(camerasfile) 17 | 18 | # cam = camdata[camdata.keys()[0]] 19 | list_of_keys = list(camdata.keys()) 20 | cam = camdata[list_of_keys[0]] 21 | print( 'Cameras', len(camdata))  # number of reconstructed cameras, not fields of the namedtuple 22 | 23 | h, w, f = cam.height, cam.width, cam.params[0] 24 | # w, h, f = factor * w, factor * h, factor * f 25 | hwf = np.array([h,w,f]).reshape([3,1]) 26 | 27 | #imagesfile = os.path.join(realdir, 'sparse/0/images.bin') 28 | imagesfile = os.path.join(realdir, 'dense/sparse/images.bin') 29 | imdata = read_model.read_images_binary(imagesfile) 30 | 31 | w2c_mats = [] 32 | bottom = np.array([0,0,0,1.]).reshape([1,4]) 33 | 34 | names = [imdata[k].name for k in imdata] 35 | print( 'Images #', len(names)) 36 | perm = np.argsort(names) 37 | for k in imdata: 38 | im = imdata[k] 39 | R = im.qvec2rotmat() 40 | t = im.tvec.reshape([3,1]) 41 | m = np.concatenate([np.concatenate([R, t], 1), bottom], 0) 42 | w2c_mats.append(m) 43 | 44 | w2c_mats = np.stack(w2c_mats, 0) 45 | c2w_mats = np.linalg.inv(w2c_mats) 46 | 47 | poses = c2w_mats[:, :3, :4].transpose([1,2,0]) 48 | poses = np.concatenate([poses, np.tile(hwf[..., np.newaxis], [1,1,poses.shape[-1]])], 1) 49 | 50 | points3dfile = os.path.join(realdir, 'dense/sparse/points3D.bin') 51 | pts3d = read_model.read_points3d_binary(points3dfile) 52 | 53 | # must switch to [-u, r, -t] from [r, -u, t], NOT [r, u, -t] 54 | poses = np.concatenate([poses[:, 1:2, :], poses[:, 0:1, :], -poses[:, 2:3, :], poses[:, 3:4, :], poses[:, 4:5, :]], 1) 55 | 56 | return poses, pts3d, perm, names 57 | 58 | 59 | def save_poses(basedir, poses, pts3d, perm, names): 60 | pts_arr = [] 61 | vis_arr = [] 62 | for k
in pts3d: 63 | pts_arr.append(pts3d[k].xyz) 64 | cams = [0] * poses.shape[-1] 65 | for ind in pts3d[k].image_ids: 66 | if ind - 1 >= len(cams):  # 1-based image id must index into the recovered poses 67 | print("ERROR: the camera pose for this point's image id cannot be accessed") 68 | return 69 | cams[ind-1] = 1 70 | vis_arr.append(cams) 71 | 72 | pts_arr = np.array(pts_arr) 73 | vis_arr = np.array(vis_arr) 74 | print( 'Points', pts_arr.shape, 'Visibility', vis_arr.shape ) 75 | 76 | zvals = np.sum(-(pts_arr[:, np.newaxis, :].transpose([2,0,1]) - poses[:3, 3:4, :]) * poses[:3, 2:3, :], 0) 77 | valid_z = zvals[vis_arr==1] 78 | print( 'Depth stats', valid_z.min(), valid_z.max(), valid_z.mean() ) 79 | 80 | save_arr = [] 81 | for i in perm: 82 | vis = vis_arr[:, i] 83 | zs = zvals[:, i] 84 | zs = zs[vis==1] 85 | close_depth, inf_depth = np.percentile(zs, .1), np.percentile(zs, 99.9) 86 | # print( i, close_depth, inf_depth ) 87 | 88 | save_arr.append(np.concatenate([poses[..., i].ravel(), np.array([close_depth, inf_depth])], 0)) 89 | save_arr = np.array(save_arr) 90 | 91 | np.save(os.path.join(basedir, 'poses_bounds.npy'), save_arr) 92 | np.save(os.path.join(basedir, 'poses_names.npy'), sorted(names)) 93 | 94 | 95 | def minify(basedir, factors=[], resolutions=[]): 96 | needtoload = False 97 | for r in factors: 98 | imgdir = os.path.join(basedir, 'images_{}'.format(r)) 99 | if not os.path.exists(imgdir): 100 | needtoload = True 101 | for r in resolutions: 102 | imgdir = os.path.join(basedir, 'images_{}x{}'.format(r[1], r[0])) 103 | if not os.path.exists(imgdir): 104 | needtoload = True 105 | if not needtoload: 106 | return 107 | 108 | from shutil import copy 109 | from subprocess import check_output 110 | 111 | imgdir = os.path.join(basedir, 'images') 112 | imgs = [os.path.join(imgdir, f) for f in sorted(os.listdir(imgdir))] 113 | imgs = [f for f in imgs if any([f.endswith(ex) for ex in ['JPG', 'jpg', 'png', 'jpeg', 'PNG']])] 114 | imgdir_orig = imgdir 115 | 116 | wd = os.getcwd() 117 | 118 | for r in factors + resolutions: 119 | if isinstance(r, int): 120 | name = 'images_{}'.format(r) 121 | resizearg = '{}%'.format(int(100./r)) 122 | else: 123 | name = 'images_{}x{}'.format(r[1], r[0]) 124 | resizearg = '{}x{}'.format(r[1], r[0]) 125 | imgdir = os.path.join(basedir, name) 126 | if os.path.exists(imgdir): 127 | continue 128 | 129 | print('Minifying', r, basedir) 130 | 131 | os.makedirs(imgdir) 132 | check_output('cp {}/* {}'.format(imgdir_orig, imgdir), shell=True) 133 | 134 | ext = imgs[0].split('.')[-1] 135 | args = ' '.join(['mogrify', '-resize', resizearg, '-format', 'png', '*.{}'.format(ext)]) 136 | print(args) 137 | os.chdir(imgdir) 138 | check_output(args, shell=True) 139 | os.chdir(wd) 140 | 141 | if ext != 'png': 142 | check_output('rm {}/*.{}'.format(imgdir, ext), shell=True) 143 | print('Removed duplicates') 144 | print('Done') 145 | 146 | 147 | 148 | 149 | def gen_poses(basedir, match_type, factors=None): 150 | 151 | files_needed = ['{}.bin'.format(f) for f in ['cameras', 'images', 'points3D']] 152 | if os.path.exists(os.path.join(basedir, 'sparse/0')): 153 | files_had = os.listdir(os.path.join(basedir, 'sparse/0')) 154 | else: 155 | files_had = [] 156 | if not all([f in files_had for f in files_needed]): 157 | print( 'Need to run COLMAP' ) 158 | run_colmap(basedir, match_type) 159 | else: 160 | print('Don\'t need to run COLMAP') 161 | 162 | print( 'Post-colmap') 163 | 164 | poses, pts3d, perm, names = load_colmap_data(basedir) 165 | 166 | densedir = os.path.join(basedir, 'dense') 167 | 168 | save_poses(densedir, poses, pts3d, perm,
names) 170 | 171 | if factors is not None: 172 | print( 'Factors:', factors) 173 | minify(densedir, factors) 174 | 175 | print( 'Done with imgs2poses' ) 176 | 177 | return True 178 | 179 | -------------------------------------------------------------------------------- /tools/imgs2poses.py: -------------------------------------------------------------------------------- 1 | # Modified from https://github.com/Fyusion/LLFF 2 | import os 3 | import sys 4 | import glob 5 | 6 | from colmap_utils.pose_utils import gen_poses 7 | 8 | 9 | def check_structure(scenedir): 10 | source = os.path.join(scenedir, 'source') 11 | if not os.path.isdir(source): 12 | print('Invalid directory structure.') 13 | print('Please put all your images under', source, '!') 14 | sys.exit() 15 | if not any(f.lower().endswith(('.jpg', '.jpeg', '.png')) for f in glob.glob(os.path.join(source, '*'))):  # any image with a jpg/jpeg/png extension (case-insensitive) 16 | print('Invalid directory structure.') 17 | print('No image in', source, '!') 18 | sys.exit() 19 | print('Directory structure check: PASS.') 20 | 21 | 22 | if __name__=='__main__': 23 | 24 | import argparse 25 | parser = argparse.ArgumentParser() 26 | parser.add_argument('--match_type', type=str, 27 | default='exhaustive_matcher', help='type of matcher used. Valid options: \ 28 | exhaustive_matcher sequential_matcher. Other matchers not supported at this time') 29 | parser.add_argument('scenedir', type=str, 30 | help='input scene directory') 31 | args = parser.parse_args() 32 | 33 | if args.match_type != 'exhaustive_matcher' and args.match_type != 'sequential_matcher': 34 | print('ERROR: matcher type ' + args.match_type + ' is not valid. Aborting') 35 | sys.exit() 36 | 37 | check_structure(args.scenedir) 38 | 39 | gen_poses(args.scenedir, args.match_type, factors=[2,4,8]) 40 | 41 | -------------------------------------------------------------------------------- /tools/vis_train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import numpy as np 3 | import open3d as o3d 4 | 5 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 6 | parser.add_argument('path') 7 | args = parser.parse_args() 8 | 9 | data = np.load(args.path) 10 | xyz_min = data['xyz_min'] 11 | xyz_max = data['xyz_max'] 12 | cam_lst = data['cam_lst'] 13 | 14 | # Outer aabb 15 | aabb_01 = np.array([[0, 0, 0], 16 | [0, 0, 1], 17 | [0, 1, 1], 18 | [0, 1, 0], 19 | [1, 0, 0], 20 | [1, 0, 1], 21 | [1, 1, 1], 22 | [1, 1, 0]]) 23 | out_bbox = o3d.geometry.LineSet() 24 | out_bbox.points = o3d.utility.Vector3dVector(xyz_min + aabb_01 * (xyz_max - xyz_min)) 25 | out_bbox.colors = o3d.utility.Vector3dVector([[1,0,0] for i in range(12)]) 26 | out_bbox.lines = o3d.utility.Vector2iVector([[0,1],[1,2],[2,3],[3,0],[4,5],[5,6],[6,7],[7,4],[0,4],[1,5],[2,6],[3,7]]) 27 | 28 | # Cameras 29 | cam_frustrm_lst = [] 30 | for cam in cam_lst: 31 | cam_frustrm = o3d.geometry.LineSet() 32 | cam_frustrm.points = o3d.utility.Vector3dVector(cam) 33 | if len(cam) == 5: 34 | cam_frustrm.colors = o3d.utility.Vector3dVector([[0,0,0] for i in range(8)]) 35 | cam_frustrm.lines = o3d.utility.Vector2iVector([[0,1],[0,2],[0,3],[0,4],[1,2],[2,4],[4,3],[3,1]]) 36 | elif len(cam) == 8: 37 | cam_frustrm.colors = o3d.utility.Vector3dVector([[0,0,0] for i in range(12)]) 38 | cam_frustrm.lines = o3d.utility.Vector2iVector([ 39 | [0,1],[1,3],[3,2],[2,0], 40 | [4,5],[5,7],[7,6],[6,4], 41 | [0,4],[1,5],[3,7],[2,6], 42 | ]) 43 | else: 44 | raise NotImplementedError 45 | cam_frustrm_lst.append(cam_frustrm) 46 | 47 | # Show the coordinate frame at xyz_min, the red scene bbox, and the black camera frustums 48 |
o3d.visualization.draw_geometries([ 49 | o3d.geometry.TriangleMesh.create_coordinate_frame(size=1.0, origin=xyz_min), 50 | out_bbox, *cam_frustrm_lst]) 51 | 52 | -------------------------------------------------------------------------------- /tools/vis_volume.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import numpy as np 3 | import open3d as o3d 4 | 5 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 6 | parser.add_argument('path') 7 | parser.add_argument('thres', type=float) 8 | parser.add_argument('--cam') 9 | args = parser.parse_args() 10 | 11 | data = np.load(args.path) 12 | alpha = data['alpha'] 13 | rgb = data['rgb'] 14 | if rgb.shape[0] < rgb.shape[-1]: 15 | alpha = np.transpose(alpha, (1,2,0)) 16 | rgb = np.transpose(rgb, (1,2,3,0)) 17 | print('Shape', alpha.shape, rgb.shape) 18 | print('Active rate', (alpha > args.thres).mean()) 19 | print('Active nums', (alpha > args.thres).sum()) 20 | xyz_min = np.array([0,0,0]) 21 | xyz_max = np.array(alpha.shape) 22 | 23 | if args.cam: 24 | data = np.load(args.cam) 25 | xyz_min = data['xyz_min'] 26 | xyz_max = data['xyz_max'] 27 | cam_lst = data['cam_lst'] 28 | cam_frustrm_lst = [] 29 | for cam in cam_lst: 30 | cam_frustrm = o3d.geometry.LineSet() 31 | cam_frustrm.points = o3d.utility.Vector3dVector(cam) 32 | if len(cam) == 5: 33 | cam_frustrm.colors = o3d.utility.Vector3dVector([[0.5,0.5,0.5] for i in range(8)]) 34 | cam_frustrm.lines = o3d.utility.Vector2iVector([[0,1],[0,2],[0,3],[0,4],[1,2],[2,4],[4,3],[3,1]]) 35 | elif len(cam) == 8: 36 | cam_frustrm.colors = o3d.utility.Vector3dVector([[0.5,0.5,0.5] for i in range(12)]) 37 | cam_frustrm.lines = o3d.utility.Vector2iVector([ 38 | [0,1],[1,3],[3,2],[2,0], 39 | [4,5],[5,7],[7,6],[6,4], 40 | [0,4],[1,5],[3,7],[2,6], 41 | ]) 42 | cam_frustrm_lst.append(cam_frustrm) 43 | else: 44 | cam_frustrm_lst = [] 45 | 46 | 47 | aabb_01 = np.array([[0, 0, 0], 48 | [0, 0, 1], 49 | [0, 1, 1], 50 | [0, 1, 0], 51 | [1, 0, 0], 52 | [1, 0, 1], 53 | [1, 1, 1], 54 | [1, 1, 0]]) 55 | out_bbox = o3d.geometry.LineSet() 56 | out_bbox.points = o3d.utility.Vector3dVector(xyz_min + aabb_01 * (xyz_max - xyz_min)) 57 | out_bbox.colors = o3d.utility.Vector3dVector([[1,0,0] for i in range(12)]) 58 | out_bbox.lines = o3d.utility.Vector2iVector([[0,1],[1,2],[2,3],[3,0],[4,5],[5,6],[6,7],[7,4],[0,4],[1,5],[2,6],[3,7]]) 59 | 60 | xyz = np.stack((alpha > args.thres).nonzero(), -1) 61 | color = rgb[xyz[:,0], xyz[:,1], xyz[:,2]] 62 | pcd = o3d.geometry.PointCloud() 63 | pcd.points = o3d.utility.Vector3dVector(xyz / alpha.shape * (xyz_max - xyz_min) + xyz_min) 64 | pcd.colors = o3d.utility.Vector3dVector(color[:, :3]) 65 | voxel_grid = o3d.geometry.VoxelGrid.create_from_point_cloud(pcd, voxel_size=max((xyz_max - xyz_min) / alpha.shape)) 66 | 67 | def change_background_to_black(vis): 68 | opt = vis.get_render_option() 69 | opt.background_color = np.asarray([0, 0, 0]) 70 | return False 71 | 72 | o3d.visualization.draw_geometries_with_key_callbacks([ 73 | o3d.geometry.TriangleMesh.create_coordinate_frame(size=(xyz_max-xyz_min).min()*0.1, origin=xyz_min), 74 | out_bbox, voxel_grid, *cam_frustrm_lst, 75 | ], {ord("K"): change_background_to_black}) 76 | 77 | --------------------------------------------------------------------------------