├── .gitignore ├── LICENSE ├── LICENSE_inria.md ├── README.md ├── articles ├── model_creation_tutor.md ├── model_functions.md └── scannetpp_dataset.md ├── asset ├── fusing_radio.jpg ├── fusing_segformer.jpg └── teaser.jpg ├── cfg ├── deep_blending.yaml ├── dtu_mesh.yaml ├── mipnerf360.yaml ├── mipnerf360_fast_rend.yaml ├── mipnerf360_fast_train.yaml ├── scannetpp.yaml ├── synthetic_nerf.yaml └── tnt_mesh.yaml ├── cuda ├── binding.cpp ├── setup.py ├── src │ ├── adam_step.cu │ ├── adam_step.h │ ├── auxiliary.h │ ├── backward.cu │ ├── backward.h │ ├── config.h │ ├── forward.cu │ ├── forward.h │ ├── geo_params_gather.cu │ ├── geo_params_gather.h │ ├── preprocess.cu │ ├── preprocess.h │ ├── raster_state.cu │ ├── raster_state.h │ ├── sh_compute.cu │ ├── sh_compute.h │ ├── tv_compute.cu │ ├── tv_compute.h │ ├── utils.cu │ └── utils.h └── svraster_cuda │ ├── __init__.py │ ├── grid_loss_bw.py │ ├── meta.py │ ├── renderer.py │ ├── sparse_adam.py │ └── utils.py ├── eval.py ├── extract_mesh.py ├── notebooks ├── demo_segformer.ipynb ├── demo_vfm_radio.ipynb └── example.ipynb ├── render.py ├── render_by_trace_sdf.py ├── render_fly_through.py ├── requirements.txt ├── scripts ├── dtu_clean_for_eval.py ├── dtu_eval │ ├── Offical_DTU_Dataset │ └── eval.py ├── dtu_preproc.py ├── dtu_run.sh ├── dtu_stat.py ├── eval_tnt │ ├── README.md │ ├── compute_bbox_for_mesh.py │ ├── config.py │ ├── cull_mesh.py │ ├── evaluate_single_scene.py │ ├── evaluation.py │ ├── help_func.py │ ├── plot.py │ ├── registration.py │ ├── requirements.txt │ ├── run.py │ ├── trajectory_io.py │ └── util.py ├── mipnerf360_run.sh ├── mipnerf360_stat.py ├── scannetpp_preproc.py ├── scannetpp_run.sh ├── scannetpp_stat.py ├── synthetic_nerf_run.sh ├── synthetic_nerf_stat.py ├── tandt_db_run.sh ├── tandt_db_stat.py ├── tnt_run.sh └── tnt_stat.py ├── src ├── cameras.py ├── config.py ├── dataloader │ ├── data_pack.py │ ├── reader_colmap_dataset.py │ └── reader_nerf_dataset.py ├── sparse_voxel_gears │ ├── adaptive.py │ ├── constructor.py │ ├── io.py │ ├── pooling.py │ ├── properties.py │ └── renderer.py ├── sparse_voxel_model.py └── utils │ ├── activation_utils.py │ ├── bounding_utils.py │ ├── camera_utils.py │ ├── colmap_utils.py │ ├── fuser_utils.py │ ├── image_utils.py │ ├── loss_utils.py │ ├── marching_cubes_utils.py │ ├── mono_utils.py │ ├── octree_utils.py │ └── system_utils.py ├── train.py └── viz.py /.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | *.so 3 | __pycache__/ 4 | *.egg-info/ 5 | *.egg 6 | eggs/ 7 | .eggs/ 8 | 9 | .ipynb_checkpoints/ 10 | 11 | log 12 | logs 13 | ckpt 14 | ckpts 15 | output 16 | outputs 17 | result 18 | results 19 | data 20 | datas 21 | 22 | 23 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | # NVIDIA Source Code License for SVRaster 2 | 3 | Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 4 | 5 | ## 1. Definitions 6 | 7 | “Licensor” means any person or entity that distributes its Work. 8 | 9 | “Work” means (a) the original work of authorship made available under this license, which may include software, documentation, or other files, and (b) any additions to or derivative works thereof that are made available under this license. 10 | 11 | The terms “reproduce,” “reproduction,” “derivative works,” and “distribution” have the meaning as provided under U.S. 
copyright law; provided, however, that for the purposes of this license, derivative works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work. 12 | 13 | Works are “made available” under this license by including in or with the Work either (a) a copyright notice referencing the applicability of this license to the Work, or (b) a copy of this license. 14 | 15 | ## 2. License Grant 16 | 17 | ### 2.1 Copyright Grant 18 | Subject to the terms and conditions of this license, each Licensor grants to you a perpetual, worldwide, non-exclusive, royalty-free, copyright license to use, reproduce, prepare derivative works of, publicly display, publicly perform, sublicense and distribute its Work and any resulting derivative works in any form. 19 | 20 | ## 3. Limitations 21 | 22 | ### 3.1 Redistribution 23 | You may reproduce or distribute the Work only if (a) you do so under this license, (b) you include a complete copy of this license with your distribution, and (c) you retain without modification any copyright, patent, trademark, or attribution notices that are present in the Work. 24 | 25 | ### 3.2 Derivative Works 26 | You may specify that additional or different terms apply to the use, reproduction, and distribution of your derivative works of the Work (“Your Terms”) only if (a) Your Terms provide that the use limitation in Section 3.3 applies to your derivative works, and (b) you identify the specific derivative works that are subject to Your Terms. Notwithstanding Your Terms, this license (including the redistribution requirements in Section 3.1) will continue to apply to the Work itself. 27 | 28 | ### 3.3 Use Limitation 29 | The Work includes files from [GitHub - graphdeco-inria/gaussian-splatting: Original reference implementation of "3D Gaussian Splatting for Real-Time Radiance Field Rendering"](https://github.com/graphdeco-inria/gaussian-splatting) and [GitHub - graphdeco-inria/diff-gaussian-rasterization at 9c5c2028f6fbee2be239bc4c9421ff894fe4fbe0](https://github.com/graphdeco-inria/diff-gaussian-rasterization/tree/9c5c2028f6fbee2be239bc4c9421ff894fe4fbe0), which are subject to the terms of the Gaussian-Splatting License (a copy the license is available at [diff-gaussian-rasterization/LICENSE.md at 9c5c2028f6fbee2be239bc4c9421ff894fe4fbe0 · graphdeco-inria/diff-gaussian-rasterization · GitHub](https://github.com/graphdeco-inria/diff-gaussian-rasterization/blob/9c5c2028f6fbee2be239bc4c9421ff894fe4fbe0/LICENSE.md)). The Work and any derivative works thereof only may be used or intended for use non-commercially. As used herein, “non-commercially” means for research or evaluation purposes only. 30 | 31 | ### 3.4 Patent Claims 32 | If you bring or threaten to bring a patent claim against any Licensor (including any claim, cross-claim or counterclaim in a lawsuit) to enforce any patents that you allege are infringed by any Work, then your rights under this license from such Licensor (including the grant in Section 2.1) will terminate immediately. 33 | 34 | ### 3.5 Trademarks 35 | This license does not grant any rights to use any Licensor’s or its affiliates’ names, logos, or trademarks, except as necessary to reproduce the notices described in this license. 36 | 37 | ### 3.6 Termination 38 | If you violate any term of this license, then your rights under this license (including the grant in Section 2.1) will terminate immediately. 39 | 40 | ## 4. 
Disclaimer of Warranty 41 | THE WORK IS PROVIDED “AS IS” WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER THIS LICENSE. 42 | 43 | ## 5. Limitation of Liability 44 | EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK (INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION, LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 45 | -------------------------------------------------------------------------------- /LICENSE_inria.md: -------------------------------------------------------------------------------- 1 | Gaussian-Splatting License 2 | =========================== 3 | 4 | **Inria** and **the Max Planck Institut for Informatik (MPII)** hold all the ownership rights on the *Software* named **gaussian-splatting**. 5 | The *Software* is in the process of being registered with the Agence pour la Protection des 6 | Programmes (APP). 7 | 8 | The *Software* is still being developed by the *Licensor*. 9 | 10 | *Licensor*'s goal is to allow the research community to use, test and evaluate 11 | the *Software*. 12 | 13 | ## 1. Definitions 14 | 15 | *Licensee* means any person or entity that uses the *Software* and distributes 16 | its *Work*. 17 | 18 | *Licensor* means the owners of the *Software*, i.e Inria and MPII 19 | 20 | *Software* means the original work of authorship made available under this 21 | License ie gaussian-splatting. 22 | 23 | *Work* means the *Software* and any additions to or derivative works of the 24 | *Software* that are made available under this License. 25 | 26 | 27 | ## 2. Purpose 28 | This license is intended to define the rights granted to the *Licensee* by 29 | Licensors under the *Software*. 30 | 31 | ## 3. Rights granted 32 | 33 | For the above reasons Licensors have decided to distribute the *Software*. 34 | Licensors grant non-exclusive rights to use the *Software* for research purposes 35 | to research users (both academic and industrial), free of charge, without right 36 | to sublicense.. The *Software* may be used "non-commercially", i.e., for research 37 | and/or evaluation purposes only. 38 | 39 | Subject to the terms and conditions of this License, you are granted a 40 | non-exclusive, royalty-free, license to reproduce, prepare derivative works of, 41 | publicly display, publicly perform and distribute its *Work* and any resulting 42 | derivative works in any form. 43 | 44 | ## 4. Limitations 45 | 46 | **4.1 Redistribution.** You may reproduce or distribute the *Work* only if (a) you do 47 | so under this License, (b) you include a complete copy of this License with 48 | your distribution, and (c) you retain without modification any copyright, 49 | patent, trademark, or attribution notices that are present in the *Work*. 
50 | 51 | **4.2 Derivative Works.** You may specify that additional or different terms apply 52 | to the use, reproduction, and distribution of your derivative works of the *Work* 53 | ("Your Terms") only if (a) Your Terms provide that the use limitation in 54 | Section 2 applies to your derivative works, and (b) you identify the specific 55 | derivative works that are subject to Your Terms. Notwithstanding Your Terms, 56 | this License (including the redistribution requirements in Section 3.1) will 57 | continue to apply to the *Work* itself. 58 | 59 | **4.3** Any other use without of prior consent of Licensors is prohibited. Research 60 | users explicitly acknowledge having received from Licensors all information 61 | allowing to appreciate the adequacy between of the *Software* and their needs and 62 | to undertake all necessary precautions for its execution and use. 63 | 64 | **4.4** The *Software* is provided both as a compiled library file and as source 65 | code. In case of using the *Software* for a publication or other results obtained 66 | through the use of the *Software*, users are strongly encouraged to cite the 67 | corresponding publications as explained in the documentation of the *Software*. 68 | 69 | ## 5. Disclaimer 70 | 71 | THE USER CANNOT USE, EXPLOIT OR DISTRIBUTE THE *SOFTWARE* FOR COMMERCIAL PURPOSES 72 | WITHOUT PRIOR AND EXPLICIT CONSENT OF LICENSORS. YOU MUST CONTACT INRIA FOR ANY 73 | UNAUTHORIZED USE: stip-sophia.transfert@inria.fr . ANY SUCH ACTION WILL 74 | CONSTITUTE A FORGERY. THIS *SOFTWARE* IS PROVIDED "AS IS" WITHOUT ANY WARRANTIES 75 | OF ANY NATURE AND ANY EXPRESS OR IMPLIED WARRANTIES, WITH REGARDS TO COMMERCIAL 76 | USE, PROFESSIONNAL USE, LEGAL OR NOT, OR OTHER, OR COMMERCIALISATION OR 77 | ADAPTATION. UNLESS EXPLICITLY PROVIDED BY LAW, IN NO EVENT, SHALL INRIA OR THE 78 | AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 79 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 80 | GOODS OR SERVICES, LOSS OF USE, DATA, OR PROFITS OR BUSINESS INTERRUPTION) 81 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 82 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING FROM, OUT OF OR 83 | IN CONNECTION WITH THE *SOFTWARE* OR THE USE OR OTHER DEALINGS IN THE *SOFTWARE*. 84 | 85 | ## 6. Files subject to permissive licenses 86 | The contents of the file ```utils/loss_utils.py``` are based on publicly available code authored by Evan Su, which falls under the permissive MIT license. 87 | 88 | Title: pytorch-ssim\ 89 | Project code: https://github.com/Po-Hsun-Su/pytorch-ssim\ 90 | Copyright Evan Su, 2017\ 91 | License: https://github.com/Po-Hsun-Su/pytorch-ssim/blob/master/LICENSE.txt (MIT) -------------------------------------------------------------------------------- /articles/model_functions.md: -------------------------------------------------------------------------------- 1 | # Model functions 2 | 3 | We cover the functions to create a model instance in [model_creation_tutor.md](./model_creation_tutor.md). We describe the other functions in the following. 4 | 5 | ## Properties 6 | - `n_samp_per_vox`: Number of samples per voxel when rendering. 7 | - `ss`: Super-sampling scale. 8 | - We render higher-resolution image (`[H * ss, W * ss]`) and apply anti-aliasing downsampling. 9 | - `white_background`: Indicate if the background is white. 10 | - `black_background`: Indicate if the background is black. 
11 |   - The background will be the averaged color if neither black nor white is set.
12 | - `max_sh_degree`: Maximum SH degree. Degrees `0~3` are supported.
13 |   - This number should be fixed after the voxel parameters are allocated.
14 | 
15 | ## Derived properties
16 | - `num_voxels`: Number of voxels.
17 | - `num_grid_pts`: Number of grid points.
18 |   - Recall that a voxel has 8 corner grid points and a grid point can be shared by adjacent voxels. This is the number of unique grid points.
19 | - `scene_min`: Minimum coordinate of the entire scene.
20 | - `scene_max`: Maximum coordinate of the entire scene.
21 | - `inside_min`: Minimum coordinate of the main foreground region.
22 |   - It's valid when the model is created by `model_init` with `outside_level > 0`, which preserves Octree levels outside the main foreground bounding box.
23 | - `inside_max`: Maximum coordinate of the main foreground region.
24 | - `inside_mask`: A mask indicating whether each voxel lies within `inside_min` and `inside_max`.
25 | - `subdivision_priority`: The model automatically tracks and accumulates subdivision priority during the rendering backward pass.
26 |   - A larger value means higher priority.
27 |   - Reset by `reset_subdivision_priority()`.
28 | 
29 | The following properties are lazily computed the first time you access them. They are recomputed automatically when a change in the voxel allocation is detected (e.g., after pruning or subdivision).
30 | - `vox_center`: Voxel center position in world space.
31 | - `vox_size`: Voxel size.
32 | - `vox_key`: Index into the unique grid points, with shape `[num_voxels, 8]`.
33 | - `grid_pts_xyz`: The world-space positions of the unique grid points.
34 | 
35 | ## Parameters
36 | - `_sh0`: Base color as the zero-degree SH component. The shape is `[num_voxels, 3]`.
37 | - `_shs`: Higher-degree SH components for view-dependent color. The shape is `[num_voxels, (max_sh_degree+1)**2 - 1, 3]`.
38 |   - It's the dominant factor in the total number of parameters.
39 | - `_geo_grid_pts`: The density values at the grid points. The shape is `[num_grid_pts, 1]`.
40 |   - When rendering, it's gathered into a `[num_voxels, 8]` tensor defining each voxel's trilinear density field for the CUDA rasterizer.
41 | 
42 | ## Core functions
43 | - `render_pkg = render(camera, track_max_w=False, output_depth=False, output_normal=False, output_T=False)`
44 |   - Renders a view.
45 |   - `track_max_w`: whether to track the maximum blending weight of each voxel. Access it via `render_pkg['max_w']`.
46 |   - `output_depth`: whether to render depth. Access it via `render_pkg['depth']` or `render_pkg['raw_depth']`.
47 |   - `output_normal`: whether to render normals. Access them via `render_pkg['normal']` or `render_pkg['raw_normal']`.
48 |   - `output_T`: whether to output transmittance. Access it via `render_pkg['T']` or `render_pkg['raw_T']`.
49 |   - The outputs with the `raw_` prefix are the results without anti-aliasing downsampling.
50 |   - The depth and normal are not normalized by alpha.
51 |   - The output depth has shape `[3, H, W]`, holding the mean depth, distortion cache, and median depth. Only the mean depth supports backpropagation.
52 | - `pruning(mask)`
53 |   - Removes the voxels indicated by the given mask.
54 | - `subdividing(mask)`
55 |   - Subdivides the voxels indicated by the given mask into their eight octants. The source parent voxels are removed after subdivision.
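The following is a minimal sketch of how these core functions fit together. It is not the actual optimization loop in `train.py`; it assumes `model` is a sparse voxel model instance and `camera` / `cameras` come from the data loader, that the per-voxel quantities are 1-D tensors, and that the thresholds below are illustrative only.

```python
# Render one view and read back the documented outputs.
render_pkg = model.render(camera, track_max_w=True, output_depth=True, output_normal=True)
depth = render_pkg['depth']    # [3, H, W]: mean depth, distortion cache, median depth
normal = render_pkg['normal']  # not normalized by alpha
max_w = render_pkg['max_w']    # per-voxel maximum blending weight for this view

# Pruning: drop voxels whose blending weight stays negligible. In practice the statistic
# would be aggregated over all training views, e.g. via compute_training_stat(cameras).
model.pruning(max_w < 1e-4)

# Subdivision: split the voxels with the highest accumulated priority into their octants.
priority = model.subdivision_priority       # accumulated during rendering backward passes
k = max(1, int(0.05 * model.num_voxels))    # illustrative: top ~5% of voxels
threshold = priority.topk(k).values.min()
model.subdividing(priority >= threshold)
model.reset_subdivision_priority()
```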
56 | 
57 | ## Useful functions
58 | - `compute_training_stat(camera_lst)`
59 |   - Compute per-voxel statistics from the given cameras, including `max_w` for the maximum blending weight, `min_samp_interval` for the inverse of the maximum sampling rate, and `view_cnt` for the visible camera count.
60 | - `reset_sh_from_cameras(camera_lst)`
61 |   - Reset shs to zero.
62 |   - Reset sh0 to yield the colors averaged from the given images.
63 | - `apply_tv_on_density_field(lambda_tv_density)`
64 |   - Add the gradient of the total variation loss to the `_geo_grid_pts` parameter.
65 | - `save(path, quantize=False)`
66 |   - Save the model to the given path. You can optionally apply 8-bit quantization to the parameters, which saves about 70% of disk space with a minor quality difference.
67 | - `load(path)`
68 |   - Load a checkpoint from the given path.
69 | - `load_iteration(model_path, iteration=-1)`
70 |   - Load a checkpoint from a model output path at the given iteration. By default, the latest iteration is loaded.
--------------------------------------------------------------------------------
/articles/scannetpp_dataset.md:
--------------------------------------------------------------------------------
1 | # SVR for ScanNet++ dataset
2 | 
3 | We now support the ScanNet++ dataset. The [benchmark results](https://kaldir.vc.in.tum.de/scannetpp/benchmark/nvs) on the 3rd-party-evaluated hidden set are (as of Mar 8, 2025):
4 | scannet++ benchmark
5 | 
6 | https://github.com/user-attachments/assets/85f55a12-b4bb-4581-924e-925a38f6a748
7 | 
8 | More result details (averaged over 50 scenes):
9 | - Per-scene optimization time: `12 mins`.
10 | - FPS: `197` at `1752 x 1168` image resolution. As we use `ss=1.5`, the actual rendering resolution is `2628 x 1752`.
11 | - Voxel size distribution:
12 |   | <3mm | 3mm-5mm | 5mm-1cm | 1cm-2cm | 2cm-3cm | >3cm |
13 |   | :-: | :-: | :-: | :-: | :-: | :-: |
14 |   | 13.61% | 19.25% | 32.43% | 23.31% | 6.66% | 4.73% |
15 | - Sparse points from COLMAP are not used in the submitted version. We later found the sparse-point loss helpful for geometry, and it slightly improves quality on the public set. Activate it with `--lambda_sparse_depth 1e-2` when running `train.py`.
16 | 
17 | ### Data preparation
18 | 1. Download the source data following the procedure on the [scannet++ official site](https://kaldir.vc.in.tum.de/scannetpp/).
19 | 2. Run `python scripts/scannetpp_preproc.py --indir $PATH_TO_SOURCE_DATA --outdir data/scannetpp_nvs --ids $SEQUENCE_OF_SCENE_ID`.
20 | 
21 | ### Optimization configuration
22 | The config file is provided in `cfg/scannetpp.yaml`. We detail the settings as follows.
23 | 
24 | **Scene bound heuristic.**
25 | As this is a fully indoor dataset, we set `outside_level` to zero and assume the entire scene is inside the main scene bound. The world center is set to the centroid of the training cameras, and the scene radius is set to two times the maximum distance from the world center to the cameras (a small sketch of this computation is shown below).
26 | 
27 | **SH reset trick.**
28 | We find that the view-dependent color from SH does not generalize well, so we reset the sh components near the end of optimization. This trick improves quality on view "extrapolation" tasks like the ScanNet++ dataset, while it slightly reduces quality on view "interpolation" tasks like mipnerf360.
29 | 
30 | **Density ascending regularizer.**
31 | It encourages the normal derived from the density field to point toward the camera side. It improves geometry qualitatively and slightly improves quantitative results.
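Below is a small sketch of the scene bound heuristic described above (`bound_mode: camera_max` with `bound_scale: 2.0` in `cfg/scannetpp.yaml`). `cam_positions` is an assumed `[N, 3]` tensor of training-camera centers in world space; the repository's actual bounding logic (see `src/utils/bounding_utils.py`) may differ in details, and the cubic min/max bound at the end is illustrative only.

```python
import torch

# Assumed input: cam_positions is an [N, 3] tensor of training-camera centers (world space).
world_center = cam_positions.mean(dim=0)                          # centroid of training cameras
max_cam_dist = (cam_positions - world_center).norm(dim=-1).max()  # farthest camera from center
scene_radius = 2.0 * max_cam_dist                                 # bound_scale = 2.0

# Illustrative cubic scene bound derived from the center and radius.
scene_min = world_center - scene_radius
scene_max = world_center + scene_radius
```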
32 | 33 | **Sparse point depth loss.** 34 | It's not used in the submitted version. On the public set, it improves geometry qualitatively and novel-view results quantitatively. 35 | 36 | scannet++ benchmark 37 | -------------------------------------------------------------------------------- /asset/fusing_radio.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/svraster/1413d346f6c2d9e548e41956c9ea3a5dc22beb9e/asset/fusing_radio.jpg -------------------------------------------------------------------------------- /asset/fusing_segformer.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/svraster/1413d346f6c2d9e548e41956c9ea3a5dc22beb9e/asset/fusing_segformer.jpg -------------------------------------------------------------------------------- /asset/teaser.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/svraster/1413d346f6c2d9e548e41956c9ea3a5dc22beb9e/asset/teaser.jpg -------------------------------------------------------------------------------- /cfg/deep_blending.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | # See `src/config.py` for the base setup. 10 | data: 11 | eval: True 12 | 13 | bounding: 14 | bound_mode: camera_max 15 | bound_scale: 1.5 16 | outside_level: 0 # No outside region is observed. 17 | 18 | regularizer: 19 | lambda_T_inside: 0.01 20 | -------------------------------------------------------------------------------- /cfg/dtu_mesh.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | # See `src/config.py` for the base setup. 10 | data: 11 | eval: False 12 | skip_blend_alpha: True 13 | res_downscale: 2.0 14 | 15 | bounding: 16 | outside_level: 1 17 | 18 | model: 19 | n_samp_per_vox: 3 20 | black_background: True 21 | 22 | optimizer: 23 | geo_lr: 0.05 24 | 25 | regularizer: 26 | lambda_T_concen: 0.01 27 | lambda_R_concen: 0.1 28 | lambda_normal_dmean: 0.001 29 | lambda_normal_dmed: 0.001 30 | 31 | init: 32 | init_out_ratio: 0.01 # Use very few voxels for the mostly black background 33 | -------------------------------------------------------------------------------- /cfg/mipnerf360.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 
2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | # See `src/config.py` for the base setup. 10 | data: 11 | eval: True 12 | 13 | regularizer: 14 | lambda_T_inside: 0.01 15 | -------------------------------------------------------------------------------- /cfg/mipnerf360_fast_rend.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | # See `src/config.py` for the base setup. 10 | data: 11 | eval: True 12 | 13 | model: 14 | ss: 1.1 15 | 16 | regularizer: 17 | lambda_T_inside: 0.01 18 | 19 | procedure: 20 | prune_thres_final: 0.15 21 | -------------------------------------------------------------------------------- /cfg/mipnerf360_fast_train.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | # See `src/config.py` for the base setup. 10 | data: 11 | eval: True 12 | 13 | regularizer: 14 | lambda_T_inside: 0.01 15 | 16 | procedure: 17 | sche_mult: 0.3 18 | -------------------------------------------------------------------------------- /cfg/scannetpp.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | # See `src/config.py` for the base setup. 10 | data: 11 | eval: True 12 | res_downscale: 1.0 # Use source image resolution 13 | 14 | bounding: 15 | bound_mode: camera_max 16 | bound_scale: 2.0 17 | outside_level: 0 # No background region 18 | 19 | regularizer: 20 | lambda_T_inside: 0.01 21 | 22 | lambda_dist: 0.01 23 | dist_from: 3000 24 | 25 | lambda_ascending: 0.01 26 | 27 | procedure: 28 | reset_sh_ckpt: [15000] 29 | -------------------------------------------------------------------------------- /cfg/synthetic_nerf.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 
2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | # See `src/config.py` for the base setup. 10 | data: 11 | eval: True 12 | 13 | bounding: 14 | outside_level: 0 15 | 16 | model: 17 | white_background: True 18 | 19 | regularizer: 20 | lambda_T_concen: 0.1 21 | 22 | init: 23 | sh_degree_init: 0 24 | 25 | procedure: 26 | subdivide_all_until: 1000 # Subdivide all at the first time 27 | subdivide_samp_thres: 0.5 # Enable smaller voxels 28 | -------------------------------------------------------------------------------- /cfg/tnt_mesh.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | # See `src/config.py` for the base setup. 10 | model: 11 | n_samp_per_vox: 3 12 | 13 | data: 14 | eval: False 15 | res_downscale: 2.0 16 | 17 | regularizer: 18 | lambda_T_inside: 0.01 19 | lambda_R_concen: 0.1 20 | lambda_normal_dmean: 0.001 21 | lambda_normal_dmed: 0.001 22 | -------------------------------------------------------------------------------- /cuda/binding.cpp: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 3 | 4 | NVIDIA CORPORATION and its licensors retain all intellectual property 5 | and proprietary rights in and to this software, related documentation 6 | and any modifications thereto. Any use, reproduction, disclosure or 7 | distribution of this software and related documentation without an express 8 | license agreement from NVIDIA CORPORATION is strictly prohibited. 
9 | *************************************************************************/ 10 | 11 | #include 12 | #include "src/config.h" 13 | #include "src/raster_state.h" 14 | #include "src/preprocess.h" 15 | #include "src/forward.h" 16 | #include "src/backward.h" 17 | #include "src/sh_compute.h" 18 | #include "src/tv_compute.h" 19 | #include "src/geo_params_gather.h" 20 | #include "src/utils.h" 21 | #include "src/adam_step.h" 22 | 23 | 24 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 25 | m.def("rasterize_preprocess", &PREPROCESS::rasterize_preprocess); 26 | m.def("rasterize_voxels", &FORWARD::rasterize_voxels); 27 | m.def("rasterize_voxels_backward", &BACKWARD::rasterize_voxels_backward); 28 | m.def("filter_geomState", &RASTER_STATE::filter_geomState); 29 | m.def("unpack_ImageState", &RASTER_STATE::unpack_ImageState); 30 | 31 | m.def("gather_triinterp_geo_params", &GEO_PARAMS_GATHER::gather_triinterp_geo_params); 32 | m.def("gather_triinterp_geo_params_bw", &GEO_PARAMS_GATHER::gather_triinterp_geo_params_bw); 33 | m.def("gather_triinterp_feat_params", &GEO_PARAMS_GATHER::gather_triinterp_feat_params); 34 | m.def("gather_triinterp_feat_params_bw", &GEO_PARAMS_GATHER::gather_triinterp_feat_params_bw); 35 | 36 | m.def("sh_compute", &SH_COMPUTE::sh_compute); 37 | m.def("sh_compute_bw", &SH_COMPUTE::sh_compute_bw); 38 | 39 | m.def("total_variation_bw", &TV_COMPUTE::total_variation_bw); 40 | 41 | m.def("is_in_cone", &UTILS::is_in_cone); 42 | m.def("compute_rd", &UTILS::compute_rd); 43 | m.def("depth2pts", &UTILS::depth2pts); 44 | m.def("voxel_order_rank", &UTILS::voxel_order_rank); 45 | m.def("ijk_2_octpath", &UTILS::ijk_2_octpath); 46 | m.def("octpath_2_ijk", &UTILS::octpath_2_ijk); 47 | 48 | m.def("unbiased_adam_step", &ADAM_STEP::unbiased_adam_step); 49 | m.def("biased_adam_step", &ADAM_STEP::biased_adam_step); 50 | 51 | // Some readonly constant 52 | m.attr("MAX_NUM_LEVELS") = pybind11::int_(MAX_NUM_LEVELS); 53 | m.attr("STEP_SZ_SCALE") = pybind11::float_(STEP_SZ_SCALE); 54 | m.attr("MAX_N_SAMP") = pybind11::int_(MAX_N_SAMP); 55 | } 56 | -------------------------------------------------------------------------------- /cuda/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 
8 | 9 | import os 10 | from setuptools import setup 11 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 12 | 13 | setup( 14 | name="svraster_cuda", 15 | packages=["svraster_cuda"], 16 | ext_modules=[ 17 | CUDAExtension( 18 | name="svraster_cuda._C", 19 | sources=[ 20 | "src/raster_state.cu", 21 | "src/preprocess.cu", 22 | "src/forward.cu", 23 | "src/backward.cu", 24 | "src/geo_params_gather.cu", 25 | "src/sh_compute.cu", 26 | "src/tv_compute.cu", 27 | "src/utils.cu", 28 | "src/adam_step.cu", 29 | "binding.cpp" 30 | ], 31 | # extra_compile_args={"nvcc": ["--use_fast_math"]}, 32 | ) 33 | ], 34 | cmdclass={ 35 | "build_ext": BuildExtension 36 | } 37 | ) 38 | -------------------------------------------------------------------------------- /cuda/src/adam_step.cu: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 3 | 4 | NVIDIA CORPORATION and its licensors retain all intellectual property 5 | and proprietary rights in and to this software, related documentation 6 | and any modifications thereto. Any use, reproduction, disclosure or 7 | distribution of this software and related documentation without an express 8 | license agreement from NVIDIA CORPORATION is strictly prohibited. 9 | *************************************************************************/ 10 | 11 | #include "adam_step.h" 12 | 13 | #include 14 | #include 15 | 16 | #include 17 | namespace cg = cooperative_groups; 18 | 19 | namespace ADAM_STEP { 20 | 21 | __forceinline__ __device__ float lerp(float v0, float v1, float t) 22 | { 23 | // Compute (1-t) * v0 + t * v1 24 | return fmaf(t, v1, fmaf(-t, v0, v0)); 25 | } 26 | 27 | 28 | template 29 | __global__ void unbiased_adam_step_cuda_kernel( 30 | const int numel, 31 | float* __restrict__ param, 32 | const float* __restrict__ grad, 33 | float* __restrict__ exp_avg, 34 | float* __restrict__ exp_avg_sq, 35 | const float step_size, const float beta1, const float beta2, 36 | const float rsqrt_bias_correction2, const float eps) 37 | { 38 | const int idx = cg::this_grid().thread_rank(); 39 | if (idx >= numel) 40 | return; 41 | 42 | // Check gradient 43 | const float grad_val = grad[idx]; 44 | if (sparse && grad_val == 0.0f) 45 | return; 46 | 47 | // Load parameters 48 | float exp_avg_val = exp_avg[idx]; 49 | float exp_avg_sq_val = exp_avg_sq[idx]; 50 | 51 | // Adam step 52 | // beta1 * exp_avg_val + (1.0f - beta1) * grad_val 53 | // beta2 * exp_avg_sq_val + (1.0f - beta2) * grad_val * grad_val 54 | exp_avg_val = lerp(grad_val, exp_avg_val, beta1); 55 | exp_avg_sq_val = lerp(grad_val * grad_val, exp_avg_sq_val, beta2); 56 | 57 | const float denom = fmaf(sqrtf(exp_avg_sq_val), rsqrt_bias_correction2, eps); 58 | const float param_step = step_size * (exp_avg_val / denom); 59 | 60 | // Save back the new results 61 | param[idx] -= param_step; 62 | exp_avg[idx] = exp_avg_val; 63 | exp_avg_sq[idx] = exp_avg_sq_val; 64 | } 65 | 66 | 67 | template 68 | __global__ void biased_adam_step_cuda_kernel( 69 | const int numel, 70 | float* __restrict__ param, 71 | const float* __restrict__ grad, 72 | float* __restrict__ exp_avg, 73 | float* __restrict__ exp_avg_sq, 74 | const float lr, const float beta1, const float beta2, const float eps) 75 | { 76 | const int idx = cg::this_grid().thread_rank(); 77 | if (idx >= numel) 78 | return; 79 | 80 | // Check gradient 81 | const float grad_val = grad[idx]; 82 | if (sparse && 
grad_val == 0.0f) 83 | return; 84 | 85 | // Load parameters 86 | float exp_avg_val = exp_avg[idx]; 87 | float exp_avg_sq_val = exp_avg_sq[idx]; 88 | 89 | // Adam step 90 | // beta1 * exp_avg_val + (1.0f - beta1) * grad_val 91 | // beta2 * exp_avg_sq_val + (1.0f - beta2) * grad_val * grad_val 92 | exp_avg_val = lerp(grad_val, exp_avg_val, beta1); 93 | exp_avg_sq_val = lerp(grad_val * grad_val, exp_avg_sq_val, beta2); 94 | 95 | const float denom = sqrtf(exp_avg_sq_val) + eps; 96 | const float param_step = lr * (exp_avg_val / denom); 97 | 98 | // Save back the new results 99 | param[idx] -= param_step; 100 | exp_avg[idx] = exp_avg_val; 101 | exp_avg_sq[idx] = exp_avg_sq_val; 102 | } 103 | 104 | 105 | 106 | void unbiased_adam_step( 107 | bool sparse, 108 | torch::Tensor& param, 109 | const torch::Tensor& grad, 110 | torch::Tensor& exp_avg, 111 | torch::Tensor& exp_avg_sq, 112 | const double step, 113 | const double lr, const double beta1, const double beta2, const float eps) 114 | { 115 | const int numel = param.numel(); 116 | 117 | const double bias_correction1 = 1.0 - pow(beta1, step); 118 | const double bias_correction2 = 1.0 - pow(beta2, step); 119 | 120 | const double step_size = lr / bias_correction1; 121 | 122 | const double rsqrt_bias_correction2 = rsqrt(bias_correction2); 123 | 124 | auto kernel_func = sparse ? unbiased_adam_step_cuda_kernel : 125 | unbiased_adam_step_cuda_kernel; 126 | 127 | kernel_func <<<(numel + 255) / 256, 256>>>( 128 | numel, 129 | param.contiguous().data_ptr(), 130 | grad.contiguous().data_ptr(), 131 | exp_avg.contiguous().data_ptr(), 132 | exp_avg_sq.contiguous().data_ptr(), 133 | step_size, beta1, beta2, rsqrt_bias_correction2, eps 134 | ); 135 | } 136 | 137 | void biased_adam_step( 138 | bool sparse, 139 | torch::Tensor& param, 140 | const torch::Tensor& grad, 141 | torch::Tensor& exp_avg, 142 | torch::Tensor& exp_avg_sq, 143 | const float lr, const float beta1, const float beta2, const float eps) 144 | { 145 | const int numel = param.numel(); 146 | 147 | auto kernel_func = sparse ? biased_adam_step_cuda_kernel : 148 | biased_adam_step_cuda_kernel; 149 | 150 | kernel_func <<<(numel + 255) / 256, 256>>>( 151 | numel, 152 | param.contiguous().data_ptr(), 153 | grad.contiguous().data_ptr(), 154 | exp_avg.contiguous().data_ptr(), 155 | exp_avg_sq.contiguous().data_ptr(), 156 | lr, beta1, beta2, eps 157 | ); 158 | } 159 | 160 | } 161 | -------------------------------------------------------------------------------- /cuda/src/adam_step.h: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 3 | 4 | NVIDIA CORPORATION and its licensors retain all intellectual property 5 | and proprietary rights in and to this software, related documentation 6 | and any modifications thereto. Any use, reproduction, disclosure or 7 | distribution of this software and related documentation without an express 8 | license agreement from NVIDIA CORPORATION is strictly prohibited. 9 | *************************************************************************/ 10 | 11 | #ifndef ADAM_STEP_H_INCLUDED 12 | #define ADAM_STEP_H_INCLUDED 13 | 14 | #include 15 | 16 | namespace ADAM_STEP { 17 | 18 | // Python interface to run adam optimization step. 
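// Both variants update `param` in place from `grad` and the Adam moment buffers
// `exp_avg` (m) and `exp_avg_sq` (v):
//   m <- beta1 * m + (1 - beta1) * grad
//   v <- beta2 * v + (1 - beta2) * grad^2
// The "unbiased" variant applies the standard Adam bias correction using `step`:
//   param <- param - (lr / (1 - beta1^step)) * m / (sqrt(v) / sqrt(1 - beta2^step) + eps)
// The "biased" variant skips bias correction:
//   param <- param - lr * m / (sqrt(v) + eps)
// When `sparse` is true, entries whose gradient is exactly zero are left untouched.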
19 | void unbiased_adam_step( 20 | bool sparse, 21 | torch::Tensor& param, 22 | const torch::Tensor& grad, 23 | torch::Tensor& exp_avg, 24 | torch::Tensor& exp_avg_sq, 25 | const double step, 26 | const double lr, const double beta1, const double beta2, const float eps); 27 | 28 | void biased_adam_step( 29 | bool sparse, 30 | torch::Tensor& param, 31 | const torch::Tensor& grad, 32 | torch::Tensor& exp_avg, 33 | torch::Tensor& exp_avg_sq, 34 | const float lr, const float beta1, const float beta2, const float eps); 35 | 36 | } 37 | 38 | #endif 39 | -------------------------------------------------------------------------------- /cuda/src/backward.h: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 3 | 4 | NVIDIA CORPORATION and its licensors retain all intellectual property 5 | and proprietary rights in and to this software, related documentation 6 | and any modifications thereto. Any use, reproduction, disclosure or 7 | distribution of this software and related documentation without an express 8 | license agreement from NVIDIA CORPORATION is strictly prohibited. 9 | *************************************************************************/ 10 | 11 | #ifndef RASTERIZER_BACKWARD_H_INCLUDED 12 | #define RASTERIZER_BACKWARD_H_INCLUDED 13 | 14 | #include 15 | 16 | namespace BACKWARD 17 | { 18 | 19 | // Interface for python to run backward pass of voxel rasterization. 20 | std::tuple 21 | rasterize_voxels_backward( 22 | const int R, 23 | const int n_samp_per_vox, 24 | const int image_width, const int image_height, 25 | const float tan_fovx, const float tan_fovy, 26 | const float cx, const float cy, 27 | const torch::Tensor& w2c_matrix, 28 | const torch::Tensor& c2w_matrix, 29 | const float bg_color, 30 | 31 | const torch::Tensor& octree_paths, 32 | const torch::Tensor& vox_centers, 33 | const torch::Tensor& vox_lengths, 34 | const torch::Tensor& geos, 35 | const torch::Tensor& rgbs, 36 | 37 | const torch::Tensor& geomBuffer, 38 | const torch::Tensor& binningBuffer, 39 | const torch::Tensor& imageBuffer, 40 | const torch::Tensor& out_T, 41 | 42 | const torch::Tensor& dL_dout_color, 43 | const torch::Tensor& dL_dout_depth, 44 | const torch::Tensor& dL_dout_normal, 45 | const torch::Tensor& dL_dout_T, 46 | 47 | const float lambda_R_concen, 48 | const torch::Tensor& gt_color, 49 | const float lambda_ascending, 50 | const float lambda_dist, 51 | const bool need_depth, 52 | const bool need_normal, 53 | const torch::Tensor& out_D, 54 | const torch::Tensor& out_N, 55 | 56 | const bool debug); 57 | 58 | } 59 | 60 | #endif 61 | -------------------------------------------------------------------------------- /cuda/src/config.h: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 3 | 4 | NVIDIA CORPORATION and its licensors retain all intellectual property 5 | and proprietary rights in and to this software, related documentation 6 | and any modifications thereto. Any use, reproduction, disclosure or 7 | distribution of this software and related documentation without an express 8 | license agreement from NVIDIA CORPORATION is strictly prohibited. 
9 | *************************************************************************/ 10 | 11 | #ifndef RASTERIZER_CONFIG_H_INCLUDED 12 | #define RASTERIZER_CONFIG_H_INCLUDED 13 | 14 | #define BLOCK_X 16 15 | #define BLOCK_Y 16 16 | #define MAX_NUM_LEVELS 16 17 | #define MAX_ALPHA 0.99999f 18 | #define MIN_ALPHA 0.00001f 19 | #define EARLY_STOP_T 0.0001f 20 | 21 | #define STEP_SZ_SCALE 100.f 22 | 23 | #define MAX_N_SAMP 3 24 | 25 | // Below are the derived term from above 26 | #define BLOCK_SIZE (BLOCK_X * BLOCK_Y) 27 | #define NUM_BIT_ORDER_RANK (3 * MAX_NUM_LEVELS) 28 | #define NUM_BIT_TILE_ID (64 - NUM_BIT_ORDER_RANK) 29 | 30 | #endif 31 | -------------------------------------------------------------------------------- /cuda/src/forward.h: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 3 | 4 | NVIDIA CORPORATION and its licensors retain all intellectual property 5 | and proprietary rights in and to this software, related documentation 6 | and any modifications thereto. Any use, reproduction, disclosure or 7 | distribution of this software and related documentation without an express 8 | license agreement from NVIDIA CORPORATION is strictly prohibited. 9 | *************************************************************************/ 10 | 11 | #ifndef RASTERIZER_FORWARD_H_INCLUDED 12 | #define RASTERIZER_FORWARD_H_INCLUDED 13 | 14 | #include 15 | 16 | namespace FORWARD { 17 | 18 | // Interface for python to run forward rasterization. 19 | std::tuple 20 | rasterize_voxels( 21 | const int n_samp_per_vox, 22 | const int image_width, const int image_height, 23 | const float tan_fovx, const float tan_fovy, 24 | const float cx, const float cy, 25 | const torch::Tensor& w2c_matrix, 26 | const torch::Tensor& c2w_matrix, 27 | const float bg_color, 28 | const bool need_depth, 29 | const bool need_distortion, 30 | const bool need_normal, 31 | const bool track_max_w, 32 | 33 | const torch::Tensor& octree_paths, 34 | const torch::Tensor& vox_centers, 35 | const torch::Tensor& vox_lengths, 36 | const torch::Tensor& geos, 37 | const torch::Tensor& rgbs, 38 | 39 | const torch::Tensor& geomBuffer, 40 | 41 | const bool debug); 42 | 43 | } 44 | 45 | #endif 46 | -------------------------------------------------------------------------------- /cuda/src/geo_params_gather.h: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 3 | 4 | NVIDIA CORPORATION and its licensors retain all intellectual property 5 | and proprietary rights in and to this software, related documentation 6 | and any modifications thereto. Any use, reproduction, disclosure or 7 | distribution of this software and related documentation without an express 8 | license agreement from NVIDIA CORPORATION is strictly prohibited. 9 | *************************************************************************/ 10 | 11 | #ifndef GEO_PARAMS_GATHER_H_INCLUDED 12 | #define GEO_PARAMS_GATHER_H_INCLUDED 13 | 14 | #include 15 | 16 | namespace GEO_PARAMS_GATHER { 17 | 18 | // Python interface for gather grid points value into each voxel. 
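// gather_triinterp_geo_params gathers, for each voxel listed in `care_idx`, the values of its
// 8 corner grid points (looked up through `vox_key`, shaped [num_voxels, 8]) so each voxel's
// trilinear density field can be evaluated. The `_bw` variants scatter the incoming per-voxel
// gradients back onto the shared grid points (hence the `num_grid_pts` argument). The `feat`
// versions do the same for multi-channel per-grid-point features.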
19 | torch::Tensor gather_triinterp_geo_params( 20 | const torch::Tensor& vox_key, 21 | const torch::Tensor& care_idx, 22 | const torch::Tensor& grid_pts); 23 | 24 | torch::Tensor gather_triinterp_geo_params_bw( 25 | const torch::Tensor& vox_key, 26 | const torch::Tensor& care_idx, 27 | const int num_grid_pts, 28 | const torch::Tensor& dL_dgeo_params); 29 | 30 | torch::Tensor gather_triinterp_feat_params( 31 | const torch::Tensor& vox_key, 32 | const torch::Tensor& care_idx, 33 | const torch::Tensor& grid_pts); 34 | 35 | torch::Tensor gather_triinterp_feat_params_bw( 36 | const torch::Tensor& vox_key, 37 | const torch::Tensor& care_idx, 38 | const int num_grid_pts, 39 | const torch::Tensor& dL_dfeat_params); 40 | 41 | } 42 | 43 | #endif 44 | -------------------------------------------------------------------------------- /cuda/src/preprocess.h: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 3 | 4 | NVIDIA CORPORATION and its licensors retain all intellectual property 5 | and proprietary rights in and to this software, related documentation 6 | and any modifications thereto. Any use, reproduction, disclosure or 7 | distribution of this software and related documentation without an express 8 | license agreement from NVIDIA CORPORATION is strictly prohibited. 9 | *************************************************************************/ 10 | 11 | #ifndef RASTERIZER_PREPROCESS_H_INCLUDED 12 | #define RASTERIZER_PREPROCESS_H_INCLUDED 13 | 14 | #include 15 | 16 | namespace PREPROCESS { 17 | 18 | // Interface for python to find the voxel to render and compute some init values. 19 | std::tuple 20 | rasterize_preprocess( 21 | const int image_width, const int image_height, 22 | const float tan_fovx, const float tan_fovy, 23 | const float cx, const float cy, 24 | const torch::Tensor& w2c_matrix, 25 | const torch::Tensor& c2w_matrix, 26 | const float near, 27 | 28 | const torch::Tensor& octree_paths, 29 | const torch::Tensor& vox_centers, 30 | const torch::Tensor& vox_lengths, 31 | 32 | const bool debug); 33 | 34 | } 35 | 36 | #endif 37 | -------------------------------------------------------------------------------- /cuda/src/raster_state.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | /************************************************************************* 13 | Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 14 | 15 | NVIDIA CORPORATION and its licensors retain all intellectual property 16 | and proprietary rights in and to this software, related documentation 17 | and any modifications thereto. Any use, reproduction, disclosure or 18 | distribution of this software and related documentation without an express 19 | license agreement from NVIDIA CORPORATION is strictly prohibited. 
20 | *************************************************************************/ 21 | 22 | #ifndef RASTER_STATE_H_INCLUDED 23 | #define RASTER_STATE_H_INCLUDED 24 | 25 | #include 26 | #include 27 | 28 | namespace RASTER_STATE { 29 | 30 | std::function resizeFunctional(torch::Tensor& t); 31 | 32 | template 33 | static void obtain(char*& chunk, T*& ptr, std::size_t count, std::size_t alignment); 34 | 35 | template 36 | size_t required(size_t P); 37 | 38 | template 39 | size_t required(size_t P, size_t Q); 40 | 41 | struct GeometryState 42 | { 43 | // Voxel duplication related variables. 44 | // A voxel is duplicated by the # of touched tile times the # of camera quadrants. 45 | // We need to calculate the prefix sum (scan) for organizing the BinningState. 46 | uint32_t* n_duplicates; // <==> tiles_touched 47 | uint32_t* n_duplicates_scan; // <==> point_offsets; 48 | size_t scan_size; 49 | char* scanning_temp_space; 50 | uint2* bboxes; // The bbox region enclosing a projected voxel. 51 | 52 | // Voxel sorting related variables. 53 | // uint64_t* order_ranks; // <=> float* depths; // The ranking of the rendering order. 54 | uint32_t* cam_quadrant_bitsets; // The camera quadrants a voxel can reach. 55 | 56 | static GeometryState fromChunk(char*& chunk, size_t P); 57 | }; 58 | 59 | struct ImageState 60 | { 61 | uint2* ranges; 62 | uint32_t* tile_last; 63 | uint32_t* n_contrib; 64 | 65 | static ImageState fromChunk(char*& chunk, size_t N, size_t n_tiles); 66 | }; 67 | 68 | struct BinningState 69 | { 70 | size_t sorting_size; 71 | uint64_t* vox_list_keys_unsorted; 72 | uint64_t* vox_list_keys; 73 | uint32_t* vox_list_unsorted; 74 | uint32_t* vox_list; 75 | char* list_sorting_space; 76 | 77 | static BinningState fromChunk(char*& chunk, size_t P); 78 | }; 79 | 80 | std::tuple 81 | unpack_ImageState( 82 | const int image_width, const int image_height, 83 | const torch::Tensor& imageBuffer); 84 | 85 | torch::Tensor filter_geomState( 86 | const int ori_P, 87 | const torch::Tensor& indices, 88 | const torch::Tensor& geomState); 89 | 90 | } 91 | 92 | #endif -------------------------------------------------------------------------------- /cuda/src/sh_compute.h: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 3 | 4 | NVIDIA CORPORATION and its licensors retain all intellectual property 5 | and proprietary rights in and to this software, related documentation 6 | and any modifications thereto. Any use, reproduction, disclosure or 7 | distribution of this software and related documentation without an express 8 | license agreement from NVIDIA CORPORATION is strictly prohibited. 9 | *************************************************************************/ 10 | 11 | #ifndef SH_COMPUTE_H_INCLUDED 12 | #define SH_COMPUTE_H_INCLUDED 13 | 14 | #include 15 | 16 | namespace SH_COMPUTE { 17 | 18 | // Python interface for spherical harmonic computation. 
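// sh_compute evaluates the view-dependent color of the voxels selected by `idx`: spherical
// harmonics up to degree D are evaluated along the direction from the camera position
// `cam_pos` to each voxel center and combined with the per-voxel coefficients `sh0`
// (degree 0) and `shs` (higher degrees). sh_compute_bw returns the gradients with respect
// to sh0 and shs given the incoming color gradients `dL_drgbs`.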
19 | torch::Tensor sh_compute( 20 | const int D, 21 | const torch::Tensor& idx, 22 | const torch::Tensor& vox_centers, 23 | const torch::Tensor& cam_pos, 24 | const torch::Tensor& sh0, 25 | const torch::Tensor& shs); 26 | 27 | std::tuple sh_compute_bw( 28 | const int D, const int M, 29 | const torch::Tensor& idx, 30 | const torch::Tensor& vox_centers, 31 | const torch::Tensor& cam_pos, 32 | const torch::Tensor& rgbs, 33 | const torch::Tensor& dL_drgbs); 34 | 35 | } 36 | 37 | #endif 38 | -------------------------------------------------------------------------------- /cuda/src/tv_compute.cu: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 3 | 4 | NVIDIA CORPORATION and its licensors retain all intellectual property 5 | and proprietary rights in and to this software, related documentation 6 | and any modifications thereto. Any use, reproduction, disclosure or 7 | distribution of this software and related documentation without an express 8 | license agreement from NVIDIA CORPORATION is strictly prohibited. 9 | *************************************************************************/ 10 | 11 | #include "tv_compute.h" 12 | #include "auxiliary.h" 13 | 14 | #include 15 | #include 16 | 17 | #include 18 | namespace cg = cooperative_groups; 19 | 20 | namespace TV_COMPUTE { 21 | 22 | template 23 | __global__ void total_variation_bw_cuda( 24 | const int N, const int C, const int NC, 25 | const float* __restrict__ grid_pts, 26 | const int64_t* __restrict__ vox_key, 27 | const float weight, 28 | const float* __restrict__ vox_size_inv, 29 | float* __restrict__ grid_pts_grad) 30 | { 31 | auto idx = cg::this_grid().thread_rank(); 32 | if (idx >= NC) 33 | return; 34 | const int iN = idx / C; 35 | const int iC = idx % C; 36 | 37 | // Load from global memory. 38 | int i_book[8]; 39 | #pragma unroll 40 | for (int i=0, k=iN*8; i<8; ++i, ++k) 41 | i_book[i] = vox_key[k]; 42 | 43 | if (tv_sparse) 44 | { 45 | bool valid = false; 46 | for (int i=0; i<8; ++i) 47 | valid |= (grid_pts_grad[i_book[i] * C + iC] != 0.f); 48 | if (!valid) 49 | return; 50 | } 51 | 52 | float vlst[8]; 53 | #pragma unroll 54 | for (int i=0; i<8; ++i) 55 | vlst[i] = grid_pts[i_book[i] * C + iC]; 56 | 57 | float w = weight; 58 | if (!no_tv_s) 59 | w *= 0.01f * vox_size_inv[iN]; 60 | 61 | // Compute gradient wrt total variation loss 62 | int glst[8] = {0}; 63 | #pragma unroll 64 | for (int i=0; i<8; ++i) 65 | { 66 | glst[i] += (vlst[i] > vlst[i^0b001]) * 2 - 1; 67 | glst[i] += (vlst[i] > vlst[i^0b010]) * 2 - 1; 68 | glst[i] += (vlst[i] > vlst[i^0b100]) * 2 - 1; 69 | } 70 | 71 | float dtv_dgrid_pts[8]; 72 | #pragma unroll 73 | for (int i=0; i<8; ++i) 74 | dtv_dgrid_pts[i] = w * ((float)glst[i]); 75 | 76 | // Write back 77 | #pragma unroll 78 | for (int i=0; i<8; ++i) 79 | atomicAdd(grid_pts_grad + i_book[i] * C + iC, dtv_dgrid_pts[i]); 80 | } 81 | 82 | 83 | // Python interface to directly write the gradient of tv loss. 84 | void total_variation_bw( 85 | const torch::Tensor& grid_pts, 86 | const torch::Tensor& vox_key, 87 | const float weight, 88 | const torch::Tensor& vox_size_inv, 89 | const bool no_tv_s, 90 | const bool tv_sparse, 91 | const torch::Tensor& grid_pts_grad) 92 | { 93 | const int N = vox_key.size(0); 94 | const int C = grid_pts.size(1); 95 | const int NC = N * C; 96 | 97 | auto tv_kernel = 98 | (no_tv_s & tv_sparse) ? 
total_variation_bw_cuda : 99 | (no_tv_s) ? total_variation_bw_cuda : 100 | (tv_sparse) ? total_variation_bw_cuda : 101 | total_variation_bw_cuda ; 102 | 103 | if (N > 0) 104 | tv_kernel <<<(NC + 255) / 256, 256>>> ( 105 | N, C, NC, 106 | grid_pts.contiguous().data_ptr(), 107 | vox_key.contiguous().data_ptr(), 108 | weight, 109 | vox_size_inv.contiguous().data_ptr(), 110 | grid_pts_grad.contiguous().data_ptr()); 111 | } 112 | 113 | } 114 | -------------------------------------------------------------------------------- /cuda/src/tv_compute.h: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 3 | 4 | NVIDIA CORPORATION and its licensors retain all intellectual property 5 | and proprietary rights in and to this software, related documentation 6 | and any modifications thereto. Any use, reproduction, disclosure or 7 | distribution of this software and related documentation without an express 8 | license agreement from NVIDIA CORPORATION is strictly prohibited. 9 | *************************************************************************/ 10 | 11 | #ifndef TV_COMPUTE_H_INCLUDED 12 | #define TV_COMPUTE_H_INCLUDED 13 | 14 | #include 15 | 16 | namespace TV_COMPUTE { 17 | 18 | // Python interface to directly write the gradient of tv loss. 19 | void total_variation_bw( 20 | const torch::Tensor& grid_pts, 21 | const torch::Tensor& vox_key, 22 | const float weight, 23 | const torch::Tensor& vox_size_inv, 24 | const bool no_tv_s, 25 | const bool tv_sparse, 26 | const torch::Tensor& grid_pts_grad); 27 | 28 | } 29 | 30 | #endif 31 | -------------------------------------------------------------------------------- /cuda/src/utils.h: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 3 | 4 | NVIDIA CORPORATION and its licensors retain all intellectual property 5 | and proprietary rights in and to this software, related documentation 6 | and any modifications thereto. Any use, reproduction, disclosure or 7 | distribution of this software and related documentation without an express 8 | license agreement from NVIDIA CORPORATION is strictly prohibited. 
9 | *************************************************************************/ 10 | 11 | #ifndef UTILS_H_INCLUDED 12 | #define UTILS_H_INCLUDED 13 | 14 | #include <torch/extension.h> 15 | 16 | namespace UTILS { 17 | 18 | torch::Tensor is_in_cone( 19 | const float tanfovx, 20 | const float tanfovy, 21 | const float near, 22 | const torch::Tensor& w2c_matrix, 23 | const torch::Tensor& pts); 24 | 25 | torch::Tensor compute_rd( 26 | const int width, const int height, 27 | const float cx, const float cy, 28 | const float tanfovx, const float tanfovy, 29 | const torch::Tensor& c2w_matrix); 30 | 31 | torch::Tensor depth2pts( 32 | const int width, const int height, 33 | const float cx, const float cy, 34 | const float tanfovx, const float tanfovy, 35 | const torch::Tensor& c2w_matrix, 36 | const torch::Tensor& depth); 37 | 38 | torch::Tensor voxel_order_rank( 39 | const torch::Tensor& octree_paths); 40 | 41 | torch::Tensor ijk_2_octpath(const torch::Tensor& ijk, const torch::Tensor& octlevel); 42 | 43 | torch::Tensor octpath_2_ijk(const torch::Tensor& octpath, const torch::Tensor& octlevel); 44 | 45 | } 46 | 47 | #endif 48 | -------------------------------------------------------------------------------- /cuda/svraster_cuda/__init__.py: -------------------------------------------------------------------------------- 1 | from . import meta 2 | from . import utils 3 | from . import renderer 4 | from . import sparse_adam 5 | from . import grid_loss_bw 6 | -------------------------------------------------------------------------------- /cuda/svraster_cuda/grid_loss_bw.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | import torch 10 | from . import _C 11 | 12 | 13 | def total_variation(grid_pts, vox_key, weight, vox_size_inv, no_tv_s, tv_sparse, grid_pts_grad): 14 | assert grid_pts.shape == grid_pts_grad.shape 15 | assert len(vox_key.shape) == 2 and vox_key.shape[1] == 8 16 | assert vox_key.shape[0] == vox_size_inv.numel() 17 | _C.total_variation_bw(grid_pts, vox_key, weight, vox_size_inv, no_tv_s, tv_sparse, grid_pts_grad) 18 | -------------------------------------------------------------------------------- /cuda/svraster_cuda/meta.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | import torch 10 | from . import _C 11 | 12 | 13 | MAX_NUM_LEVELS = _C.MAX_NUM_LEVELS 14 | STEP_SZ_SCALE = _C.STEP_SZ_SCALE 15 | -------------------------------------------------------------------------------- /cuda/svraster_cuda/sparse_adam.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION.
All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | import torch 10 | from . import _C 11 | 12 | 13 | class SparseAdam(torch.optim.Optimizer): 14 | 15 | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-15, biased=False, sparse=False): 16 | if not 0.0 <= lr: 17 | raise ValueError("Invalid learning rate: {}".format(lr)) 18 | if not 0.0 <= eps: 19 | raise ValueError("Invalid epsilon value: {}".format(eps)) 20 | if not 0.0 <= betas[0] < 1.0: 21 | raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0])) 22 | if not 0.0 <= betas[1] < 1.0: 23 | raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1])) 24 | defaults = dict(lr=lr, betas=betas, eps=eps) 25 | super(SparseAdam, self).__init__(params, defaults) 26 | self.biased = biased 27 | self.sparse = sparse 28 | 29 | def __setstate__(self, state): 30 | super(SparseAdam, self).__setstate__(state) 31 | 32 | @torch.no_grad() 33 | def step(self): 34 | 35 | for group in self.param_groups: 36 | lr = group['lr'] 37 | beta1, beta2 = group['betas'] 38 | eps = group['eps'] 39 | 40 | for param in group['params']: 41 | if param.grad is not None: 42 | state = self.state[param] 43 | # Lazy state initialization 44 | if len(state) == 0: 45 | # Number of time each param is visited 46 | state['step'] = 0 47 | # Exponential moving average of gradient values 48 | state['exp_avg'] = torch.zeros_like(param, memory_format=torch.preserve_format) 49 | # Exponential moving average of squared gradient values 50 | state['exp_avg_sq'] = torch.zeros_like(param, memory_format=torch.preserve_format) 51 | 52 | state['step'] += 1 53 | 54 | if self.biased: 55 | _C.biased_adam_step( 56 | self.sparse, 57 | param, 58 | param.grad, 59 | state['exp_avg'], 60 | state['exp_avg_sq'], 61 | lr, beta1, beta2, eps 62 | ) 63 | else: 64 | _C.unbiased_adam_step( 65 | self.sparse, 66 | param, 67 | param.grad, 68 | state['exp_avg'], 69 | state['exp_avg_sq'], 70 | state['step'], 71 | lr, beta1, beta2, eps 72 | ) 73 | -------------------------------------------------------------------------------- /cuda/svraster_cuda/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | import torch 10 | from . import _C 11 | 12 | 13 | def voxel_order_rank(octree_paths): 14 | # Compute the eight possible voxel rendering orders. 
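The `SparseAdam` optimizer above delegates the actual update to fused CUDA kernels. As a point of reference, a plain PyTorch version of the standard bias-corrected Adam step that `_C.unbiased_adam_step` presumably implements is sketched below (the `biased` variant would skip the two bias-correction terms, and `sparse=True` is expected to skip entries whose gradient is exactly zero); this is the textbook update under those assumptions, not the kernel itself.

```python
import torch

@torch.no_grad()
def adam_step_reference(param, grad, exp_avg, exp_avg_sq, step, lr, beta1, beta2, eps):
    exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)                # first-moment EMA
    exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)   # second-moment EMA
    bias_c1 = 1 - beta1 ** step                                    # bias corrections
    bias_c2 = 1 - beta2 ** step
    denom = (exp_avg_sq / bias_c2).sqrt_().add_(eps)
    param.addcdiv_(exp_avg / bias_c1, denom, value=-lr)
```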
15 | order_ranks = _C.voxel_order_rank(octree_paths) 16 | return order_ranks 17 | 18 | 19 | def is_in_cone(pts, cam): 20 | assert torch.is_tensor(pts) 21 | assert pts.device == cam.w2c.device 22 | assert len(pts.shape) == 2 23 | assert pts.shape[1] == 3 24 | return _C.is_in_cone( 25 | cam.tanfovx, 26 | cam.tanfovy, 27 | cam.near, 28 | cam.w2c, 29 | pts) 30 | 31 | 32 | def compute_rd(width, height, cx, cy, tanfovx, tanfovy, c2w_matrix): 33 | assert torch.is_tensor(c2w_matrix) 34 | return _C.compute_rd(width, height, cx, cy, tanfovx, tanfovy, c2w_matrix) 35 | 36 | 37 | def depth2pts(width, height, cx, cy, tanfovx, tanfovy, c2w_matrix, depth): 38 | assert torch.is_tensor(c2w_matrix) 39 | assert depth.device == c2w_matrix.device 40 | assert depth.numel() == width * height 41 | return _C.depth2pts(width, height, cx, cy, tanfovx, tanfovy, c2w_matrix, depth) 42 | 43 | 44 | def ijk_2_octpath(ijk, octlevel): 45 | assert torch.is_tensor(ijk) and torch.is_tensor(octlevel) 46 | assert len(ijk.shape) == 2 and ijk.shape[1] == 3 47 | assert ijk.numel() == octlevel.numel() * 3 48 | assert ijk.dtype == torch.int64 49 | assert octlevel.dtype == torch.int8 50 | return _C.ijk_2_octpath(ijk, octlevel) 51 | 52 | 53 | def octpath_2_ijk(octpath, octlevel): 54 | assert torch.is_tensor(octpath) and torch.is_tensor(octlevel) 55 | assert octpath.numel() == octlevel.numel() 56 | assert octpath.dtype == torch.int64 57 | assert octlevel.dtype == torch.int8 58 | return _C.octpath_2_ijk(octpath, octlevel) 59 | -------------------------------------------------------------------------------- /eval.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 13 | # 14 | # NVIDIA CORPORATION and its licensors retain all intellectual property 15 | # and proprietary rights in and to this software, related documentation 16 | # and any modifications thereto. Any use, reproduction, disclosure or 17 | # distribution of this software and related documentation without an express 18 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 
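A hypothetical round-trip check for the `ijk_2_octpath` / `octpath_2_ijk` helpers above: the dtypes and shapes follow the assertions in the wrappers, but the bit layout of an octpath is internal to the CUDA kernels, so this only illustrates the inverse relationship the pair is expected to satisfy.

```python
import torch
from svraster_cuda import utils as cu

octlevel = torch.full((4,), 3, dtype=torch.int8, device="cuda")    # four voxels at octree level 3
ijk = torch.tensor([[0, 0, 0], [1, 2, 3], [7, 7, 7], [4, 0, 5]],
                   dtype=torch.int64, device="cuda")               # grid coordinates at that level
octpath = cu.ijk_2_octpath(ijk, octlevel)
assert torch.equal(cu.octpath_2_ijk(octpath, octlevel), ijk)       # expected to round-trip
```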
19 | 20 | 21 | import os 22 | import json 23 | import numpy as np 24 | from PIL import Image 25 | from tqdm import trange 26 | from pathlib import Path 27 | 28 | import torch 29 | 30 | from src.utils.image_utils import im_pil2tensor 31 | from src.utils.loss_utils import psnr_score, ssim_score, lpips_loss, correct_lpips_loss 32 | 33 | 34 | def read_pairs(renders_dir, gt_dir): 35 | renders = [] 36 | gts = [] 37 | image_names = [] 38 | for fname in os.listdir(renders_dir): 39 | render = Image.open(renders_dir / fname) 40 | gt = Image.open(gt_dir / fname) 41 | renders.append(im_pil2tensor(render).unsqueeze(0)) 42 | gts.append(im_pil2tensor(gt).unsqueeze(0)) 43 | image_names.append(fname) 44 | return renders, gts, image_names 45 | 46 | 47 | def evaluate(model_paths, split): 48 | full_dict = {} 49 | per_view_dict = {} 50 | 51 | for scene_dir in model_paths: 52 | print("Scene:", scene_dir) 53 | full_dict[scene_dir] = {} 54 | per_view_dict[scene_dir] = {} 55 | 56 | test_dir = scene_dir / split 57 | 58 | for method in os.listdir(test_dir): 59 | method_dir = test_dir / method 60 | if not method_dir.is_dir(): 61 | continue 62 | print("Method:", method) 63 | 64 | renders, gts, image_names = read_pairs( 65 | renders_dir=method_dir / "renders", 66 | gt_dir=method_dir/ "gt") 67 | 68 | ssims = [] 69 | psnrs = [] 70 | lpipss = [] 71 | correct_lpipss = [] 72 | 73 | for idx in trange(len(renders)): 74 | im_render = renders[idx].cuda() 75 | im_gt = gts[idx].cuda() 76 | ssims.append(ssim_score(im_render, im_gt).item()) 77 | psnrs.append(psnr_score(im_render, im_gt).item()) 78 | lpipss.append(lpips_loss(im_render, im_gt).item()) 79 | correct_lpipss.append(correct_lpips_loss(im_render, im_gt).item()) 80 | del im_render, im_gt 81 | torch.cuda.empty_cache() 82 | 83 | avg_ssim = np.mean(ssims) 84 | avg_psnr = np.mean(psnrs) 85 | avg_lpips = np.mean(lpipss) 86 | avg_correct_lpips = np.mean(correct_lpipss) 87 | 88 | print(f" SSIM : {avg_ssim:>12.7f}") 89 | print(f" PSNR : {avg_psnr:>12.7f}") 90 | print(f" LPIPS: {avg_lpips:>12.7f}") 91 | print(f" LPIPS: {avg_correct_lpips:>12.7f} (corrected)") 92 | print("") 93 | 94 | full_dict[scene_dir][method] = { 95 | "SSIM": avg_ssim, 96 | "PSNR": avg_psnr, 97 | "LPIPS": avg_lpips, 98 | "LPIPS-corrected": avg_correct_lpips, 99 | } 100 | per_view_dict[scene_dir][method] = { 101 | "SSIM": {name: ssim for ssim, name in zip(ssims, image_names)}, 102 | "PSNR": {name: psnr for psnr, name in zip(psnrs, image_names)}, 103 | "LPIPS": {name: lp for lp, name in zip(lpipss, image_names)}, 104 | "LPIPS-corrected": {name: lp for lp, name in zip(correct_lpipss, image_names)}, 105 | } 106 | 107 | with open(scene_dir / "results.json", 'w') as f: 108 | json.dump(full_dict[scene_dir], f, indent=True) 109 | with open(scene_dir / "per_view.json", 'w') as f: 110 | json.dump(per_view_dict[scene_dir], f, indent=True) 111 | print("Saved to", scene_dir / "results.json") 112 | print("Saved to", scene_dir / "per_view.json") 113 | 114 | 115 | if __name__ == "__main__": 116 | 117 | import argparse 118 | parser = argparse.ArgumentParser(description="Quantitative evaluation of the rendered images.") 119 | parser.add_argument('--split', type=str, default="test") 120 | parser.add_argument('model_paths', nargs=argparse.REMAINDER, type=Path) 121 | args = parser.parse_args() 122 | 123 | assert len(args.model_paths) > 0 124 | evaluate(args.model_paths, args.split) 125 | -------------------------------------------------------------------------------- /render_by_trace_sdf.py: 
-------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 13 | # 14 | # NVIDIA CORPORATION and its licensors retain all intellectual property 15 | # and proprietary rights in and to this software, related documentation 16 | # and any modifications thereto. Any use, reproduction, disclosure or 17 | # distribution of this software and related documentation without an express 18 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 19 | 20 | import os 21 | import time 22 | import numpy as np 23 | from tqdm import tqdm 24 | from os import makedirs 25 | import imageio 26 | 27 | import torch 28 | 29 | from src.config import cfg, update_argparser, update_config 30 | 31 | from src.dataloader.data_pack import DataPack 32 | from src.sparse_voxel_model import SparseVoxelModel 33 | from src.utils.image_utils import im_tensor2np, viz_tensordepth 34 | from src.utils.fuser_utils import Fuser 35 | 36 | 37 | @torch.no_grad() 38 | def render_set(name, iteration, suffix, args, views, voxel_model): 39 | 40 | render_path = os.path.join(args.model_path, name, f"ours_{iteration}{suffix}_trace_by_sdf", "renders") 41 | makedirs(render_path, exist_ok=True) 42 | print(f'render_path: {render_path}') 43 | print(f'ss =: {voxel_model.ss}') 44 | print(f'vox_geo_mode =: {voxel_model.vox_geo_mode}') 45 | print(f'density_mode =: {voxel_model.density_mode}') 46 | 47 | if args.eval_fps: 48 | torch.cuda.empty_cache() 49 | torch.cuda.reset_peak_memory_stats() 50 | 51 | if args.eval_fps: 52 | # Warmup 53 | voxel_model.render_trace_sdf(views[0]) 54 | 55 | eps_time = time.perf_counter() 56 | psnr_lst = [] 57 | for idx, view in enumerate(tqdm(views, desc="Rendering progress")): 58 | hit_depth, hit_vox_id = voxel_model.render_trace_sdf(view) 59 | if not args.eval_fps: 60 | rendering = voxel_model._sh0[hit_vox_id].moveaxis(-1, 0) 61 | rendering *= (hit_vox_id != -1) 62 | gt = view.image.cuda() 63 | mse = (rendering.clip(0,1) - gt.clip(0,1)).square().mean() 64 | psnr = -10 * torch.log10(mse) 65 | psnr_lst.append(psnr.item()) 66 | fname = view.image_name 67 | 68 | # RGB 69 | imageio.imwrite( 70 | os.path.join(render_path, fname + (".jpg" if args.use_jpg else ".png")), 71 | im_tensor2np(rendering) 72 | ) 73 | torch.cuda.synchronize() 74 | eps_time = time.perf_counter() - eps_time 75 | peak_mem = torch.cuda.memory_stats()["allocated_bytes.all.peak"] / 1024 ** 3 76 | if args.eval_fps: 77 | print(f'Eps time: {eps_time:.3f} sec') 78 | print(f"Peak mem: {peak_mem:.2f} GB") 79 | print(f'FPS : {len(views)/eps_time:.0f}') 80 | outtxt = os.path.join(args.model_path, name, "ours_{}{}.txt".format(iteration, suffix)) 81 | with open(outtxt, 'w') as f: 82 | f.write(f"n={len(views):.6f}\n") 83 | f.write(f"eps={eps_time:.6f}\n") 84 | f.write(f"peak_mem={peak_mem:.2f}\n") 85 | f.write(f"fps={len(views)/eps_time:.6f}\n") 86 | else: 87 | print('PSNR:', np.mean(psnr_lst)) 88 | 89 | 90 | if __name__ == "__main__": 91 | # Parse arguments 92 | import argparse 93 | parser = argparse.ArgumentParser( 94 | description="Sparse voxels raster rendering.") 95 | parser.add_argument('model_path') 96 | parser.add_argument("--iteration", 
default=-1, type=int) 97 | parser.add_argument("--skip_train", action="store_true") 98 | parser.add_argument("--skip_test", action="store_true") 99 | parser.add_argument("--eval_fps", action="store_true") 100 | parser.add_argument("--clear_res_down", action="store_true") 101 | parser.add_argument("--suffix", default="", type=str) 102 | parser.add_argument("--use_jpg", action="store_true") 103 | parser.add_argument("--overwrite_ss", default=None, type=float) 104 | parser.add_argument("--overwrite_vox_geo_mode", default=None) 105 | args = parser.parse_args() 106 | print("Rendering " + args.model_path) 107 | 108 | # Load config 109 | update_config(os.path.join(args.model_path, 'config.yaml')) 110 | 111 | if args.clear_res_down: 112 | cfg.data.res_downscale = 0 113 | cfg.data.res_width = 0 114 | 115 | # Load data 116 | data_pack = DataPack(cfg.data, cfg.model.white_background, camera_params_only=False) 117 | 118 | # Load model 119 | voxel_model = SparseVoxelModel(cfg.model) 120 | loaded_iter = voxel_model.load_iteration(args.model_path, args.iteration) 121 | 122 | # Output path suffix 123 | suffix = args.suffix 124 | if not args.suffix: 125 | if cfg.data.res_downscale > 0: 126 | suffix += f"_r{cfg.data.res_downscale}" 127 | if cfg.data.res_width > 0: 128 | suffix += f"_w{cfg.data.res_width}" 129 | 130 | if args.overwrite_ss: 131 | voxel_model.ss = args.overwrite_ss 132 | if not args.suffix: 133 | suffix += f"_ss{args.overwrite_ss:.2f}" 134 | 135 | if args.overwrite_vox_geo_mode: 136 | voxel_model.vox_geo_mode = args.overwrite_vox_geo_mode 137 | if not args.suffix: 138 | suffix += f"_{args.overwrite_vox_geo_mode}" 139 | 140 | # Fuse sdf and rgb 141 | volume = Fuser( 142 | xyz=voxel_model.grid_pts_xyz, 143 | bandwidth=voxel_model.vox_size.min().item() * 20, 144 | # bandwidth=torch.zeros([len(voxel_model.grid_pts_xyz)], dtype=torch.float32, device="cuda").index_reduce_( 145 | # dim=0, 146 | # index=voxel_model.vox_key.flatten(), 147 | # source=voxel_model.vox_size.repeat(1, 8).flatten(), 148 | # reduce="amax") * 3, 149 | use_trunc=True, 150 | fuse_tsdf=True, 151 | feat_dim=3) 152 | 153 | for cam in tqdm(data_pack.get_train_cameras()): 154 | median_depth, median_idx = voxel_model.render_median(cam) 155 | volume.integrate(cam=cam, feat=cam.image.cuda(), depth=median_depth) 156 | 157 | voxel_model._shs.data.fill_(0) 158 | voxel_model._sh0.data.copy_( 159 | volume.feature.nan_to_num_()[voxel_model.vox_key].mean(dim=1)) 160 | voxel_model._geo_grid_pts.data.copy_( 161 | volume.tsdf.nan_to_num_()) 162 | 163 | del volume 164 | torch.cuda.empty_cache() 165 | 166 | # Start rendering 167 | voxel_model.freeze_vox_geo() 168 | 169 | if not args.skip_train: 170 | render_set( 171 | "train", loaded_iter, suffix, args, 172 | data_pack.get_train_cameras(), voxel_model) 173 | 174 | if not args.skip_test: 175 | render_set( 176 | "test", loaded_iter, suffix, args, 177 | data_pack.get_test_cameras(), voxel_model) 178 | -------------------------------------------------------------------------------- /render_fly_through.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. 
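In `render_by_trace_sdf.py` above, a `Fuser` (from `src/utils/fuser_utils.py`, not listed here) integrates per-view median depth and color into a truncated SDF at the grid points before tracing. The sketch below is the classic per-point TSDF update such a fuser typically performs, shown only to clarify the `integrate()` loop; the repository's implementation is batched on the GPU and may differ in details.

```python
def tsdf_update(tsdf, weight, signed_dist, trunc, obs_weight=1.0):
    # Truncate the observed signed distance and fold it into a running weighted average.
    d = max(-1.0, min(1.0, signed_dist / trunc))
    new_tsdf = (tsdf * weight + d * obs_weight) / (weight + obs_weight)
    return new_tsdf, weight + obs_weight
```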
Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | import os 10 | import time 11 | import numpy as np 12 | from tqdm import tqdm 13 | from os import makedirs 14 | import imageio 15 | 16 | import torch 17 | 18 | from src.config import cfg, update_argparser, update_config 19 | 20 | from src.dataloader.data_pack import DataPack 21 | from src.sparse_voxel_model import SparseVoxelModel 22 | from src.cameras import MiniCam 23 | from src.utils.image_utils import im_tensor2np, viz_tensordepth 24 | from src.utils.camera_utils import interpolate_poses 25 | 26 | 27 | if __name__ == "__main__": 28 | # Parse arguments 29 | import argparse 30 | parser = argparse.ArgumentParser( 31 | description="Sparse voxels raster rendering.") 32 | parser.add_argument('model_path') 33 | parser.add_argument("--iteration", default=-1, type=int) 34 | parser.add_argument("--n_frames", default=300, type=int) 35 | parser.add_argument("--save_scale", default=1.0, type=float) 36 | 37 | # Manually select which frames to interpolate 38 | parser.add_argument("--ids", default=[], type=int, nargs='*') 39 | 40 | # Use farthest point sampling to select key frame 41 | parser.add_argument("--starting_id", default=0, type=int) 42 | 43 | # Other tweaking 44 | parser.add_argument("--step_forward", default=0, type=float) 45 | 46 | args = parser.parse_args() 47 | print("Rendering " + args.model_path) 48 | 49 | # Load config 50 | update_config(os.path.join(args.model_path, 'config.yaml')) 51 | 52 | # Load data 53 | data_pack = DataPack( 54 | source_path=cfg.data.source_path, 55 | image_dir_name=cfg.data.image_dir_name, 56 | res_downscale=cfg.data.res_downscale, 57 | res_width=cfg.data.res_width, 58 | skip_blend_alpha=cfg.data.skip_blend_alpha, 59 | alpha_is_white=cfg.model.white_background, 60 | data_device=cfg.data.data_device, 61 | use_test=cfg.data.eval, 62 | test_every=cfg.data.test_every, 63 | camera_params_only=True, 64 | ) 65 | 66 | # Interpolate poses 67 | cams = data_pack.get_train_cameras() 68 | if len(args.ids): 69 | key_poses = [cams[i].c2w.cpu().numpy() for i in args.ids] 70 | else: 71 | cam_pos = torch.stack([cam.position for cam in cams]) 72 | ids = [args.starting_id] 73 | for _ in range(3): 74 | farthest_id = torch.cdist(cam_pos[ids], cam_pos).amin(0).argmax().item() 75 | ids.append(farthest_id) 76 | ids[1], ids[2] = ids[2], ids[1] 77 | key_poses = [cams[i].c2w.cpu().numpy() for i in ids] 78 | 79 | if args.step_forward != 0: 80 | for i in range(len(key_poses)): 81 | lookat = key_poses[i][:3, 2] 82 | key_poses[i][:3, 3] += args.step_forward * lookat 83 | 84 | interp_poses = interpolate_poses(key_poses, n_frame=args.n_frames, periodic=True) 85 | 86 | # Load model 87 | voxel_model = SparseVoxelModel( 88 | n_samp_per_vox=cfg.model.n_samp_per_vox, 89 | sh_degree=cfg.model.sh_degree, 90 | ss=cfg.model.ss, 91 | white_background=cfg.model.white_background, 92 | black_background=cfg.model.black_background, 93 | ) 94 | loaded_iter = voxel_model.load_iteration(args.model_path, args.iteration) 95 | voxel_model.freeze_vox_geo() 96 | 97 | # Rendering 98 | fovx = cams[0].fovx 99 | fovy = cams[0].fovy 100 | width = cams[0].image_width 101 | height = cams[0].image_height 102 | 103 | video = [] 104 | for pose in tqdm(interp_poses, desc="Rendering progress"): 105 | 106 | cam = MiniCam( 107 | c2w=pose, 108 | fovx=fovx, fovy=fovy, 109 | width=width, height=height) 110 | 111 | with torch.no_grad(): 112 
| render_pkg = voxel_model.render(cam) 113 | rendering = render_pkg['color'] 114 | 115 | if args.save_scale != 0: 116 | rendering = torch.nn.functional.interpolate( 117 | rendering[None], 118 | scale_factor=args.save_scale, 119 | mode="bilinear", 120 | antialias=True)[0] 121 | 122 | video.append(im_tensor2np(rendering)) 123 | 124 | outpath = os.path.join(args.model_path, "render_fly_through.mp4") 125 | imageio.mimwrite(outpath, video, fps=30) 126 | print("Save to", outpath) 127 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | einops 3 | opencv-python==4.8.0.74 4 | opencv-contrib-python==4.8.0.74 5 | 6 | yacs 7 | tqdm 8 | natsort 9 | argparse 10 | pillow 11 | imageio 12 | imageio-ffmpeg 13 | scikit-image 14 | pycolmap 15 | 16 | plyfile 17 | shapely 18 | trimesh==4.0.4 19 | open3d==0.18.0 20 | gpytoolbox 21 | 22 | lpips 23 | pytorch-msssim 24 | git+https://github.com/rahul-goel/fused-ssim/ 25 | -------------------------------------------------------------------------------- /scripts/dtu_clean_for_eval.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | import os 10 | import cv2 11 | import glob 12 | import trimesh 13 | import numpy as np 14 | from tqdm import trange 15 | 16 | 17 | if __name__ == '__main__': 18 | # Parse arguments 19 | import argparse 20 | parser = argparse.ArgumentParser(description="Clean mesh for evaluation.") 21 | parser.add_argument('data_dir') 22 | parser.add_argument('mesh_path') 23 | args = parser.parse_args() 24 | 25 | # Read mesh 26 | mesh = trimesh.load(args.mesh_path) 27 | print("Loaded mesh:", mesh) 28 | 29 | # Start cleaning 30 | print('Running DTU_clean_mesh_by_mask...') 31 | verts = np.copy(mesh.vertices[:]) 32 | faces = np.copy(mesh.faces[:]) 33 | cameras = np.load(f'{args.data_dir}/cameras_sphere.npz') 34 | mask_lis = sorted(glob.glob(f'{args.data_dir}/mask/*.png')) 35 | 36 | n_images = len(mask_lis) 37 | mask = np.ones(len(verts), dtype=bool) 38 | for i in trange(n_images): 39 | P = cameras[f'world_mat_{i}'] 40 | pts_image = np.matmul(P[None, :3, :3], verts[:, :, None]).squeeze() + P[None, :3, 3] 41 | pts_image = pts_image / pts_image[:, 2:] 42 | pts_image = np.round(pts_image).astype(np.int32) + 1 43 | mask_image = cv2.imread(mask_lis[i]) 44 | kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (25, 25)) 45 | mask_image = cv2.dilate(mask_image, kernel, iterations=1) 46 | mask_image = (mask_image[:, :, 0] > 128) 47 | mask_image = np.concatenate([np.ones([1, 1600]), mask_image, np.ones([1, 1600])], axis=0) 48 | mask_image = np.concatenate([np.ones([1202, 1]), mask_image, np.ones([1202, 1])], axis=1) 49 | curr_mask = mask_image[(pts_image[:, 1].clip(0, 1201), pts_image[:, 0].clip(0, 1601))] 50 | mask &= curr_mask.astype(bool) 51 | 52 | print('Valid vertices ratio:', mask.mean()) 53 | 54 | indexes = np.full(len(verts), -1, dtype=np.int64) 55 | indexes[np.where(mask)] = np.arange(len(np.where(mask)[0])) 56 | 57 | faces_mask = 
mask[faces[:, 0]] & mask[faces[:, 1]] & mask[faces[:, 2]] 58 | new_faces = faces[np.where(faces_mask)] 59 | new_faces[:, 0] = indexes[new_faces[:, 0]] 60 | new_faces[:, 1] = indexes[new_faces[:, 1]] 61 | new_faces[:, 2] = indexes[new_faces[:, 2]] 62 | new_vertices = verts[np.where(mask)] 63 | 64 | mesh = trimesh.Trimesh(new_vertices, new_faces) 65 | try: 66 | print('Kept only the largest CC') 67 | meshes = mesh.split(only_watertight=False) 68 | mesh = meshes[np.argmax([len(mesh.faces) for mesh in meshes])] 69 | except: 70 | print('Failed') 71 | outdir, outfname = os.path.split(args.mesh_path) 72 | outfname = outfname[:-4] + '_cleaned_for_eval.ply' 73 | mesh.export(os.path.join(outdir, outfname)) 74 | -------------------------------------------------------------------------------- /scripts/dtu_eval/Offical_DTU_Dataset: -------------------------------------------------------------------------------- 1 | /ssd/chengs/DTU/Offical_DTU_Dataset -------------------------------------------------------------------------------- /scripts/dtu_preproc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | import os 10 | import subprocess 11 | import json 12 | from argparse import ArgumentParser 13 | import glob 14 | import numpy as np 15 | import cv2 16 | from natsort import natsorted 17 | import math 18 | from tqdm import tqdm 19 | from PIL import Image 20 | 21 | def fov2focal(fov, pixels): 22 | return pixels / (2 * math.tan(fov / 2)) 23 | 24 | def focal2fov(focal, pixels): 25 | return 2*math.atan(pixels/(2*focal)) 26 | 27 | def load_K_Rt_from_P(filename, P=None): 28 | # This function is borrowed from IDR: https://github.com/lioryariv/idr 29 | if P is None: 30 | lines = open(filename).read().splitlines() 31 | if len(lines) == 4: 32 | lines = lines[1:] 33 | lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] 34 | P = np.asarray(lines).astype(np.float32).squeeze() 35 | 36 | out = cv2.decomposeProjectionMatrix(P) 37 | K = out[0] 38 | R = out[1] 39 | t = out[2] 40 | 41 | K = K / K[2, 2] 42 | intrinsics = np.eye(4) 43 | intrinsics[:3, :3] = K 44 | 45 | pose = np.eye(4, dtype=np.float32) 46 | pose[:3, :3] = R.transpose() 47 | pose[:3, 3] = (t[:3] / t[3])[:, 0] 48 | 49 | return intrinsics, pose 50 | 51 | 52 | parser = ArgumentParser(description="Training script parameters") 53 | parser.add_argument('dataset_root') 54 | args = parser.parse_args() 55 | 56 | for scene in os.listdir(args.dataset_root): 57 | scene_path = os.path.join(args.dataset_root, scene) 58 | if not os.path.isdir(scene_path) or 'scan' not in scene: 59 | continue 60 | 61 | camera_param = dict(np.load(os.path.join(scene_path, 'cameras_sphere.npz'))) 62 | images_lis = sorted(glob.glob(os.path.join(scene_path, 'image/*.png'))) 63 | 64 | train = dict(camera_angle_x=0, frames=[]) 65 | test = dict(camera_angle_x=0, frames=[]) 66 | for idx, image in enumerate(images_lis): 67 | image = os.path.basename(image) 68 | stem = os.path.splitext(image)[0] 69 | 70 | world_mat = camera_param['world_mat_%d' % idx] 71 | scale_mat = camera_param['scale_mat_%d' % 
idx] 72 | 73 | # scale and decompose 74 | P = world_mat @ scale_mat 75 | P = P[:3, :4] 76 | intrinsic_param, c2w = load_K_Rt_from_P(None, P) 77 | 78 | fx = float(intrinsic_param[0][0]) 79 | fy = float(intrinsic_param[1][1]) 80 | cx = float(intrinsic_param[0][2]) 81 | cy = float(intrinsic_param[1][2]) 82 | w, h = Image.open(os.path.join(scene_path, 'image', image)).size 83 | camera_angle_x = focal2fov(fx, w) 84 | camera_angle_y = focal2fov(fy, h) 85 | 86 | # To synthetic blender format 87 | c2w[:3, 1:3] *= -1 88 | 89 | frame = { 90 | "file_path": 'image/' + stem, 91 | "mask_path": f'mask/{int(stem):03d}.png', 92 | "camera_angle_x": camera_angle_x, 93 | "camera_angle_y": camera_angle_y, 94 | "cx_p": cx / w, 95 | "cy_p": cy / h, 96 | "transform_matrix": c2w.tolist() 97 | } 98 | if idx % 8 == 0: 99 | test['frames'].append(frame) 100 | else: 101 | train['frames'].append(frame) 102 | 103 | out_train_path = os.path.join(scene_path, 'transforms_train.json') 104 | out_test_path = os.path.join(scene_path, 'transforms_test.json') 105 | with open(out_train_path, 'w') as f: 106 | json.dump(train, f, indent=4) 107 | 108 | with open(out_test_path, 'w') as f: 109 | json.dump(test, f, indent=4) 110 | 111 | # Write down scene bound 112 | out_bound_path = os.path.join(scene_path, 'nerf_normalization.json') 113 | with open(out_bound_path, 'w') as f: 114 | json.dump({"center": [0.,0.,0.], "radius": 1.0}, f, indent=4) 115 | 116 | np.savetxt( 117 | os.path.join(scene_path, 'to_world_matrix.txt'), 118 | camera_param['scale_mat_0']) 119 | -------------------------------------------------------------------------------- /scripts/dtu_run.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 
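A note on the `c2w[:3, 1:3] *= -1` step in `dtu_preproc.py` above: `cv2.decomposeProjectionMatrix` yields an OpenCV-style pose (x right, y down, z forward), whereas the synthetic-NeRF/Blender transforms format expects x right, y up, z backward, so the camera y and z basis vectors are negated. The same conversion as a standalone helper:

```python
import numpy as np

def opencv_to_blender_c2w(c2w_opencv):
    # Negate the camera-frame y and z axes (columns 1 and 2 of the rotation block).
    c2w = np.asarray(c2w_opencv, dtype=np.float64).copy()
    c2w[:3, 1:3] *= -1
    return c2w
```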
8 | 9 | DATA_ROOT=data/dtu_preproc 10 | PATH_TO_OFFICIAL_DTU="scripts/dtu_eval/Offical_DTU_Dataset/" 11 | 12 | lanuch_exp() { 13 | local scene_id="$1" 14 | shift 15 | local output_dir="$1" 16 | shift 17 | local exp_args="$*" 18 | 19 | local scene_name=scan"$scene_id" 20 | 21 | python train.py --cfg_files cfg/dtu_mesh.yaml --source_path $DATA_ROOT/dtu_"$scene_name"/ --model_path $output_dir/$scene_name $exp_args 22 | python render.py $output_dir/$scene_name --skip_test --eval_fps 23 | python render.py $output_dir/$scene_name --skip_test --rgb_only --use_jpg 24 | python render_fly_through.py $output_dir/$scene_name/ 25 | 26 | python extract_mesh.py $output_dir/$scene_name/ --save_gpu --use_vert_color --init_lv 8 --final_lv 10 --mesh_fname mesh_dense 27 | 28 | mkdir -p $output_dir/$scene_name/mesh/latest/evaluation 29 | python scripts/dtu_clean_for_eval.py $DATA_ROOT/dtu_"$scene_name"/ \ 30 | $output_dir/$scene_name/mesh/latest/mesh_dense.ply 31 | python scripts/dtu_eval/eval.py \ 32 | --data $output_dir/$scene_name/mesh/latest/mesh_dense_cleaned_for_eval.ply \ 33 | --scan $scene_id --dataset_dir $PATH_TO_OFFICIAL_DTU \ 34 | --vis_out_dir $output_dir/$scene_name/mesh/latest/evaluation 35 | rm -r $output_dir/$scene_name/checkpoints/ 36 | } 37 | 38 | 39 | for scene in 24 37 40 55 63 65 69 83 97 105 106 110 114 118 122 40 | do 41 | echo "============ start " $scene " ============" 42 | lanuch_exp $scene $1 "${@:2}" 43 | echo "============ end " $scene " ============" 44 | done 45 | -------------------------------------------------------------------------------- /scripts/dtu_stat.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 
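Usage of `dtu_run.sh` above, as inferred from its argument handling: the first argument becomes the experiment output directory and any remaining arguments are forwarded to `train.py`, e.g. `bash scripts/dtu_run.sh outputs/dtu_exp` optionally followed by extra training flags (the output path here is only an example).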
8 | 9 | import os 10 | import subprocess 11 | import json 12 | from argparse import ArgumentParser 13 | import glob 14 | import pandas as pd 15 | 16 | parser = ArgumentParser(description="Training script parameters") 17 | parser.add_argument('result_root') 18 | args = parser.parse_args() 19 | 20 | scenes = [ 21 | 'scan24', 'scan37', 'scan40', 'scan55', 'scan63', 'scan65', 'scan69', 'scan83', 'scan97', 'scan105', 'scan106', 'scan110', 'scan114', 'scan118', 'scan122' 22 | ] 23 | 24 | cf = [] 25 | tr_time = [] 26 | fps = [] 27 | n_voxels = [] 28 | 29 | for scene in scenes: 30 | eval_path = sorted(glob.glob(f'{args.result_root}/{scene}/test_stat/iter*.json')) 31 | if len(eval_path): 32 | eval_path = eval_path[-1] 33 | with open(eval_path) as f: 34 | ret = json.load(f) 35 | tr_time.append(ret['elapsed'] / 1000) 36 | n_voxels.append(ret['n_voxels']) 37 | else: 38 | tr_time.append(0) 39 | n_voxels.append(0) 40 | 41 | eval_path = sorted(glob.glob(f'{args.result_root}/{scene}/train/*.txt')) 42 | if len(eval_path): 43 | eval_path = eval_path[-1] 44 | with open(eval_path) as f: 45 | fps.append(float([line.strip().split('=')[1] for line in f if line.startswith('fps')][-1])) 46 | else: 47 | fps.append(0) 48 | 49 | eval_path = f'{args.result_root}/{scene}/mesh/latest/mesh_dense_cleaned_for_eval.ply.json' 50 | if os.path.isfile(eval_path): 51 | with open(eval_path) as f: 52 | ret = json.load(f) 53 | cf.append(ret['overall']) 54 | else: 55 | cf.append(10) 56 | 57 | 58 | 59 | def format_df_string(df): 60 | df = df.copy() 61 | df['scene'] = df['scene'].map(lambda s: s.rjust(15)) 62 | df['cf-dist'] = df['cf-dist'].round(2) 63 | df['tr-mins'] = (df['tr-mins'] / 60).round(1) 64 | df['fps'] = df['fps'].round(1) 65 | df['#vox(M)'] = (df['#vox(M)'] / 1_000_000).round(1) 66 | return df.to_string(index=False) 67 | 68 | def add_avg_row(df): 69 | df_avg = df.mean(axis=0, numeric_only=True).to_frame().transpose() 70 | df_avg['scene'] = 'AVG' 71 | return pd.concat([df, df_avg], ignore_index=True) 72 | 73 | df = pd.DataFrame({ 74 | 'scene': scenes, 75 | 'cf-dist': cf, 76 | 'tr-mins': tr_time, 77 | 'fps': fps, 78 | '#vox(M)': n_voxels, 79 | }) 80 | df = add_avg_row(df) 81 | 82 | print(format_df_string(df)) 83 | 84 | -------------------------------------------------------------------------------- /scripts/eval_tnt/README.md: -------------------------------------------------------------------------------- 1 | # Python Toolbox for Evaluation 2 | 3 | This Python script evaluates **training** dataset of TanksAndTemples benchmark. 4 | The script requires ``Open3D`` and a few Python packages such as ``matplotlib``, ``json``, and ``numpy``. 5 | 6 | ## How to use: 7 | **Step 0**. Reconstruct 3D models and recover camera poses from the training dataset. 8 | The raw videos of the training dataset can be found from: 9 | https://tanksandtemples.org/download/ 10 | 11 | **Step 1**. Download evaluation data (ground truth geometry + reference reconstruction) using 12 | [this link](https://drive.google.com/open?id=1UoKPiUUsKa0AVHFOrnMRhc5hFngjkE-t). In this example, we regard ``TanksAndTemples/evaluation/data/`` as a dataset folder. 13 | 14 | **Step 2**. Install Open3D. Follow instructions in http://open3d.org/docs/getting_started.html 15 | 16 | **Step 3**. Run the evaluation script and grab some coffee. 
17 | ``` 18 | # firstly, run cull_mesh.py to cull mesh and then 19 | ./run.sh Barn 20 | ``` 21 | Output (evaluation of Ignatius): 22 | ``` 23 | =========================== 24 | Evaluating Ignatius 25 | =========================== 26 | path/to/TanksAndTemples/evaluation/data/Ignatius/Ignatius_COLMAP.ply 27 | Reading PLY: [========================================] 100% 28 | Read PointCloud: 6929586 vertices. 29 | path/to/TanksAndTemples/evaluation/data/Ignatius/Ignatius.ply 30 | Reading PLY: [========================================] 100% 31 | : 32 | ICP Iteration #0: Fitness 0.9980, RMSE 0.0044 33 | ICP Iteration #1: Fitness 0.9980, RMSE 0.0043 34 | ICP Iteration #2: Fitness 0.9980, RMSE 0.0043 35 | ICP Iteration #3: Fitness 0.9980, RMSE 0.0043 36 | ICP Iteration #4: Fitness 0.9980, RMSE 0.0042 37 | ICP Iteration #5: Fitness 0.9980, RMSE 0.0042 38 | ICP Iteration #6: Fitness 0.9979, RMSE 0.0042 39 | ICP Iteration #7: Fitness 0.9979, RMSE 0.0042 40 | ICP Iteration #8: Fitness 0.9979, RMSE 0.0042 41 | ICP Iteration #9: Fitness 0.9979, RMSE 0.0042 42 | ICP Iteration #10: Fitness 0.9979, RMSE 0.0042 43 | [EvaluateHisto] 44 | Cropping geometry: [========================================] 100% 45 | Pointcloud down sampled from 6929586 points to 1449840 points. 46 | Pointcloud down sampled from 1449840 points to 1365628 points. 47 | path/to/TanksAndTemples/evaluation/data/Ignatius/evaluation//Ignatius.precision.ply 48 | Cropping geometry: [========================================] 100% 49 | Pointcloud down sampled from 5016769 points to 4957123 points. 50 | Pointcloud down sampled from 4957123 points to 4181506 points. 51 | [compute_point_cloud_to_point_cloud_distance] 52 | [compute_point_cloud_to_point_cloud_distance] 53 | : 54 | [ViewDistances] Add color coding to visualize error 55 | [ViewDistances] Add color coding to visualize error 56 | : 57 | [get_f1_score_histo2] 58 | ============================== 59 | evaluation result : Ignatius 60 | ============================== 61 | distance tau : 0.003 62 | precision : 0.7679 63 | recall : 0.7937 64 | f-score : 0.7806 65 | ============================== 66 | ``` 67 | 68 | **Step 5**. Go to the evaluation folder. ``TanksAndTemples/evaluation/data/Ignatius/evaluation/`` will have the following outputs. 69 | 70 | 71 | 72 | ``PR_Ignatius_@d_th_0_0030.pdf`` (Precision and recall curves with a F-score) 73 | 74 | | | | 75 | |--|--| 76 | | ``Ignatius.precision.ply`` | ``Ignatius.recall.ply`` | 77 | 78 | (3D visualization of precision and recall. 
Each mesh is color coded using hot colormap) 79 | 80 | # Requirements 81 | 82 | - Python 3 83 | - open3d v0.9.0 84 | - matplotlib 85 | -------------------------------------------------------------------------------- /scripts/eval_tnt/config.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------------- 2 | # - TanksAndTemples Website Toolbox - 3 | # - http://www.tanksandtemples.org - 4 | # ---------------------------------------------------------------------------- 5 | # The MIT License (MIT) 6 | # 7 | # Copyright (c) 2017 8 | # Arno Knapitsch 9 | # Jaesik Park 10 | # Qian-Yi Zhou 11 | # Vladlen Koltun 12 | # 13 | # Permission is hereby granted, free of charge, to any person obtaining a copy 14 | # of this software and associated documentation files (the "Software"), to deal 15 | # in the Software without restriction, including without limitation the rights 16 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 17 | # copies of the Software, and to permit persons to whom the Software is 18 | # furnished to do so, subject to the following conditions: 19 | # 20 | # The above copyright notice and this permission notice shall be included in 21 | # all copies or substantial portions of the Software. 22 | # 23 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 24 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 25 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 26 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 27 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 28 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 29 | # THE SOFTWARE. 30 | # ---------------------------------------------------------------------------- 31 | 32 | # some global parameters - do not modify 33 | scenes_tau_dict = { 34 | "Barn": 0.01, 35 | "Caterpillar": 0.005, 36 | "Church": 0.025, 37 | "Courthouse": 0.025, 38 | "Ignatius": 0.003, 39 | "Meetingroom": 0.01, 40 | "Truck": 0.005, 41 | } 42 | -------------------------------------------------------------------------------- /scripts/eval_tnt/evaluate_single_scene.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import cv2 5 | import numpy as np 6 | import os 7 | import glob 8 | from skimage.morphology import binary_dilation, disk 9 | import argparse 10 | 11 | import trimesh 12 | from pathlib import Path 13 | import subprocess 14 | import sys 15 | import json 16 | 17 | 18 | if __name__ == "__main__": 19 | 20 | parser = argparse.ArgumentParser( 21 | description='Arguments to evaluate the mesh.' 
22 | ) 23 | 24 | parser.add_argument('--input_mesh', type=str, help='path to the mesh to be evaluated') 25 | parser.add_argument('--scene', type=str, help='scan id of the input mesh') 26 | parser.add_argument('--output_dir', type=str, default='evaluation_results_single', help='path to the output folder') 27 | parser.add_argument('--TNT', type=str, default='Offical_DTU_Dataset', help='path to the GT DTU point clouds') 28 | args = parser.parse_args() 29 | 30 | 31 | TNT_Dataset = args.TNT 32 | out_dir = args.output_dir 33 | Path(out_dir).mkdir(parents=True, exist_ok=True) 34 | scene = args.scene 35 | ply_file = args.input_mesh 36 | result_mesh_file = os.path.join(out_dir, "culled_mesh.ply") 37 | # read scene.json 38 | f"python run.py --dataset-dir {ply_file} --traj-path {TNT_Dataset}/{scene}/{scene}_COLMAP_SfM.log --ply-path {TNT_Dataset}/{scene}/{scene}_COLMAP.ply" -------------------------------------------------------------------------------- /scripts/eval_tnt/help_func.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf-8 3 | import torch 4 | 5 | def rotation_matrix(a, b): 6 | """Compute the rotation matrix that rotates vector a to vector b. 7 | 8 | Args: 9 | a: The vector to rotate. 10 | b: The vector to rotate to. 11 | Returns: 12 | The rotation matrix. 13 | """ 14 | a = a / torch.linalg.norm(a) 15 | b = b / torch.linalg.norm(b) 16 | v = torch.cross(a, b) 17 | c = torch.dot(a, b) 18 | # If vectors are exactly opposite, we add a little noise to one of them 19 | if c < -1 + 1e-8: 20 | eps = (torch.rand(3) - 0.5) * 0.01 21 | return rotation_matrix(a + eps, b) 22 | s = torch.linalg.norm(v) 23 | skew_sym_mat = torch.Tensor( 24 | [ 25 | [0, -v[2], v[1]], 26 | [v[2], 0, -v[0]], 27 | [-v[1], v[0], 0], 28 | ] 29 | ) 30 | return torch.eye(3) + skew_sym_mat + skew_sym_mat @ skew_sym_mat * ((1 - c) / (s**2 + 1e-8)) 31 | 32 | 33 | def auto_orient_and_center_poses( 34 | poses, method="up", center_poses=True 35 | ): 36 | """Orients and centers the poses. We provide two methods for orientation: pca and up. 37 | 38 | pca: Orient the poses so that the principal component of the points is aligned with the axes. 39 | This method works well when all of the cameras are in the same plane. 40 | up: Orient the poses so that the average up vector is aligned with the z axis. 41 | This method works well when images are not at arbitrary angles. 42 | 43 | 44 | Args: 45 | poses: The poses to orient. 46 | method: The method to use for orientation. 47 | center_poses: If True, the poses are centered around the origin. 48 | 49 | Returns: 50 | The oriented poses. 
51 | """ 52 | 53 | translation = poses[..., :3, 3] 54 | 55 | mean_translation = torch.mean(translation, dim=0) 56 | translation_diff = translation - mean_translation 57 | 58 | if center_poses: 59 | translation = mean_translation 60 | else: 61 | translation = torch.zeros_like(mean_translation) 62 | 63 | if method == "pca": 64 | _, eigvec = torch.linalg.eigh(translation_diff.T @ translation_diff) 65 | eigvec = torch.flip(eigvec, dims=(-1,)) 66 | 67 | if torch.linalg.det(eigvec) < 0: 68 | eigvec[:, 2] = -eigvec[:, 2] 69 | 70 | transform = torch.cat([eigvec, eigvec @ -translation[..., None]], dim=-1) 71 | oriented_poses = transform @ poses 72 | 73 | if oriented_poses.mean(axis=0)[2, 1] < 0: 74 | oriented_poses[:, 1:3] = -1 * oriented_poses[:, 1:3] 75 | elif method == "up": 76 | up = torch.mean(poses[:, :3, 1], dim=0) 77 | up = up / torch.linalg.norm(up) 78 | 79 | rotation = rotation_matrix(up, torch.Tensor([0, 0, 1])) 80 | transform = torch.cat([rotation, rotation @ -translation[..., None]], dim=-1) 81 | oriented_poses = transform @ poses 82 | elif method == "none": 83 | transform = torch.eye(4) 84 | transform[:3, 3] = -translation 85 | transform = transform[:3, :] 86 | oriented_poses = transform @ poses 87 | 88 | return oriented_poses, transform 89 | 90 | 91 | -------------------------------------------------------------------------------- /scripts/eval_tnt/plot.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------------- 2 | # - TanksAndTemples Website Toolbox - 3 | # - http://www.tanksandtemples.org - 4 | # ---------------------------------------------------------------------------- 5 | # The MIT License (MIT) 6 | # 7 | # Copyright (c) 2017 8 | # Arno Knapitsch 9 | # Jaesik Park 10 | # Qian-Yi Zhou 11 | # Vladlen Koltun 12 | # 13 | # Permission is hereby granted, free of charge, to any person obtaining a copy 14 | # of this software and associated documentation files (the "Software"), to deal 15 | # in the Software without restriction, including without limitation the rights 16 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 17 | # copies of the Software, and to permit persons to whom the Software is 18 | # furnished to do so, subject to the following conditions: 19 | # 20 | # The above copyright notice and this permission notice shall be included in 21 | # all copies or substantial portions of the Software. 22 | # 23 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 24 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 25 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 26 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 27 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 28 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 29 | # THE SOFTWARE. 
30 | # ---------------------------------------------------------------------------- 31 | # 32 | # This python script is for downloading dataset from www.tanksandtemples.org 33 | # The dataset has a different license, please refer to 34 | # https://tanksandtemples.org/license/ 35 | 36 | import matplotlib.pyplot as plt 37 | from cycler import cycler 38 | 39 | 40 | def plot_graph( 41 | scene, 42 | fscore, 43 | dist_threshold, 44 | edges_source, 45 | cum_source, 46 | edges_target, 47 | cum_target, 48 | plot_stretch, 49 | mvs_outpath, 50 | show_figure=False, 51 | ): 52 | f = plt.figure() 53 | plt_size = [14, 7] 54 | pfontsize = "medium" 55 | 56 | ax = plt.subplot(111) 57 | label_str = "precision" 58 | ax.plot( 59 | edges_source[1::], 60 | cum_source * 100, 61 | c="red", 62 | label=label_str, 63 | linewidth=2.0, 64 | ) 65 | 66 | label_str = "recall" 67 | ax.plot( 68 | edges_target[1::], 69 | cum_target * 100, 70 | c="blue", 71 | label=label_str, 72 | linewidth=2.0, 73 | ) 74 | 75 | ax.grid(True) 76 | plt.rcParams["figure.figsize"] = plt_size 77 | plt.rc("axes", prop_cycle=cycler("color", ["r", "g", "b", "y"])) 78 | plt.title("Precision and Recall: " + scene + ", " + "%02.2f f-score" % 79 | (fscore * 100)) 80 | plt.axvline(x=dist_threshold, c="black", ls="dashed", linewidth=2.0) 81 | 82 | plt.ylabel("# of points (%)", fontsize=15) 83 | plt.xlabel("Meters", fontsize=15) 84 | plt.axis([0, dist_threshold * plot_stretch, 0, 100]) 85 | ax.legend(shadow=True, fancybox=True, fontsize=pfontsize) 86 | # plt.axis([0, dist_threshold*plot_stretch, 0, 100]) 87 | 88 | plt.setp(ax.get_legend().get_texts(), fontsize=pfontsize) 89 | 90 | plt.legend(loc=2, borderaxespad=0.0, fontsize=pfontsize) 91 | plt.legend(loc=4) 92 | leg = plt.legend(loc="lower right") 93 | 94 | box = ax.get_position() 95 | ax.set_position([box.x0, box.y0, box.width * 0.8, box.height]) 96 | 97 | # Put a legend to the right of the current axis 98 | ax.legend(loc="center left", bbox_to_anchor=(1, 0.5)) 99 | plt.setp(ax.get_legend().get_texts(), fontsize=pfontsize) 100 | png_name = mvs_outpath + "/PR_{0}_@d_th_0_{1}.png".format( 101 | scene, "%04d" % (dist_threshold * 10000)) 102 | pdf_name = mvs_outpath + "/PR_{0}_@d_th_0_{1}.pdf".format( 103 | scene, "%04d" % (dist_threshold * 10000)) 104 | 105 | # save figure and display 106 | f.savefig(png_name, format="png", bbox_inches="tight") 107 | f.savefig(pdf_name, format="pdf", bbox_inches="tight") 108 | if show_figure: 109 | plt.show() 110 | -------------------------------------------------------------------------------- /scripts/eval_tnt/requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib>=1.3 2 | open3d==0.10 3 | -------------------------------------------------------------------------------- /scripts/eval_tnt/trajectory_io.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import open3d as o3d 3 | 4 | 5 | class CameraPose: 6 | 7 | def __init__(self, meta, mat): 8 | self.metadata = meta 9 | self.pose = mat 10 | 11 | def __str__(self): 12 | return ("Metadata : " + " ".join(map(str, self.metadata)) + "\n" + 13 | "Pose : " + "\n" + np.array_str(self.pose)) 14 | 15 | 16 | def convert_trajectory_to_pointcloud(traj): 17 | pcd = o3d.geometry.PointCloud() 18 | for t in traj: 19 | pcd.points.append(t.pose[:3, 3]) 20 | return pcd 21 | 22 | 23 | def read_trajectory(filename): 24 | traj = [] 25 | with open(filename, "r") as f: 26 | metastr = f.readline() 27 | while metastr: 28 | metadata 
= map(int, metastr.split()) 29 | mat = np.zeros(shape=(4, 4)) 30 | for i in range(4): 31 | matstr = f.readline() 32 | mat[i, :] = np.fromstring(matstr, dtype=float, sep=" \t") 33 | traj.append(CameraPose(metadata, mat)) 34 | metastr = f.readline() 35 | return traj 36 | 37 | 38 | def write_trajectory(traj, filename): 39 | with open(filename, "w") as f: 40 | for x in traj: 41 | p = x.pose.tolist() 42 | f.write(" ".join(map(str, x.metadata)) + "\n") 43 | f.write("\n".join( 44 | " ".join(map("{0:.12f}".format, p[i])) for i in range(4))) 45 | f.write("\n") 46 | -------------------------------------------------------------------------------- /scripts/eval_tnt/util.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | def make_dir(path): 5 | if not os.path.exists(path): 6 | os.makedirs(path) 7 | -------------------------------------------------------------------------------- /scripts/mipnerf360_run.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | DATA_ROOT=data/360_v2 10 | 11 | lanuch_exp() { 12 | local scene_name="$1" 13 | shift 14 | local output_dir="$1" 15 | shift 16 | local exp_args="$*" 17 | 18 | python train.py --cfg_files cfg/mipnerf360.yaml --source_path $DATA_ROOT/$scene_name/ --model_path $output_dir/$scene_name $exp_args 19 | python render.py $output_dir/$scene_name --skip_train --eval_fps 20 | python render.py $output_dir/$scene_name --skip_train 21 | python eval.py $output_dir/$scene_name/ 22 | python render_fly_through.py $output_dir/$scene_name 23 | rm -r $output_dir/$scene_name/checkpoints/ 24 | } 25 | 26 | 27 | for scene in bonsai counter kitchen room 28 | do 29 | echo "============ start " $scene " ============" 30 | lanuch_exp $scene $1 --image_dir_name images_2 "${@:2}" 31 | echo "============ end " $scene " ============" 32 | done 33 | 34 | for scene in bicycle garden stump treehill flowers 35 | do 36 | echo "============ start " $scene " ============" 37 | lanuch_exp $scene $1 --image_dir_name images_4 "${@:2}" 38 | echo "============ end " $scene " ============" 39 | done 40 | -------------------------------------------------------------------------------- /scripts/mipnerf360_stat.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 
8 | 9 | import os 10 | import subprocess 11 | import json 12 | from argparse import ArgumentParser 13 | import glob 14 | import pandas as pd 15 | 16 | parser = ArgumentParser(description="Training script parameters") 17 | parser.add_argument('result_root') 18 | parser.add_argument('--suffix', default='') 19 | args = parser.parse_args() 20 | 21 | indoor_scenes = ['bonsai', 'counter', 'kitchen', 'room'] 22 | outdoor_scenes = ['bicycle', 'garden', 'stump', 'treehill', 'flowers'] 23 | scenes = indoor_scenes + outdoor_scenes 24 | 25 | indoor_psnr = [] 26 | indoor_ssim = [] 27 | indoor_lpips = [] 28 | indoor_lpips_corr = [] 29 | indoor_tr_time = [] 30 | indoor_fps = [] 31 | indoor_n_voxels = [] 32 | for scene in indoor_scenes: 33 | path = glob.glob(os.path.join(f'{args.result_root}/{scene}/test_stat/iter*.json')) 34 | if len(path) == 0: 35 | print(f'{scene:10s}: failed !!??') 36 | continue 37 | path = sorted(path)[-1] 38 | with open(path) as f: 39 | ret = json.load(f) 40 | tmp_psnr = ret['psnr'] 41 | indoor_tr_time.append(ret['elapsed'] / 1000) 42 | # indoor_fps.append(ret['fps']) 43 | indoor_n_voxels.append(ret['n_voxels']) 44 | n_iter = int(os.path.split(path)[1].replace('iter', '').replace('.json', '')) 45 | fps_path = f'{args.result_root}/{scene}/test/ours_{n_iter}{args.suffix}.txt' 46 | with open(fps_path) as f: 47 | fps = float(f.read().strip().split()[-1].split('=')[1]) 48 | indoor_fps.append(fps) 49 | eval_path = f'{args.result_root}/{scene}/results.json' 50 | if os.path.exists(eval_path): 51 | with open(os.path.join(eval_path)) as f: 52 | ret = json.load(f) 53 | ret = ret[f"ours_{n_iter}{args.suffix}"] 54 | indoor_psnr.append(ret['PSNR']) 55 | indoor_ssim.append(ret['SSIM']) 56 | indoor_lpips.append(ret['LPIPS']) 57 | indoor_lpips_corr.append(ret.get('LPIPS-corrected', 1)) 58 | else: 59 | indoor_psnr.append(tmp_psnr) 60 | indoor_ssim.append(0) 61 | indoor_lpips.append(0) 62 | indoor_lpips_corr.append(1) 63 | 64 | outdoor_psnr = [] 65 | outdoor_ssim = [] 66 | outdoor_lpips = [] 67 | outdoor_lpips_corr = [] 68 | outdoor_tr_time = [] 69 | outdoor_fps = [] 70 | outdoor_n_voxels = [] 71 | for scene in outdoor_scenes: 72 | path = glob.glob(os.path.join(f'{args.result_root}/{scene}/test_stat/iter*.json')) 73 | if len(path) == 0: 74 | print(f'{scene:10s}: failed !!??') 75 | continue 76 | path = sorted(path)[-1] 77 | with open(path) as f: 78 | ret = json.load(f) 79 | tmp_psnr = ret['psnr'] 80 | outdoor_tr_time.append(ret['elapsed'] / 1000) 81 | # outdoor_fps.append(ret['fps']) 82 | outdoor_n_voxels.append(ret['n_voxels']) 83 | n_iter = int(os.path.split(path)[1].replace('iter', '').replace('.json', '')) 84 | fps_path = f'{args.result_root}/{scene}/test/ours_{n_iter}{args.suffix}.txt' 85 | with open(fps_path) as f: 86 | fps = float(f.read().strip().split()[-1].split('=')[1]) 87 | outdoor_fps.append(fps) 88 | eval_path = f'{args.result_root}/{scene}/results.json' 89 | if os.path.exists(eval_path): 90 | with open(os.path.join(eval_path)) as f: 91 | ret = json.load(f) 92 | ret = ret[f"ours_{n_iter}{args.suffix}"] 93 | outdoor_psnr.append(ret['PSNR']) 94 | outdoor_ssim.append(ret['SSIM']) 95 | outdoor_lpips.append(ret['LPIPS']) 96 | outdoor_lpips_corr.append(ret.get('LPIPS-corrected', 1)) 97 | else: 98 | outdoor_psnr.append(tmp_psnr) 99 | outdoor_ssim.append(0) 100 | outdoor_lpips.append(0) 101 | outdoor_lpips_corr.append(1) 102 | 103 | 104 | 105 | def format_df_string(df): 106 | df = df.copy() 107 | df['scene'] = df['scene'].map(lambda s: s.rjust(15)) 108 | df['psnr'] = df['psnr'].round(2) 109 | 
df['ssim'] = df['ssim'].round(3) 110 | df['lpips'] = df['lpips'].round(3) 111 | df['lpips*'] = df['lpips*'].round(3) 112 | df['tr-mins'] = (df['tr-mins'] / 60).round(1) 113 | df['fps'] = df['fps'].round(1) 114 | df['#vox(M)'] = (df['#vox(M)'] / 1_000_000).round(1) 115 | return df.to_string(index=False) 116 | 117 | def add_avg_row(df): 118 | df_avg = df.mean(axis=0, numeric_only=True).to_frame().transpose() 119 | df_avg['scene'] = 'AVG' 120 | return pd.concat([df, df_avg], ignore_index=True) 121 | 122 | df_indoor = pd.DataFrame({ 123 | 'scene': indoor_scenes, 124 | 'psnr': indoor_psnr, 125 | 'ssim': indoor_ssim, 126 | 'lpips': indoor_lpips, 127 | 'lpips*': indoor_lpips_corr, 128 | 'tr-mins': indoor_tr_time, 129 | 'fps': indoor_fps, 130 | '#vox(M)': indoor_n_voxels, 131 | }) 132 | 133 | df_outdoor = pd.DataFrame({ 134 | 'scene': outdoor_scenes, 135 | 'psnr': outdoor_psnr, 136 | 'ssim': outdoor_ssim, 137 | 'lpips': outdoor_lpips, 138 | 'lpips*': outdoor_lpips_corr, 139 | 'tr-mins': outdoor_tr_time, 140 | 'fps': outdoor_fps, 141 | '#vox(M)': outdoor_n_voxels, 142 | }) 143 | 144 | df = pd.concat([df_indoor, df_outdoor], ignore_index=True) 145 | 146 | df_indoor = add_avg_row(df_indoor) 147 | df_outdoor = add_avg_row(df_outdoor) 148 | df = add_avg_row(df) 149 | 150 | print(format_df_string(df_indoor)) 151 | print() 152 | print(format_df_string(df_outdoor)) 153 | print() 154 | print(format_df_string(df)) 155 | 156 | -------------------------------------------------------------------------------- /scripts/scannetpp_preproc.py: -------------------------------------------------------------------------------- 1 | import os 2 | import math 3 | import json 4 | import argparse 5 | from tqdm import tqdm 6 | 7 | 8 | def focal2fov(focal, pixels): 9 | return 2*math.atan(pixels/(2*focal)) 10 | 11 | 12 | if __name__ == '__main__': 13 | 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('--indir', default='data/scannetpp_v2/data') 16 | parser.add_argument('--split_dir', default='data/scannetpp_v2/splits') 17 | 18 | parser.add_argument('--splits', default=[], nargs='*') 19 | 20 | parser.add_argument('--ids', default=[], nargs='*') 21 | # parser.add_argument('--ids', default=['08bbbdcc3d'], nargs='*') 22 | # parser.add_argument('--ids', default=['7b6477cb95', 'c50d2d1d42', 'cc5237fd77', 'acd95847c5', 'fb5a96b1a2', 'a24f64f7fb', '1ada7a0617', '5eb31827b7', '3e8bba0176', '3f15a9266d', '21d970d8de', '5748ce6f01', 'c4c04e6d6c', '7831862f02', 'bde1e479ad', '38d58a7a31', '5ee7c22ba0', 'f9f95681fd', '3864514494', '40aec5fffa', '13c3e046d7', 'e398684d27', 'a8bf42d646', '45b0dac5e3', '31a2c91c43', 'e7af285f7d', '286b55a2bf', '7bc286c1b6', 'f3685d06a9', 'b0a08200c9', '825d228aec', 'a980334473', 'f2dc06b1d2', '5942004064', '25f3b7a318', 'bcd2436daf', 'f3d64c30f8', '0d2ee665be', '3db0a1c8f3', 'ac48a9b736', 'c5439f4607', '578511c8a9', 'd755b3d9d8', '99fa5c25e1', '09c1414f1b', '5f99900f09', '9071e139d9', '6115eddb86', '27dd4da69e', 'c49a8c6cff'], nargs='*') 23 | 24 | parser.add_argument('--is_test_hidden', default=False, action='store_true') 25 | # parser.add_argument('--ids', default=['ca0e09014e', 'beb802368c', 'ebff4de90b', 'd228e2d9dd', '9e019d8be1', '11b696efba', '471cc4ba84', 'f20e7b5640', 'dfe9cbd72a', 'ccdc33dc2a', '124974734e', 'c0cbb1fea1', '047fb766c4', '7b37cccb03', '8283161f1b', 'c3e279be54', '5a14f9da39', 'cd7973d92b', '5298ec174f', 'e0e83b4ca3', '64ea6b73c2', 'f00bd5fa8a', '02a980c994', 'be91f7884d', '1c876c250f', '15155a88fb', '633f9a9f06', 'd6419f6478', 'f0b0a42ba3', 'a46b21d949', '74ff105c0d', 
'77596f5d2a', 'ecb5d01065', 'c9bf4c8b62', 'b074ca565a', '49c758655e', 'd4d2019f5d', '319787e6ec', '84b48f2614', 'bee11d6a41', '9a9e32c768', '9b365a9b68', '54e7ffaea3', '7d72f01865', '252652d5ba', '651dc6b4f1', '03f7a0e617', 'fe94fc30cf', 'd1b9dff904', '4bc04e0cde'], nargs='*') 26 | args = parser.parse_args() 27 | 28 | if len(args.splits) > 0: 29 | args.ids = [] 30 | for split in args.splits: 31 | with open(os.path.join(args.split_dir, f"{split}.txt")) as f: 32 | args.ids.extend(f.read().strip().split()) 33 | print(args.ids) 34 | 35 | for scene_id in tqdm(args.ids): 36 | in_scene_dir = os.path.join(args.indir, scene_id, 'dslr') 37 | out_scene_dir = os.path.join(in_scene_dir, 'svraster_inputs') 38 | 39 | os.system(f'mkdir -p {out_scene_dir}') 40 | 41 | with open(os.path.join(in_scene_dir, 'nerfstudio', 'transforms_undistorted.json')) as f: 42 | meta = json.load(f) 43 | 44 | cx_p = meta['cx'] / meta['w'] 45 | cy_p = meta['cy'] / meta['h'] 46 | camera_angle_x = focal2fov(meta['fl_x'], meta['w']) 47 | camera_angle_y = focal2fov(meta['fl_y'], meta['h']) 48 | 49 | new_metas_lst = [] 50 | for key in ['frames', 'test_frames']: 51 | new_metas_lst.append(dict( 52 | camera_angle_x=0, 53 | colmap={ 54 | 'path': '../colmap', 55 | 'transform': [ 56 | [0, 1, 0], 57 | [1, 0, 0], 58 | [0, 0, -1], 59 | ], 60 | }, 61 | frames=[])) 62 | for frame in meta[key]: 63 | new_metas_lst[-1]['frames'].append({ 64 | 'camera_angle_x': camera_angle_x, 65 | 'camera_angle_y': camera_angle_y, 66 | 'cx_p': cx_p, 67 | 'cy_p': cy_p, 68 | 'file_path': f"../undistorted_images/{frame['file_path']}", 69 | 'depth_path': f"../undistorted_depths/{frame['file_path'].replace('.JPG', '.png')}", 70 | 'transform_matrix': frame['transform_matrix'], 71 | 'is_bad': frame['is_bad'], 72 | 'heldout': args.is_test_hidden and (key == 'test_frames'), 73 | 'w': meta['w'], 74 | 'h': meta['h'], 75 | }) 76 | 77 | new_train_meta, new_test_meta = new_metas_lst 78 | 79 | with open(os.path.join(out_scene_dir, 'transforms_train.json'), 'w') as f: 80 | json.dump(new_train_meta, f, indent=2) 81 | with open(os.path.join(out_scene_dir, 'transforms_test.json'), 'w') as f: 82 | json.dump(new_test_meta, f, indent=2) 83 | -------------------------------------------------------------------------------- /scripts/scannetpp_run.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 
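# Added usage note (illustrative, not part of the original script): the first argument
# is the output root and all remaining arguments are forwarded to train.py, e.g.
#   bash scripts/scannetpp_run.sh output/scannetpp
# Scenes that already have a results.json under the output root are skipped.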
8 | 9 | DATA_ROOT=data/scannetpp_nvs 10 | 11 | lanuch_exp() { 12 | local scene_name="$1" 13 | shift 14 | local output_dir="$1" 15 | shift 16 | local exp_args="$*" 17 | 18 | python train.py --source_path $DATA_ROOT/$scene_name --model_path $output_dir/$scene_name $exp_args 19 | python render.py $output_dir/$scene_name --skip_train --eval_fps 20 | python render.py $output_dir/$scene_name --skip_train 21 | python eval.py $output_dir/$scene_name 22 | python render_fly_through.py $output_dir/$scene_name 23 | rm -r $output_dir/$scene_name/checkpoints/ 24 | } 25 | 26 | ulimit -n 4096 # Increase maximum number of files the script can read 27 | 28 | for scene in 39f36da05b 5a269ba6fe dc263dfbf0 08bbbdcc3d 29 | do 30 | echo "============ start " $scene " ============" 31 | if [ ! -f $1/$scene/results.json ]; then 32 | # We use the source image resolution and prevent automatic downsampling. 33 | lanuch_exp $scene $1 --res_downscale 1.0 --cfg_files cfg/scannetpp.yaml "${@:2}" 34 | fi 35 | echo "============ end " $scene " ============" 36 | done 37 | -------------------------------------------------------------------------------- /scripts/scannetpp_stat.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | import os 10 | import subprocess 11 | import json 12 | from argparse import ArgumentParser 13 | import glob 14 | import pandas as pd 15 | 16 | parser = ArgumentParser(description="Training script parameters") 17 | parser.add_argument('result_root') 18 | parser.add_argument('--suffix', default='_r1.0') 19 | args = parser.parse_args() 20 | 21 | indoor_scenes = ['39f36da05b', '5a269ba6fe', 'dc263dfbf0', '08bbbdcc3d'] 22 | 23 | indoor_psnr = [] 24 | indoor_ssim = [] 25 | indoor_lpips = [] 26 | indoor_lpips_corr = [] 27 | indoor_tr_time = [] 28 | indoor_fps = [] 29 | indoor_n_voxels = [] 30 | for scene in indoor_scenes: 31 | path = glob.glob(os.path.join(f'{args.result_root}/{scene}/test_stat/iter*.json')) 32 | if len(path) == 0: 33 | print(f'{scene:10s}: failed !!??') 34 | continue 35 | path = sorted(path)[-1] 36 | with open(path) as f: 37 | ret = json.load(f) 38 | tmp_psnr = ret['psnr'] 39 | indoor_tr_time.append(ret['elapsed'] / 1000) 40 | indoor_fps.append(ret['fps']) 41 | indoor_n_voxels.append(ret['n_voxels']) 42 | n_iter = int(os.path.split(path)[1].replace('iter', '').replace('.json', '')) 43 | fps_path = f'{args.result_root}/{scene}/test/ours_{n_iter}{args.suffix}.txt' 44 | with open(fps_path) as f: 45 | fps = float(f.read().strip().split()[-1].split('=')[1]) 46 | #indoor_fps.append(fps) 47 | eval_path = f'{args.result_root}/{scene}/results.json' 48 | if os.path.exists(eval_path): 49 | with open(os.path.join(eval_path)) as f: 50 | ret = json.load(f) 51 | ret = ret[f"ours_{n_iter}{args.suffix}"] 52 | indoor_psnr.append(ret['PSNR']) 53 | indoor_ssim.append(ret['SSIM']) 54 | indoor_lpips.append(ret['LPIPS']) 55 | indoor_lpips_corr.append(ret.get('LPIPS-corrected', 1)) 56 | else: 57 | indoor_psnr.append(tmp_psnr) 58 | indoor_ssim.append(0) 59 | indoor_lpips.append(0) 60 | indoor_lpips_corr.append(1) 61 | 62 | 63 | def 
format_df_string(df): 64 | df = df.copy() 65 | df['scene'] = df['scene'].map(lambda s: s.rjust(15)) 66 | df['psnr'] = df['psnr'].round(2) 67 | df['ssim'] = df['ssim'].round(3) 68 | df['lpips'] = df['lpips'].round(3) 69 | df['lpips*'] = df['lpips*'].round(3) 70 | df['tr-mins'] = (df['tr-mins'] / 60).round(1) 71 | df['fps'] = df['fps'].round(1) 72 | df['#vox(M)'] = (df['#vox(M)'] / 1_000_000).round(1) 73 | return df.to_string(index=False) 74 | 75 | def add_avg_row(df): 76 | df_avg = df.mean(axis=0, numeric_only=True).to_frame().transpose() 77 | df_avg['scene'] = 'AVG' 78 | return pd.concat([df, df_avg], ignore_index=True) 79 | 80 | df = pd.DataFrame({ 81 | 'scene': indoor_scenes, 82 | 'psnr': indoor_psnr, 83 | 'ssim': indoor_ssim, 84 | 'lpips': indoor_lpips, 85 | 'lpips*': indoor_lpips_corr, 86 | 'tr-mins': indoor_tr_time, 87 | 'fps': indoor_fps, 88 | '#vox(M)': indoor_n_voxels, 89 | }) 90 | df = add_avg_row(df) 91 | 92 | print(format_df_string(df)) 93 | -------------------------------------------------------------------------------- /scripts/synthetic_nerf_run.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | DATA_ROOT=data/nerf_synthetic 10 | 11 | lanuch_exp() { 12 | local scene_name="$1" 13 | shift 14 | local output_dir="$1" 15 | shift 16 | local exp_args="$*" 17 | 18 | python train.py --cfg_files cfg/synthetic_nerf.yaml --source_path $DATA_ROOT/$scene_name --model_path $output_dir/$scene_name $exp_args 19 | python render.py $output_dir/$scene_name --skip_train --eval_fps 20 | python render.py $output_dir/$scene_name --skip_train 21 | python eval.py $output_dir/$scene_name/ 22 | python render_fly_through.py $output_dir/$scene_name/ 23 | rm -r $output_dir/$scene_name/checkpoints/ 24 | } 25 | 26 | 27 | for scene in chair drums ficus hotdog lego materials mic ship 28 | do 29 | echo "============ start " $scene " ============" 30 | lanuch_exp $scene $1 "${@:2}" 31 | echo "============ end " $scene " ============" 32 | done 33 | -------------------------------------------------------------------------------- /scripts/synthetic_nerf_stat.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 
8 | 9 | import os 10 | import subprocess 11 | import json 12 | from argparse import ArgumentParser 13 | import glob 14 | import pandas as pd 15 | 16 | parser = ArgumentParser(description="Training script parameters") 17 | parser.add_argument('result_root') 18 | args = parser.parse_args() 19 | 20 | scenes = ['chair', 'drums', 'ficus', 'hotdog', 'lego', 'materials', 'mic', 'ship'] 21 | 22 | psnr = [] 23 | ssim = [] 24 | lpips = [] 25 | tr_time = [] 26 | fps = [] 27 | n_voxels = [] 28 | max_iter = sorted(glob.glob(f'{args.result_root}/{scenes[0]}/test_stat/iter*.json'))[-1].split('/')[-1] 29 | for scene in scenes: 30 | eval_path = f'{args.result_root}/{scene}/test_stat/{max_iter}' 31 | if os.path.exists(eval_path): 32 | with open(eval_path) as f: 33 | ret = json.load(f) 34 | psnr.append(ret['psnr']) 35 | tr_time.append(ret['elapsed'] / 1000) 36 | fps.append(ret['fps']) 37 | n_voxels.append(ret['n_voxels']) 38 | else: 39 | psnr.append(0) 40 | tr_time.append(0) 41 | fps.append(0) 42 | n_voxels.append(0) 43 | 44 | eval_path = f'{args.result_root}/{scene}/results.json' 45 | if os.path.exists(eval_path): 46 | with open(os.path.join(eval_path)) as f: 47 | ret = json.load(f) 48 | ret = ret[sorted(ret.keys())[-1]] 49 | psnr[-1] = ret['PSNR'] 50 | ssim.append(ret['SSIM']) 51 | lpips.append(ret['LPIPS']) 52 | else: 53 | ssim.append(0) 54 | lpips.append(0) 55 | 56 | 57 | 58 | def format_df_string(df): 59 | df = df.copy() 60 | df['scene'] = df['scene'].map(lambda s: s.rjust(15)) 61 | df['psnr'] = df['psnr'].round(2) 62 | df['ssim'] = df['ssim'].round(3) 63 | df['lpips'] = df['lpips'].round(3) 64 | df['tr-mins'] = (df['tr-mins'] / 60).round(1) 65 | df['fps'] = df['fps'].round(1) 66 | df['#vox(M)'] = (df['#vox(M)'] / 1_000_000).round(1) 67 | return df.to_string(index=False) 68 | 69 | def add_avg_row(df): 70 | df_avg = df.mean(axis=0, numeric_only=True).to_frame().transpose() 71 | df_avg['scene'] = 'AVG' 72 | return pd.concat([df, df_avg], ignore_index=True) 73 | 74 | df = pd.DataFrame({ 75 | 'scene': scenes, 76 | 'psnr': psnr, 77 | 'ssim': ssim, 78 | 'lpips': lpips, 79 | 'tr-mins': tr_time, 80 | 'fps': fps, 81 | '#vox(M)': n_voxels, 82 | }) 83 | df = add_avg_row(df) 84 | 85 | print(format_df_string(df)) 86 | -------------------------------------------------------------------------------- /scripts/tandt_db_run.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 
8 | 9 | DATA_ROOT=data/tandt_db 10 | 11 | lanuch_exp() { 12 | local scene_name="$1" 13 | shift 14 | local output_dir="$1" 15 | shift 16 | local exp_args="$*" 17 | 18 | python train.py --source_path $DATA_ROOT/$scene_name/ --model_path $output_dir/$scene_name $exp_args 19 | python render.py $output_dir/$scene_name --skip_train --eval_fps 20 | python render.py $output_dir/$scene_name --skip_train 21 | python eval.py $output_dir/$scene_name/ 22 | python render_fly_through.py $output_dir/$scene_name/ 23 | rm -r $output_dir/$scene_name/checkpoints/ 24 | } 25 | 26 | 27 | for scene in train truck 28 | do 29 | echo "============ start " $scene " ============" 30 | lanuch_exp $scene $1 --res_downscale 1.0 --cfg_files cfg/mipnerf360.yaml "${@:2}" 31 | echo "============ end " $scene " ============" 32 | done 33 | 34 | for scene in drjohnson playroom 35 | do 36 | echo "============ start " $scene " ============" 37 | lanuch_exp $scene $1 --res_downscale 1.0 --cfg_files cfg/deep_blending.yaml "${@:2}" 38 | echo "============ end " $scene " ============" 39 | done 40 | -------------------------------------------------------------------------------- /scripts/tandt_db_stat.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | import os 10 | import subprocess 11 | import json 12 | from argparse import ArgumentParser 13 | import glob 14 | import pandas as pd 15 | 16 | parser = ArgumentParser(description="Training script parameters") 17 | parser.add_argument('result_root') 18 | args = parser.parse_args() 19 | 20 | tandt_scenes = ['train', 'truck'] 21 | db_scenes = ['drjohnson', 'playroom'] 22 | 23 | tandt_psnr = [] 24 | tandt_ssim = [] 25 | tandt_lpips = [] 26 | tandt_tr_time = [] 27 | tandt_fps = [] 28 | tandt_n_voxels = [] 29 | for scene in tandt_scenes: 30 | path = glob.glob(os.path.join(f'{args.result_root}/{scene}/test_stat/iter*.json')) 31 | if len(path) == 0: 32 | print(f'{scene:10s}: failed !!??') 33 | continue 34 | path = sorted(path)[-1] 35 | with open(path) as f: 36 | ret = json.load(f) 37 | tmp_psnr = ret['psnr'] 38 | tandt_tr_time.append(ret['elapsed'] / 1000) 39 | tandt_fps.append(ret['fps']) 40 | tandt_n_voxels.append(ret['n_voxels']) 41 | eval_path = f'{args.result_root}/{scene}/results.json' 42 | if os.path.exists(eval_path): 43 | with open(os.path.join(eval_path)) as f: 44 | ret = json.load(f) 45 | ret = ret[sorted(ret.keys())[-1]] 46 | tandt_psnr.append(ret['PSNR']) 47 | tandt_ssim.append(ret['SSIM']) 48 | tandt_lpips.append(ret['LPIPS']) 49 | else: 50 | tandt_psnr.append(tmp_psnr) 51 | tandt_ssim.append(0) 52 | tandt_lpips.append(0) 53 | 54 | db_psnr = [] 55 | db_ssim = [] 56 | db_lpips = [] 57 | db_tr_time = [] 58 | db_fps = [] 59 | db_n_voxels = [] 60 | for scene in db_scenes: 61 | path = glob.glob(os.path.join(f'{args.result_root}/{scene}/test_stat/iter*.json')) 62 | if len(path) == 0: 63 | print(f'{scene:10s}: failed !!??') 64 | continue 65 | path = sorted(path)[-1] 66 | with open(path) as f: 67 | ret = json.load(f) 68 | tmp_psnr = ret['psnr'] 69 | db_tr_time.append(ret['elapsed'] / 1000) 70 | 
db_fps.append(ret['fps']) 71 | db_n_voxels.append(ret['n_voxels']) 72 | eval_path = f'{args.result_root}/{scene}/results.json' 73 | if os.path.exists(eval_path): 74 | with open(os.path.join(eval_path)) as f: 75 | ret = json.load(f) 76 | ret = ret[sorted(ret.keys())[-1]] 77 | db_psnr.append(ret['PSNR']) 78 | db_ssim.append(ret['SSIM']) 79 | db_lpips.append(ret['LPIPS']) 80 | else: 81 | db_psnr.append(tmp_psnr) 82 | db_ssim.append(0) 83 | db_lpips.append(0) 84 | 85 | 86 | 87 | def format_df_string(df): 88 | df = df.copy() 89 | df['scene'] = df['scene'].map(lambda s: s.rjust(15)) 90 | df['psnr'] = df['psnr'].round(2) 91 | df['ssim'] = df['ssim'].round(3) 92 | df['lpips'] = df['lpips'].round(3) 93 | df['tr-mins'] = (df['tr-mins'] / 60).round(1) 94 | df['fps'] = df['fps'].round(1) 95 | df['#vox(M)'] = (df['#vox(M)'] / 1_000_000).round(1) 96 | return df.to_string(index=False) 97 | 98 | def add_avg_row(df): 99 | df_avg = df.mean(axis=0, numeric_only=True).to_frame().transpose() 100 | df_avg['scene'] = 'AVG' 101 | return pd.concat([df, df_avg], ignore_index=True) 102 | 103 | df_tandt = pd.DataFrame({ 104 | 'scene': tandt_scenes, 105 | 'psnr': tandt_psnr, 106 | 'ssim': tandt_ssim, 107 | 'lpips': tandt_lpips, 108 | 'tr-mins': tandt_tr_time, 109 | 'fps': tandt_fps, 110 | '#vox(M)': tandt_n_voxels, 111 | }) 112 | df_tandt = add_avg_row(df_tandt) 113 | 114 | df_db = pd.DataFrame({ 115 | 'scene': db_scenes, 116 | 'psnr': db_psnr, 117 | 'ssim': db_ssim, 118 | 'lpips': db_lpips, 119 | 'tr-mins': db_tr_time, 120 | 'fps': db_fps, 121 | '#vox(M)': db_n_voxels, 122 | }) 123 | df_db = add_avg_row(df_db) 124 | 125 | print(format_df_string(df_tandt)) 126 | print() 127 | print(format_df_string(df_db)) 128 | -------------------------------------------------------------------------------- /scripts/tnt_run.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 
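# Added usage note (illustrative, not part of the original script):
#   bash scripts/tnt_run.sh output/tnt
# The first argument is the output root; extra arguments are forwarded to train.py.
# PATH_TO_OFFICIAL_TNT below should point at the official Tanks and Temples data
# (containing the <scene>_mesh_bbox.txt files) and PATH_TO_PREPROC_TNT at the
# preprocessed TNT_GOF training set; adjust both paths to your local layout.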
8 | 9 | PATH_TO_OFFICIAL_TNT="data/TnT" 10 | PATH_TO_PREPROC_TNT="data/TnT/TNT_GOF" 11 | 12 | lanuch_exp() { 13 | local scene_name="$1" 14 | shift 15 | local output_dir="$1" 16 | shift 17 | local exp_args="$*" 18 | 19 | python train.py --cfg_files cfg/tnt_mesh.yaml --source_path $PATH_TO_PREPROC_TNT/TrainingSet/$scene_name/ --model_path $output_dir/$scene_name $exp_args 20 | python render.py $output_dir/$scene_name --skip_test --eval_fps 21 | python render.py $output_dir/$scene_name --skip_test --rgb_only --use_jpg 22 | python render_fly_through.py $output_dir/$scene_name/ 23 | python extract_mesh.py $output_dir/$scene_name/ --save_gpu --bbox_path $PATH_TO_OFFICIAL_TNT/$scene_name/"$scene_name"_mesh_bbox.txt --use_vert_color --final_lv 11 --adaptive --mesh_fname mesh_svr 24 | python scripts/eval_tnt/run.py --dataset-dir $PATH_TO_OFFICIAL_TNT/$scene_name/ --traj-path $PATH_TO_PREPROC_TNT/TrainingSet/$scene_name/"$scene_name"_COLMAP_SfM.log --ply-path $output_dir/$scene_name/mesh/latest/mesh_svr.ply 25 | rm -r $output_dir/$scene_name/checkpoints/ 26 | } 27 | 28 | ulimit -n 2048 # Increase maximum number of files the script can read 29 | 30 | for scene in Barn Caterpillar Ignatius Truck Meetingroom Courthouse 31 | do 32 | echo "============ start " $scene " ============" 33 | lanuch_exp $scene $1 "${@:2}" 34 | echo "============ end " $scene " ============" 35 | done 36 | -------------------------------------------------------------------------------- /scripts/tnt_stat.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 
8 | 9 | import os 10 | import subprocess 11 | import json 12 | from argparse import ArgumentParser 13 | import glob 14 | import pandas as pd 15 | 16 | parser = ArgumentParser(description="Training script parameters") 17 | parser.add_argument('result_root') 18 | parser.add_argument('--suffix', default='_r2.0') 19 | args = parser.parse_args() 20 | 21 | all_scenes = ['Barn', 'Caterpillar', 'Courthouse', 'Ignatius', 'Meetingroom', 'Truck'] 22 | 23 | all_fscore = [] 24 | all_precision = [] 25 | all_recall = [] 26 | all_tr_time = [] 27 | all_fps = [] 28 | all_n_voxels = [] 29 | for scene in all_scenes: 30 | path = glob.glob(os.path.join(f'{args.result_root}/{scene}/test_stat/iter*.json')) 31 | if len(path) == 0: 32 | print(f'{scene:10s}: failed !!??') 33 | continue 34 | path = sorted(path)[-1] 35 | with open(path) as f: 36 | ret = json.load(f) 37 | all_tr_time.append(ret['elapsed'] / 1000) 38 | # all_fps.append(ret['fps']) 39 | all_n_voxels.append(ret['n_voxels']) 40 | n_iter = int(os.path.split(path)[1].replace('iter', '').replace('.json', '')) 41 | fps_path = f'{args.result_root}/{scene}/train/ours_{n_iter}{args.suffix}.txt' 42 | with open(fps_path) as f: 43 | fps = float(f.read().strip().split()[-1].split('=')[1]) 44 | all_fps.append(fps) 45 | eval_path = f'{args.result_root}/{scene}/mesh/latest/evaluation/result.json' 46 | if os.path.exists(eval_path): 47 | with open(os.path.join(eval_path)) as f: 48 | ret = json.load(f) 49 | all_fscore.append(ret['f-score']) 50 | all_precision.append(ret['precision']) 51 | all_recall.append(ret['recall']) 52 | else: 53 | all_fscore.append(0) 54 | all_precision.append(0) 55 | all_recall.append(0) 56 | 57 | 58 | 59 | def format_df_string(df): 60 | df = df.copy() 61 | df['scene'] = df['scene'].map(lambda s: s.rjust(15)) 62 | df['f-score'] = df['f-score'].round(2) 63 | df['prec.'] = df['prec.'].round(2) 64 | df['recall'] = df['recall'].round(2) 65 | df['tr. mins'] = (df['tr. mins'] / 60).round(1) 66 | df['fps'] = df['fps'].round(1) 67 | df['#vox (M)'] = (df['#vox (M)'] / 1_000_000).round(1) 68 | return df.to_string() 69 | 70 | def add_avg_row(df): 71 | df_avg = df.mean(axis=0, numeric_only=True).to_frame().transpose() 72 | df_avg['scene'] = 'AVG' 73 | return pd.concat([df, df_avg], ignore_index=True) 74 | 75 | df = pd.DataFrame({ 76 | 'scene': all_scenes, 77 | 'f-score': all_fscore, 78 | 'prec.': all_precision, 79 | 'recall': all_recall, 80 | 'tr. mins': all_tr_time, 81 | 'fps': all_fps, 82 | '#vox (M)': all_n_voxels, 83 | }) 84 | df = add_avg_row(df) 85 | 86 | print(format_df_string(df)) 87 | -------------------------------------------------------------------------------- /src/config.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 
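# Added note (illustrative, not part of the original file): every leaf key defined below
# is also exposed as a command-line flag by update_argparser(), so a value can come from
# the defaults here, from one or more --cfg_files YAML files, or from the command line,
# with later sources taking precedence, e.g.
#   python train.py --cfg_files cfg/mipnerf360.yaml --source_path data/360_v2/garden --sh_degree 2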
8 | 9 | import argparse 10 | from yacs.config import CfgNode 11 | 12 | 13 | cfg = CfgNode() 14 | 15 | cfg.model = CfgNode(dict( 16 | n_samp_per_vox = 1, # Number of sampled points per visited voxel 17 | sh_degree = 3, # Use 3 * (k+1)^2 params per voxels for view-dependent colors 18 | ss = 1.5, # Super-sampling rates for anti-aliasing 19 | white_background = False, # Assum white background 20 | black_background = False, # Assum black background 21 | )) 22 | 23 | cfg.data = CfgNode(dict( 24 | source_path = "", 25 | image_dir_name = "images", 26 | res_downscale = 0., 27 | res_width = 0, 28 | skip_blend_alpha = False, 29 | data_device = "cpu", 30 | eval = False, 31 | test_every = 8, 32 | )) 33 | 34 | cfg.bounding = CfgNode(dict( 35 | # Define the main (inside) region bounding box 36 | # The default use the suggested bounding if given by dataset. 37 | # Otherwise, it automatically chose from forward or camera_median modes. 38 | # See src/utils/bounding_utils.py for details. 39 | 40 | # default | camera_median | camera_max | forward | pcd 41 | bound_mode = "default", 42 | bound_scale = 1.0, # Scaling factor of the bound 43 | forward_dist_scale = 1.0, # For forward mode 44 | pcd_density_rate = 0.1, # For pcd mode 45 | 46 | # Number of Octree level outside the main foreground region 47 | outside_level = 5, 48 | )) 49 | 50 | cfg.optimizer = CfgNode(dict( 51 | geo_lr = 0.025, 52 | sh0_lr = 0.010, 53 | shs_lr = 0.00025, 54 | 55 | optim_beta1 = 0.1, 56 | optim_beta2 = 0.99, 57 | optim_eps = 1e-15, 58 | 59 | lr_decay_ckpt = [19000], 60 | lr_decay_mult = 0.1, 61 | )) 62 | 63 | cfg.regularizer = CfgNode(dict( 64 | # Main photometric loss 65 | lambda_photo = 1.0, 66 | use_l1 = False, 67 | use_huber = False, 68 | huber_thres = 0.03, 69 | 70 | # SSIM loss 71 | lambda_ssim = 0.02, 72 | 73 | # Sparse depth loss 74 | lambda_sparse_depth = 0.0, 75 | sparse_depth_until = 10_000, 76 | 77 | # Mask loss 78 | lambda_mask = 0.0, 79 | 80 | # Depthanything loss 81 | lambda_depthanythingv2 = 0.0, 82 | depthanythingv2_from = 3000, 83 | depthanythingv2_end = 20000, 84 | depthanythingv2_end_mult = 0.1, 85 | 86 | # Mast3r metrid loss 87 | lambda_mast3r_metric_depth = 0.0, 88 | mast3r_repo_path = '', 89 | mast3r_metric_depth_from = 0, 90 | mast3r_metric_depth_end = 20000, 91 | mast3r_metric_depth_end_mult = 0.01, 92 | 93 | # Final transmittance should concentrate to either 0 or 1 94 | lambda_T_concen = 0.0, 95 | 96 | # Final transmittance should be 0 97 | lambda_T_inside = 0.0, 98 | 99 | # Per-point rgb loss 100 | lambda_R_concen = 0.01, 101 | 102 | # Geometric regularization 103 | lambda_ascending = 0.0, 104 | ascending_from = 0, 105 | 106 | # Distortion loss (encourage distribution concentration on ray) 107 | lambda_dist = 0.1, 108 | dist_from = 10000, 109 | 110 | # Consistency loss of rendered normal and derived normal from expected depth 111 | lambda_normal_dmean = 0.0, 112 | n_dmean_from = 10_000, 113 | n_dmean_end = 20_000, 114 | n_dmean_ks = 3, 115 | n_dmean_tol_deg = 90.0, 116 | 117 | # Consistency loss of rendered normal and derived normal from median depth 118 | lambda_normal_dmed = 0.0, 119 | n_dmed_from=3000, 120 | n_dmed_end=20_000, 121 | 122 | # Total variation loss of density grid 123 | lambda_tv_density = 1e-10, 124 | tv_from = 0, 125 | tv_until = 10000, 126 | 127 | # Data augmentation 128 | ss_aug_max = 1.5, 129 | rand_bg = False, 130 | )) 131 | 132 | cfg.init = CfgNode(dict( 133 | # Voxel property initialization 134 | geo_init = -10.0, 135 | sh0_init = 0.5, 136 | shs_init = 0.0, 137 | 138 | sh_degree_init = 
3, 139 | 140 | # Init main inside region by dense voxels 141 | init_n_level = 6, # (2^6)^3 voxels 142 | 143 | # Number of voxel ratio for outside (background region) 144 | init_out_ratio = 2.0, 145 | )) 146 | 147 | cfg.procedure = CfgNode(dict( 148 | # Schedule 149 | n_iter = 20_000, 150 | sche_mult = 1.0, 151 | seed=3721, 152 | 153 | # Reset sh 154 | reset_sh_ckpt = [-1], 155 | 156 | # Adaptive general setup 157 | adapt_from = 1000, 158 | adapt_every = 1000, 159 | 160 | # Adaptive voxel pruning 161 | prune_until = 18000, 162 | prune_thres_init = 0.0001, 163 | prune_thres_final = 0.05, 164 | 165 | # Adaptive voxel pruning 166 | subdivide_until = 15000, 167 | subdivide_all_until = 0, 168 | subdivide_samp_thres = 1.0, # A voxel max sampling rate should larger than this. 169 | subdivide_prop = 0.05, 170 | subdivide_max_num = 10_000_000, 171 | )) 172 | 173 | cfg.auto_exposure = CfgNode(dict( 174 | enable = False, 175 | auto_exposure_upd_ckpt = [5000, 10000, 15000] 176 | )) 177 | 178 | for i_cfg in cfg.values(): 179 | i_cfg.set_new_allowed(True) 180 | 181 | 182 | def everytype2bool(v): 183 | if v.isnumeric(): 184 | return bool(int(v)) 185 | v = v.lower() 186 | if v in ['n', 'no', 'none', 'false']: 187 | return False 188 | return True 189 | 190 | 191 | def update_argparser(parser): 192 | for name in cfg.keys(): 193 | group = parser.add_argument_group(name) 194 | for key, value in getattr(cfg, name).items(): 195 | t = type(value) 196 | 197 | if t == bool: 198 | group.add_argument(f"--{key}", action='store_true' if t else 'store_false') 199 | elif t == list: 200 | group.add_argument(f"--{key}", default=value, type=type(value[0]), nargs="*") 201 | elif t == tuple: 202 | group.add_argument(f"--{key}", default=value, type=type(value[0]), nargs=len(value)) 203 | else: 204 | group.add_argument(f"--{key}", default=value, type=t) 205 | 206 | 207 | def update_config(cfg_files, cmd_lst=[]): 208 | # Update from config files 209 | if isinstance(cfg_files, str): 210 | cfg_files = [cfg_files] 211 | for cfg_path in cfg_files: 212 | cfg.merge_from_file(cfg_path) 213 | 214 | if len(cmd_lst) == 0: 215 | return 216 | 217 | # Parse the arguments from command line 218 | internal_parser = argparse.ArgumentParser() 219 | update_argparser(internal_parser) 220 | internal_args = internal_parser.parse_args(cmd_lst) 221 | 222 | # Update from command line args 223 | for name in cfg.keys(): 224 | cfg_subgroup = getattr(cfg, name) 225 | for key in cfg_subgroup.keys(): 226 | arg_val = getattr(internal_args, key) 227 | # Check if the default values is updated 228 | if internal_parser.get_default(key) != arg_val: 229 | cfg_subgroup[key] = arg_val 230 | -------------------------------------------------------------------------------- /src/dataloader/data_pack.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 
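# Minimal usage sketch (illustrative, not part of the original file):
#   data_pack = DataPack("data/nerf_synthetic/lego", res_downscale=2.0, use_test=True)
#   train_cams = data_pack.get_train_cameras()
#   test_cams = data_pack.get_test_cameras()
# The dataset format (COLMAP vs. NeRF-style JSON) is detected automatically from the
# files found under the given source path.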
8 | 9 | import os 10 | import random 11 | import numpy as np 12 | 13 | import torch 14 | 15 | from src.dataloader.reader_colmap_dataset import read_colmap_dataset 16 | from src.dataloader.reader_nerf_dataset import read_nerf_dataset 17 | 18 | from src.cameras import Camera, MiniCam 19 | 20 | 21 | class DataPack: 22 | 23 | def __init__(self, 24 | source_path, 25 | image_dir_name="images", 26 | res_downscale=0., 27 | res_width=0, 28 | skip_blend_alpha=False, 29 | alpha_is_white=False, 30 | data_device="cpu", 31 | use_test=False, 32 | test_every=8, 33 | camera_params_only=False): 34 | 35 | camera_creator = CameraCreator( 36 | res_downscale=res_downscale, 37 | res_width=res_width, 38 | skip_blend_alpha=skip_blend_alpha, 39 | alpha_is_white=alpha_is_white, 40 | data_device=data_device, 41 | camera_params_only=camera_params_only, 42 | ) 43 | 44 | sparse_path = os.path.join(source_path, "sparse") 45 | colmap_path = os.path.join(source_path, "colmap", "sparse") 46 | meta_path1 = os.path.join(source_path, "transforms_train.json") 47 | meta_path2 = os.path.join(source_path, "transforms.json") 48 | 49 | # TODO: read camera by multithreading 50 | 51 | if os.path.exists(sparse_path) or os.path.exists(colmap_path): 52 | print("Read dataset in COLMAP format.") 53 | dataset = read_colmap_dataset( 54 | source_path=source_path, 55 | image_dir_name=image_dir_name, 56 | use_test=use_test, 57 | test_every=test_every, 58 | camera_creator=camera_creator) 59 | elif os.path.exists(meta_path1) or os.path.exists(meta_path2): 60 | print("Read dataset in NeRF format.") 61 | dataset = read_nerf_dataset( 62 | source_path=source_path, 63 | use_test=use_test, 64 | test_every=test_every, 65 | camera_creator=camera_creator) 66 | else: 67 | raise Exception("Unknown scene type!") 68 | 69 | self._cameras = { 70 | 'train': dataset['train_cam_lst'], 71 | 'test': dataset['test_cam_lst'], 72 | } 73 | 74 | ############################## 75 | # Read additional dataset info 76 | ############################## 77 | # If the dataset suggested a scene bound 78 | self.suggested_bounding = dataset.get('suggested_bounding', None) 79 | 80 | # If the dataset provide a transformation to other coordinate 81 | self.to_world_matrix = None 82 | to_world_path = os.path.join(source_path, 'to_world_matrix.txt') 83 | if os.path.isfile(to_world_path): 84 | self.to_world_matrix = np.loadtxt(to_world_path) 85 | 86 | # If the dataset has a point cloud 87 | self.point_cloud = dataset.get('point_cloud', None) 88 | 89 | def get_train_cameras(self): 90 | return self._cameras['train'] 91 | 92 | def get_test_cameras(self): 93 | return self._cameras['test'] 94 | 95 | 96 | # Create a random sequence of image indices 97 | def compute_iter_idx(num_data, num_iter): 98 | tr_iter_idx = [] 99 | while len(tr_iter_idx) < num_iter: 100 | lst = list(range(num_data)) 101 | random.shuffle(lst) 102 | tr_iter_idx.extend(lst) 103 | return tr_iter_idx[:num_iter] 104 | 105 | 106 | # Function that create Camera instances while parsing dataset 107 | class CameraCreator: 108 | 109 | warned = False 110 | 111 | def __init__(self, 112 | res_downscale=0., 113 | res_width=0, 114 | skip_blend_alpha=False, 115 | alpha_is_white=False, 116 | data_device="cpu", 117 | camera_params_only=False): 118 | 119 | self.res_downscale = res_downscale 120 | self.res_width = res_width 121 | self.skip_blend_alpha = skip_blend_alpha 122 | self.alpha_is_white = alpha_is_white 123 | self.data_device = data_device 124 | self.camera_params_only = camera_params_only 125 | 126 | def __call__(self, 127 | image, 
128 | w2c, 129 | fovx, 130 | fovy, 131 | cx_p=0.5, 132 | cy_p=0.5, 133 | sparse_pt=None, 134 | image_name=""): 135 | 136 | if self.camera_params_only: 137 | return MiniCam( 138 | c2w=np.linalg.inv(w2c), 139 | fovx=fovx, fovy=fovy, 140 | cx_p=cx_p, cy_p=cy_p, 141 | width=image.size[0], 142 | height=image.size[1], 143 | image_name=image_name) 144 | 145 | # Determine target resolution 146 | if self.res_downscale > 0: 147 | downscale = self.res_downscale 148 | elif self.res_width > 0: 149 | downscale = image.size[0] / self.res_width 150 | else: 151 | downscale = 1 152 | 153 | total_pix = image.size[0] * image.size[1] 154 | if total_pix > 1200 ** 2 and not self.warned: 155 | self.warned = True 156 | suggest_ds = (total_pix ** 0.5) / 1200 157 | print(f"###################################################################") 158 | print(f"Image too large. Suggest to use `--res_downscale {suggest_ds:.1f}`.") 159 | print(f"###################################################################") 160 | 161 | # Resize image if needed 162 | if downscale != 1: 163 | image = image.resize(round(image.size[0] / downscale), round(image.size[1] / downscale)) 164 | 165 | # Convert image to tensor 166 | tensor = torch.tensor(np.array(image), dtype=torch.float32).moveaxis(-1, 0) / 255.0 167 | if tensor.shape[0] == 4: 168 | # Blend alpha channel 169 | tensor, mask = tensor.split([3, 1], dim=0) 170 | if not self.skip_blend_alpha: 171 | tensor = tensor * mask + int(self.alpha_is_white) * (1 - mask) 172 | 173 | return Camera( 174 | w2c=w2c, 175 | fovx=fovx, fovy=fovy, 176 | cx_p=cx_p, cy_p=cy_p, 177 | image=tensor, 178 | sparse_pt=sparse_pt, 179 | image_name=image_name) 180 | -------------------------------------------------------------------------------- /src/dataloader/reader_colmap_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 
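# Added note: the reader below expects a COLMAP reconstruction under
# <source_path>/sparse/0 or <source_path>/colmap/sparse/0, undistorted images under
# <source_path>/<image_dir_name>, and an optional nerf_normalization.json describing a
# suggested scene bound. Only PINHOLE and SIMPLE_PINHOLE camera models are supported.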
8 | 9 | import os 10 | import json 11 | import natsort 12 | import pycolmap 13 | import numpy as np 14 | from PIL import Image 15 | from pathlib import Path 16 | 17 | from src.utils.colmap_utils import parse_colmap_pts 18 | from src.utils.camera_utils import focal2fov 19 | 20 | 21 | def read_colmap_dataset(source_path, image_dir_name, use_test, test_every, camera_creator): 22 | 23 | source_path = Path(source_path) 24 | 25 | # Parse colmap meta data 26 | sparse_path = source_path / "sparse" / "0" 27 | if not sparse_path.exists(): 28 | sparse_path = source_path / "colmap" / "sparse" / "0" 29 | if not sparse_path.exists(): 30 | raise Exception("Can not find COLMAP reconstruction.") 31 | 32 | sfm = pycolmap.Reconstruction(sparse_path) 33 | point_cloud = parse_colmap_pts(sfm) 34 | correspondent = point_cloud.corr 35 | 36 | # Sort key by filename 37 | keys = natsort.natsorted( 38 | sfm.images.keys(), 39 | key = lambda k : sfm.images[k].name) 40 | 41 | # Load all images and cameras 42 | cam_lst = [] 43 | for key in keys: 44 | 45 | frame = sfm.images[key] 46 | 47 | # Load image 48 | image_path = source_path / image_dir_name / frame.name 49 | if not image_path.exists(): 50 | image_path = image_path.with_suffix('.png') 51 | if not image_path.exists(): 52 | image_path = image_path.with_suffix('.jpg') 53 | if not image_path.exists(): 54 | image_path = image_path.with_suffix('.JPG') 55 | if not image_path.exists(): 56 | raise Exception(f"File not found: {str(image_path)}") 57 | image = Image.open(image_path) 58 | 59 | # Load camera intrinsic 60 | if frame.camera.model.name == "SIMPLE_PINHOLE": 61 | focal_x, cx, cy = frame.camera.params 62 | fovx = focal2fov(focal_x, frame.camera.width) 63 | fovy = focal2fov(focal_x, frame.camera.height) 64 | cx_p = cx / frame.camera.width 65 | cy_p = cy / frame.camera.height 66 | elif frame.camera.model.name == "PINHOLE": 67 | focal_x, focal_y, cx, cy = frame.camera.params 68 | fovx = focal2fov(focal_x, frame.camera.width) 69 | fovy = focal2fov(focal_y, frame.camera.height) 70 | cx_p = cx / frame.camera.width 71 | cy_p = cy / frame.camera.height 72 | else: 73 | assert False, "Colmap camera model not handled: only undistorted datasets (PINHOLE or SIMPLE_PINHOLE cameras) supported!" 
74 | 75 | # Load camera extrinsic 76 | w2c = np.eye(4, dtype=np.float32) 77 | w2c[:3] = frame.cam_from_world.matrix() 78 | 79 | # Load sparse point 80 | sparse_pt = point_cloud.points[correspondent[frame.name]] 81 | 82 | cam_lst.append(camera_creator( 83 | image=image, 84 | w2c=w2c, 85 | fovx=fovx, 86 | fovy=fovy, 87 | cx_p=cx_p, 88 | cy_p=cy_p, 89 | sparse_pt=sparse_pt, 90 | image_name=image_path.name, 91 | )) 92 | 93 | # Split train/test 94 | if use_test: 95 | train_cam_lst = [ 96 | cam for i, cam in enumerate(cam_lst) 97 | if i % test_every != 0] 98 | test_cam_lst = [ 99 | cam for i, cam in enumerate(cam_lst) 100 | if i % test_every == 0] 101 | else: 102 | train_cam_lst = cam_lst 103 | test_cam_lst = [] 104 | 105 | # Parse main scene bound if there is 106 | nerf_normalization_path = os.path.join(source_path, "nerf_normalization.json") 107 | if os.path.isfile(nerf_normalization_path): 108 | with open(nerf_normalization_path) as f: 109 | nerf_normalization = json.load(f) 110 | suggested_center = np.array(nerf_normalization["center"], dtype=np.float32) 111 | suggested_radius = np.array(nerf_normalization["radius"], dtype=np.float32) 112 | suggested_bounding = np.stack([ 113 | suggested_center - suggested_radius, 114 | suggested_center + suggested_radius, 115 | ]) 116 | else: 117 | suggested_bounding = None 118 | 119 | # Pack dataset 120 | dataset = { 121 | 'train_cam_lst': train_cam_lst, 122 | 'test_cam_lst': test_cam_lst, 123 | 'suggested_bounding': suggested_bounding, 124 | 'point_cloud': point_cloud, 125 | } 126 | return dataset 127 | -------------------------------------------------------------------------------- /src/dataloader/reader_nerf_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 
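# Added note: the reader below expects transforms_train.json (or transforms.json) and an
# optional transforms_test.json in the NeRF/Blender convention; if the metadata contains a
# "colmap" entry, the referenced reconstruction provides per-view sparse points. Without a
# nerf_normalization.json, a synthetic-scene bound of [-1.5, 1.5]^3 is assumed.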
8 | 9 | import os 10 | import json 11 | import pycolmap 12 | import numpy as np 13 | from PIL import Image 14 | from pathlib import Path 15 | 16 | from src.utils.colmap_utils import parse_colmap_pts 17 | from src.utils.camera_utils import fov2focal, focal2fov 18 | 19 | 20 | def read_nerf_dataset(source_path, test_every, use_test, camera_creator): 21 | 22 | source_path = Path(source_path) 23 | 24 | # Load training cameras 25 | if (source_path / "transforms_train.json").exists(): 26 | train_cam_lst, point_cloud = read_cameras_from_json( 27 | source_path=source_path, 28 | meta_fname="transforms_train.json", 29 | camera_creator=camera_creator) 30 | else: 31 | train_cam_lst, point_cloud = read_cameras_from_json( 32 | source_path=source_path, 33 | meta_fname="transforms.json", 34 | camera_creator=camera_creator) 35 | 36 | # Load testing cameras 37 | if (source_path / "transforms_test.json").exists(): 38 | test_cam_lst, _ = read_cameras_from_json( 39 | source_path=source_path, 40 | meta_fname="transforms_test.json", 41 | camera_creator=camera_creator) 42 | elif use_test: 43 | test_cam_lst = [ 44 | cam for i, cam in enumerate(train_cam_lst) 45 | if i % test_every == 0] 46 | train_cam_lst = [ 47 | cam for i, cam in enumerate(train_cam_lst) 48 | if i % test_every != 0] 49 | else: 50 | test_cam_lst = [] 51 | 52 | # Parse main scene bound if there is 53 | nerf_normalization_path = os.path.join(source_path, "nerf_normalization.json") 54 | if os.path.isfile(nerf_normalization_path): 55 | with open(nerf_normalization_path) as f: 56 | nerf_normalization = json.load(f) 57 | suggested_center = np.array(nerf_normalization["center"], dtype=np.float32) 58 | suggested_radius = np.array(nerf_normalization["radius"], dtype=np.float32) 59 | suggested_bounding = np.stack([ 60 | suggested_center - suggested_radius, 61 | suggested_center + suggested_radius, 62 | ]) 63 | else: 64 | # Assume synthetic blender scene bound 65 | suggested_bounding = np.array([ 66 | [-1.5, -1.5, -1.5], 67 | [1.5, 1.5, 1.5], 68 | ], dtype=np.float32) 69 | 70 | # Pack dataset 71 | dataset = { 72 | 'train_cam_lst': train_cam_lst, 73 | 'test_cam_lst': test_cam_lst, 74 | 'suggested_bounding': suggested_bounding, 75 | 'point_cloud': point_cloud, 76 | } 77 | return dataset 78 | 79 | 80 | def read_cameras_from_json(source_path, meta_fname, camera_creator): 81 | 82 | with open(source_path / meta_fname) as f: 83 | meta = json.load(f) 84 | 85 | # Load COLMAP points if there is 86 | if "colmap" in meta: 87 | sfm = pycolmap.Reconstruction(source_path / meta["colmap"]["path"]) 88 | if "transform" in meta["colmap"]: 89 | transform = np.array(meta["colmap"]["transform"]) 90 | else: 91 | transform = None 92 | point_cloud = parse_colmap_pts(sfm, transform) 93 | correspondent = point_cloud.corr 94 | else: 95 | point_cloud = None 96 | correspondent = None 97 | 98 | # Load global setup 99 | global_fovx = meta.get("camera_angle_x", 0) 100 | global_fovy = meta.get("camera_angle_y", 0) 101 | global_cx_p = parse_principle_point(meta, is_cx=True) 102 | global_cy_p = parse_principle_point(meta, is_cx=False) 103 | 104 | # Load all images and cameras 105 | cam_lst = [] 106 | for frame in meta["frames"]: 107 | 108 | # Guess the rgb image path and load image 109 | path_candidates = [ 110 | source_path / frame["file_path"], 111 | source_path / (frame["file_path"] + '.png'), 112 | source_path / (frame["file_path"] + '.jpg'), 113 | source_path / (frame["file_path"] + '.JPG'), 114 | ] 115 | for image_path in path_candidates: 116 | if image_path.exists(): 117 | break 118 | 
119 | if frame.get('heldout', False): 120 | image = Image.new('RGB', (frame['w'], frame['h'])) 121 | elif image_path.exists(): 122 | image = Image.open(image_path) 123 | else: 124 | raise Exception(f"File not found: {str(image_path)}") 125 | 126 | # Load camera intrinsic 127 | fovx = frame.get('camera_angle_x', global_fovx) 128 | cx_p = frame.get('cx_p', global_cx_p) 129 | cy_p = frame.get('cy_p', global_cy_p) 130 | 131 | if 'camera_angle_y' in frame: 132 | fovy = frame['camera_angle_y'] 133 | elif global_fovy > 0: 134 | fovy = global_fovy 135 | else: 136 | fovy = focal2fov(fov2focal(fovx, image.size[0]), image.size[1]) 137 | 138 | # Load camera pose 139 | c2w = np.array(frame["transform_matrix"]) 140 | c2w[:3, 1:3] *= -1 # from opengl y-up-z-back to colmap y-down-z-forward 141 | w2c = np.linalg.inv(c2w).astype(np.float32) 142 | 143 | # Load sparse point 144 | if point_cloud is not None: 145 | sparse_pt = point_cloud.points[correspondent[image_path.name]] 146 | else: 147 | sparse_pt = None 148 | 149 | cam_lst.append(camera_creator( 150 | image=image, 151 | w2c=w2c, 152 | fovx=fovx, 153 | fovy=fovy, 154 | cx_p=cx_p, 155 | cy_p=cy_p, 156 | sparse_pt=sparse_pt, 157 | image_name=image_path.name, 158 | )) 159 | 160 | return cam_lst, point_cloud 161 | 162 | 163 | def parse_principle_point(info, is_cx): 164 | key = "cx" if is_cx else "cy" 165 | key_res = "w" if is_cx else "h" 166 | if f"{key}_p" in info: 167 | return info[f"{key}_p"] 168 | if key in info and key_res in info: 169 | return info[key] / info[key_res] 170 | return None 171 | -------------------------------------------------------------------------------- /src/sparse_voxel_gears/io.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | import os 10 | import re 11 | import torch 12 | 13 | from src.utils import octree_utils 14 | 15 | class SVInOut: 16 | 17 | def save(self, path, quantize=False): 18 | ''' 19 | Save the necessary attributes and parameters for reproducing rendering. 20 | ''' 21 | os.makedirs(os.path.dirname(path), exist_ok=True) 22 | state_dict = { 23 | 'active_sh_degree': self.active_sh_degree, 24 | 'scene_center': self.scene_center.data.contiguous(), 25 | 'inside_extent': self.inside_extent.data.contiguous(), 26 | 'scene_extent': self.scene_extent.data.contiguous(), 27 | 'octpath': self.octpath.data.contiguous(), 28 | 'octlevel': self.octlevel.data.contiguous(), 29 | '_geo_grid_pts': self._geo_grid_pts.data.contiguous(), 30 | '_sh0': self._sh0.data.contiguous(), 31 | '_shs': self._shs.data.contiguous(), 32 | } 33 | 34 | if quantize: 35 | quantize_state_dict(state_dict) 36 | state_dict['quantized'] = True 37 | else: 38 | state_dict['quantized'] = False 39 | 40 | for k, v in state_dict.items(): 41 | if torch.is_tensor(v): 42 | state_dict[k] = v.cpu() 43 | torch.save(state_dict, path) 44 | self.latest_save_path = path 45 | 46 | def load(self, path): 47 | ''' 48 | Load the saved models. 
49 | ''' 50 | self.loaded_path = path 51 | state_dict = torch.load(path, map_location="cpu", weights_only=False) 52 | 53 | if state_dict.get('quantized', False): 54 | dequantize_state_dict(state_dict) 55 | 56 | self.active_sh_degree = state_dict['active_sh_degree'] 57 | 58 | self.scene_center = state_dict['scene_center'].cuda() 59 | self.inside_extent = state_dict['inside_extent'].cuda() 60 | self.scene_extent = state_dict['scene_extent'].cuda() 61 | 62 | self.octpath = state_dict['octpath'].cuda() 63 | self.octlevel = state_dict['octlevel'].cuda().to(torch.int8) 64 | 65 | self._geo_grid_pts = state_dict['_geo_grid_pts'].cuda().requires_grad_() 66 | self._sh0 = state_dict['_sh0'].cuda().requires_grad_() 67 | self._shs = state_dict['_shs'].cuda().requires_grad_() 68 | 69 | # Subdivision priority trackor 70 | self._subdiv_p = torch.ones( 71 | [self.num_voxels, 1], 72 | dtype=torch.float32, device="cuda").requires_grad_() 73 | 74 | def save_iteration(self, model_path, iteration, quantize=False): 75 | path = os.path.join(model_path, "checkpoints", f"iter{iteration:06d}_model.pt") 76 | self.save(path, quantize=quantize) 77 | self.latest_save_iter = iteration 78 | 79 | def load_iteration(self, model_path, iteration=-1): 80 | if iteration == -1: 81 | # Find the maximum iteration if it is -1. 82 | fnames = os.listdir(os.path.join(model_path, "checkpoints")) 83 | loaded_iter = max(int(re.sub("[^0-9]", "", fname)) for fname in fnames) 84 | else: 85 | loaded_iter = iteration 86 | 87 | path = os.path.join(model_path, "checkpoints", f"iter{loaded_iter:06d}_model.pt") 88 | self.load(path) 89 | 90 | self.loaded_iter = iteration 91 | 92 | return loaded_iter 93 | 94 | 95 | # Quantization utilities to reduce size when saving model. 96 | # It can reduce ~70% model size with minor PSNR drop. 
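# Added note on the scheme below (rough intuition, not a guarantee): each float32 value is
# replaced by a uint8 index into a 256-entry codebook that is seeded from the value
# quantiles and then refined by a few mean-update iterations, so the quantized tensors
# shrink by roughly 4x while the shared codebooks add only negligible overhead.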
97 | def quantize_state_dict(state_dict): 98 | state_dict['_geo_grid_pts'] = quantization(state_dict['_geo_grid_pts']) 99 | state_dict['_sh0'] = [quantization(v) for v in state_dict['_sh0'].split(1, dim=1)] 100 | state_dict['_shs'] = [quantization(v) for v in state_dict['_shs'].split(1, dim=1)] 101 | 102 | def dequantize_state_dict(state_dict): 103 | state_dict['_geo_grid_pts'] = dequantization(state_dict['_geo_grid_pts']) 104 | state_dict['_sh0'] = torch.cat( 105 | [dequantization(v) for v in state_dict['_sh0']], dim=1) 106 | state_dict['_shs'] = torch.cat( 107 | [dequantization(v) for v in state_dict['_shs']], dim=1) 108 | 109 | def quantization(src_tensor, max_iter=10): 110 | src_shape = src_tensor.shape 111 | src_vals = src_tensor.flatten().contiguous() 112 | order = src_vals.argsort() 113 | quantile_ind = (torch.linspace(0,1,257) * (len(order) - 1)).long().clamp_(0, len(order)-1) 114 | codebook = src_vals[order[quantile_ind]].contiguous() 115 | codebook[0] = -torch.inf 116 | ind = torch.searchsorted(codebook, src_vals) 117 | 118 | codebook = codebook[1:] 119 | ind = (ind - 1).clamp_(0, 255) 120 | 121 | diff_l = (src_vals - codebook[ind-1]).abs() 122 | diff_m = (src_vals - codebook[ind]).abs() 123 | ind = ind - 1 + (diff_m < diff_l) 124 | ind.clamp_(0, 255) 125 | 126 | for _ in range(max_iter): 127 | codebook = torch.zeros_like(codebook).index_reduce_( 128 | dim=0, 129 | index=ind, 130 | source=src_vals, 131 | reduce='mean', 132 | include_self=False) 133 | diff_l = (src_vals - codebook[ind-1]).abs() 134 | diff_r = (src_vals - codebook[(ind+1).clamp_max_(255)]).abs() 135 | diff_m = (src_vals - codebook[ind]).abs() 136 | upd_mask = torch.minimum(diff_l, diff_r) < diff_m 137 | if upd_mask.sum() == 0: 138 | break 139 | shift = (diff_r < diff_l) * 2 - 1 140 | ind[upd_mask] += shift[upd_mask] 141 | ind.clamp_(0, 255) 142 | 143 | codebook = torch.zeros_like(codebook).index_reduce_( 144 | dim=0, 145 | index=ind, 146 | source=src_vals, 147 | reduce='mean', 148 | include_self=False) 149 | 150 | return dict( 151 | index=ind.reshape(src_shape).to(torch.uint8), 152 | codebook=codebook, 153 | ) 154 | 155 | def dequantization(quant_dict): 156 | return quant_dict['codebook'][quant_dict['index'].long()] 157 | -------------------------------------------------------------------------------- /src/sparse_voxel_gears/pooling.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 
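# Added note: both pooling helpers below coarsen the sparse grid by masking the low bits
# of each voxel's octpath (3 bits per Octree level) and de-duplicating the result; the
# returned invmap maps every original voxel to its pooled voxel so that per-voxel
# quantities can still be gathered after pooling.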
8 | 9 | import torch 10 | import svraster_cuda 11 | 12 | from src.utils import octree_utils 13 | 14 | 15 | class SVPooling: 16 | 17 | def pooling_to_level(self, max_level, octpath=None, octlevel=None): 18 | octpath = self.octpath if octpath is None else octpath 19 | octlevel = self.octlevel if octlevel is None else octlevel 20 | 21 | num_bit_to_mask = 3 * max(0, svraster_cuda.meta.MAX_NUM_LEVELS - max_level) 22 | octpath = (octpath >> num_bit_to_mask) << num_bit_to_mask 23 | octlevel = octlevel.clamp_max(max_level) 24 | octpack, invmap = torch.stack([octpath, octlevel]).unique(sorted=True, dim=1, return_inverse=True) 25 | octpath, octlevel = octpack 26 | octlevel = octlevel.to(torch.int8) 27 | 28 | vox_center, vox_size = octree_utils.octpath_decoding( 29 | octpath, octlevel, self.scene_center, self.scene_extent) 30 | 31 | return dict( 32 | invmap=invmap, 33 | octpath=octpath, 34 | octlevel=octlevel, 35 | vox_center=vox_center, 36 | vox_size=vox_size, 37 | ) 38 | 39 | def pooling_to_rate(self, cameras, max_rate, octpath=None, octlevel=None): 40 | octpath = self.octpath.clone() if octpath is None else octpath 41 | octlevel = self.octlevel.clone() if octlevel is None else octlevel 42 | invmap = torch.arange(len(octpath), device="cuda") 43 | 44 | for _ in range(svraster_cuda.meta.MAX_NUM_LEVELS): 45 | vox_center, vox_size = octree_utils.octpath_decoding(octpath, octlevel, self.scene_center, self.scene_extent) 46 | samp_rate = svraster_cuda.renderer.mark_max_samp_rate(cameras, octpath, vox_center, vox_size) 47 | pool_mask = (samp_rate < max_rate) & (octlevel.squeeze(1) > 1) 48 | if pool_mask.sum() == 0: 49 | break 50 | octlevel[pool_mask] = octlevel[pool_mask] - 1 51 | num_bit_to_mask = 3 * (svraster_cuda.meta.MAX_NUM_LEVELS - octlevel[pool_mask]) 52 | octpath[pool_mask] = octpath[pool_mask] >> num_bit_to_mask << num_bit_to_mask 53 | 54 | octpack, cur_invmap = torch.stack([octpath, octlevel]).unique(sorted=True, dim=1, return_inverse=True) 55 | octpath, octlevel = octpack 56 | octlevel = octlevel.to(torch.int8) 57 | invmap = cur_invmap[invmap] 58 | 59 | vox_center, vox_size = octree_utils.octpath_decoding( 60 | octpath, octlevel, self.scene_center, self.scene_extent) 61 | 62 | return dict( 63 | invmap=invmap, 64 | octpath=octpath, 65 | octlevel=octlevel, 66 | vox_center=vox_center, 67 | vox_size=vox_size, 68 | ) 69 | -------------------------------------------------------------------------------- /src/sparse_voxel_gears/properties.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 
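# Added note: the derived voxel attributes below (vox_center, vox_size, grid_pts_key,
# vox_key, vox_size_inv, grid_pts_xyz) are computed lazily and cached; the signature
# property (voxel count plus the identities of octpath/octlevel) is used to detect when
# the sparse layout has changed and the cache must be rebuilt.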
8 | 9 | import torch 10 | 11 | from src.utils import octree_utils 12 | from src.utils.fuser_utils import rgb_fusion 13 | from src.utils.activation_utils import rgb2shzero 14 | 15 | import svraster_cuda 16 | 17 | 18 | class SVProperties: 19 | 20 | @property 21 | def num_voxels(self): 22 | return len(self.octpath) 23 | 24 | @property 25 | def num_grid_pts(self): 26 | return len(self.grid_pts_key) 27 | 28 | @property 29 | def scene_min(self): 30 | return self.scene_center - 0.5 * self.scene_extent 31 | 32 | @property 33 | def scene_max(self): 34 | return self.scene_center + 0.5 * self.scene_extent 35 | 36 | @property 37 | def inside_min(self): 38 | return self.scene_center - 0.5 * self.inside_extent 39 | 40 | @property 41 | def inside_max(self): 42 | return self.scene_center + 0.5 * self.inside_extent 43 | 44 | @property 45 | def inside_mask(self): 46 | isin = ((self.inside_min < self.vox_center) & (self.vox_center < self.inside_max)).all(1) 47 | return isin 48 | 49 | @property 50 | def sh0(self): 51 | return self._sh0 52 | 53 | @property 54 | def shs(self): 55 | return self._shs 56 | 57 | @property 58 | def subdivision_priority(self): 59 | return self._subdiv_p.grad 60 | 61 | def reset_subdivision_priority(self): 62 | self._subdiv_p.grad = None 63 | 64 | @property 65 | def signature(self): 66 | # Signature to check if the voxel grid layout is updated 67 | return (self.num_voxels, id(self.octpath), id(self.octlevel)) 68 | 69 | def _check_derived_voxel_attr(self): 70 | # Lazy computation of inverse voxel sizes 71 | signature = self.signature 72 | need_recompute = not hasattr(self, '_check_derived_voxel_attr_signature') or \ 73 | self._check_derived_voxel_attr_signature != signature 74 | if need_recompute: 75 | self._vox_center, self._vox_size = octree_utils.octpath_decoding( 76 | self.octpath, self.octlevel, self.scene_center, self.scene_extent) 77 | self._grid_pts_key, self._vox_key = octree_utils.build_grid_pts_link(self.octpath, self.octlevel) 78 | self._check_derived_voxel_attr_signature = signature 79 | 80 | @property 81 | def vox_center(self): 82 | self._check_derived_voxel_attr() 83 | return self._vox_center 84 | 85 | @property 86 | def vox_size(self): 87 | self._check_derived_voxel_attr() 88 | return self._vox_size 89 | 90 | @property 91 | def grid_pts_key(self): 92 | self._check_derived_voxel_attr() 93 | return self._grid_pts_key 94 | 95 | @property 96 | def vox_key(self): 97 | self._check_derived_voxel_attr() 98 | return self._vox_key 99 | 100 | @property 101 | def vox_size_inv(self): 102 | # Lazy computation of inverse voxel sizes 103 | signature = self.signature 104 | need_recompute = not hasattr(self, '_vox_size_inv_signature') or \ 105 | self._vox_size_inv_signature != signature 106 | if need_recompute: 107 | self._vox_size_inv = 1 / self.vox_size 108 | self._vox_size_inv_signature = signature 109 | return self._vox_size_inv 110 | 111 | @property 112 | def grid_pts_xyz(self): 113 | # Lazy computation of grid points xyz 114 | signature = self.signature 115 | need_recompute = not hasattr(self, '_grid_pts_xyz_signature') or \ 116 | self._grid_pts_xyz_signature != signature 117 | if need_recompute: 118 | self._grid_pts_xyz = octree_utils.compute_gridpoints_xyz( 119 | self.grid_pts_key, self.scene_center, self.scene_extent) 120 | self._grid_pts_xyz_signature = signature 121 | return self._grid_pts_xyz 122 | 123 | @torch.no_grad() 124 | def reset_sh_from_cameras(self, cameras): 125 | self._sh0.data.copy_(rgb2shzero(rgb_fusion(self, cameras))) 126 | self._shs.data.zero_() 127 | 128 | 
def apply_tv_on_density_field(self, lambda_tv_density): 129 | if self._geo_grid_pts.grad is None: 130 | self._geo_grid_pts.grad = torch.zeros_like(self._geo_grid_pts.data) 131 | svraster_cuda.grid_loss_bw.total_variation( 132 | grid_pts=self._geo_grid_pts, 133 | vox_key=self.vox_key, 134 | weight=lambda_tv_density, 135 | vox_size_inv=self.vox_size_inv, 136 | no_tv_s=True, 137 | tv_sparse=False, 138 | grid_pts_grad=self._geo_grid_pts.grad) 139 | -------------------------------------------------------------------------------- /src/sparse_voxel_gears/renderer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | import torch 10 | import svraster_cuda 11 | 12 | from src.utils.image_utils import resize_rendering 13 | 14 | class SVRenderer: 15 | 16 | def freeze_vox_geo(self): 17 | ''' 18 | Freeze the grid-point parameters and pre-gather them into each voxel. 19 | ''' 20 | with torch.no_grad(): 21 | self.frozen_vox_geo = svraster_cuda.renderer.GatherGeoParams.apply( 22 | self.vox_key, 23 | torch.arange(self.num_voxels, device="cuda"), 24 | self._geo_grid_pts 25 | ) 26 | self._geo_grid_pts.requires_grad = False 27 | 28 | def unfreeze_vox_geo(self): 29 | ''' 30 | Unfreeze the grid-point parameters. 31 | ''' 32 | del self.frozen_vox_geo 33 | self._geo_grid_pts.requires_grad = True 34 | 35 | def vox_fn(self, idx, cam_pos, color_mode=None, viewdir=None): 36 | ''' 37 | Per-frame voxel property processing. Two important operations: 38 | 1. Gather grid-point parameters into each voxel. 39 | 2. Compute the view-dependent color of each voxel. 40 | 41 | Input: 42 | @idx Indices of the active voxels for the current frame. 43 | @cam_pos Camera position. 44 | Output: 45 | @vox_params A dictionary of the pre-processed voxel properties. 46 | ''' 47 | 48 | # Gather the density values at the eight corners of each voxel. 49 | # They define a trilinear density field.
50 | # The final tensor are in shape [#vox, 8] 51 | if hasattr(self, 'frozen_vox_geo'): 52 | geos = self.frozen_vox_geo 53 | else: 54 | geos = svraster_cuda.renderer.GatherGeoParams.apply( 55 | self.vox_key, 56 | idx, 57 | self._geo_grid_pts 58 | ) 59 | 60 | # Compute voxel colors 61 | if color_mode is None or color_mode == "sh": 62 | active_sh_degree = self.active_sh_degree 63 | color_mode = "sh" 64 | elif color_mode.startswith("sh"): 65 | active_sh_degree = int(color_mode[2]) 66 | color_mode = "sh" 67 | 68 | if color_mode == "sh": 69 | rgbs = svraster_cuda.renderer.SH_eval.apply( 70 | active_sh_degree, 71 | idx, 72 | self.vox_center, 73 | cam_pos, 74 | viewdir, # Ignore above two when viewdir is not None 75 | self.sh0, 76 | self.shs, 77 | ) 78 | elif color_mode == "rand": 79 | rgbs = torch.rand([self.num_voxels, 3], dtype=torch.float32, device="cuda") 80 | elif color_mode == "dontcare": 81 | rgbs = torch.empty([self.num_voxels, 3], dtype=torch.float32, device="cuda") 82 | else: 83 | raise NotImplementedError 84 | 85 | # Pack everything 86 | vox_params = { 87 | 'geos': geos, 88 | 'rgbs': rgbs, 89 | 'subdiv_p': self._subdiv_p, # Dummy param to record subdivision priority 90 | } 91 | if vox_params['subdiv_p'] is None: 92 | vox_params['subdiv_p'] = torch.ones([self.num_voxels, 1], device="cuda") 93 | 94 | return vox_params 95 | 96 | def render( 97 | self, 98 | camera, 99 | color_mode=None, 100 | track_max_w=False, 101 | ss=None, 102 | output_depth=False, 103 | output_normal=False, 104 | output_T=False, 105 | rand_bg=False, 106 | use_auto_exposure=False, 107 | **other_opt): 108 | 109 | ################################### 110 | # Pre-processing 111 | ################################### 112 | if ss is None: 113 | ss = self.ss 114 | w_src, h_src = camera.image_width, camera.image_height 115 | w, h = round(w_src * ss), round(h_src * ss) 116 | w_ss, h_ss = w / w_src, h / h_src 117 | if ss != 1.0 and 'gt_color' in other_opt: 118 | other_opt['gt_color'] = resize_rendering(other_opt['gt_color'], size=(h, w)) 119 | 120 | n_samp_per_vox = other_opt.pop('n_samp_per_vox', self.n_samp_per_vox) 121 | 122 | ################################### 123 | # Call low-level rasterization API 124 | ################################### 125 | raster_settings = svraster_cuda.renderer.RasterSettings( 126 | color_mode=color_mode, 127 | n_samp_per_vox=n_samp_per_vox, 128 | image_width=w, 129 | image_height=h, 130 | tanfovx=camera.tanfovx, 131 | tanfovy=camera.tanfovy, 132 | cx=camera.cx * w_ss, 133 | cy=camera.cy * h_ss, 134 | w2c_matrix=camera.w2c, 135 | c2w_matrix=camera.c2w, 136 | bg_color=float(self.white_background), 137 | near=camera.near, 138 | need_depth=output_depth, 139 | need_normal=output_normal, 140 | track_max_w=track_max_w, 141 | **other_opt) 142 | color, depth, normal, T, max_w = svraster_cuda.renderer.rasterize_voxels( 143 | raster_settings, 144 | self.octpath, 145 | self.vox_center, 146 | self.vox_size, 147 | self.vox_fn) 148 | 149 | ################################### 150 | # Post-processing and pack output 151 | ################################### 152 | if rand_bg: 153 | color = color + T * torch.rand_like(color, requires_grad=False) 154 | elif not self.white_background and not self.black_background: 155 | color = color + T * color.mean((1,2), keepdim=True) 156 | 157 | if use_auto_exposure: 158 | color = camera.auto_exposure_apply(color) 159 | 160 | render_pkg = { 161 | 'color': color, 162 | 'depth': depth if output_depth else None, 163 | 'normal': normal if output_normal else None, 164 | 'T': T if 
output_T else None, 165 | 'max_w': max_w, 166 | } 167 | 168 | for k in ['color', 'depth', 'normal', 'T']: 169 | render_pkg[f'raw_{k}'] = render_pkg[k] 170 | 171 | # Post process super-sampling 172 | if render_pkg[k] is not None and render_pkg[k].shape[-2:] != (h_src, w_src): 173 | render_pkg[k] = resize_rendering(render_pkg[k], size=(h_src, w_src)) 174 | 175 | # Clip intensity 176 | render_pkg['color'] = render_pkg['color'].clamp(0, 1) 177 | 178 | return render_pkg 179 | -------------------------------------------------------------------------------- /src/sparse_voxel_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | from src.sparse_voxel_gears.constructor import SVConstructor 10 | from src.sparse_voxel_gears.properties import SVProperties 11 | from src.sparse_voxel_gears.renderer import SVRenderer 12 | from src.sparse_voxel_gears.adaptive import SVAdaptive 13 | from src.sparse_voxel_gears.io import SVInOut 14 | from src.sparse_voxel_gears.pooling import SVPooling 15 | 16 | 17 | class SparseVoxelModel(SVConstructor, SVProperties, SVRenderer, SVAdaptive, SVInOut, SVPooling): 18 | 19 | def __init__(self, 20 | n_samp_per_vox=1, # Number of sampled points per visited voxel 21 | sh_degree=3, # Use 3 * (k+1)^2 params per voxel for view-dependent colors 22 | ss=1.5, # Super-sampling rate for anti-aliasing 23 | white_background=False, # Assume a white background 24 | black_background=False, # Assume a black background 25 | ): 26 | ''' 27 | Set up the model metadata. At this point, no voxel is allocated. 28 | Use the following methods to allocate voxels and parameters. 29 | 30 | 1. `model_load` defined in `src/sparse_voxel_gears/io.py`. 31 | Load a saved model from a given path. 32 | 33 | 2. `model_init` defined in `src/sparse_voxel_gears/constructor.py`. 34 | Heuristically initialize the sparse grid layout and parameters from the training data. 35 | ''' 36 | super().__init__() 37 | 38 | self.n_samp_per_vox = n_samp_per_vox 39 | self.max_sh_degree = sh_degree 40 | self.ss = ss 41 | self.white_background = white_background 42 | self.black_background = black_background 43 | 44 | # List the variable names 45 | self.per_voxel_attr_lst = [ 46 | 'octpath', 'octlevel', 47 | '_subdiv_p', 48 | ] 49 | self.per_voxel_param_lst = [ 50 | '_sh0', '_shs', 51 | ] 52 | self.grid_pts_param_lst = [ 53 | '_geo_grid_pts', 54 | ] 55 | 56 | # To be initialized by model_init 57 | self.scene_center = None 58 | self.scene_extent = None 59 | self.inside_extent = None 60 | self.octpath = None 61 | self.octlevel = None 62 | self.active_sh_degree = sh_degree 63 | 64 | self._geo_grid_pts = None 65 | self._sh0 = None 66 | self._shs = None 67 | self._subdiv_p = None 68 | -------------------------------------------------------------------------------- /src/utils/activation_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | import torch 10 | from svraster_cuda.meta import STEP_SZ_SCALE 11 | 12 | def softplus(x): 13 | return torch.nn.functional.softplus(x) 14 | 15 | def exp_linear_10(x): 16 | return torch.where(x > 1, x, torch.exp(x - 1)) 17 | 18 | def exp_linear_11(x): 19 | return torch.where(x > 1.1, x, torch.exp(0.909090909091 * x - 0.904689820196)) 20 | 21 | def exp_linear_20(x): 22 | return torch.where(x > 2.0, x, torch.exp(0.5 * x - 0.30685281944)) 23 | 24 | def softplus_inverse(y): 25 | return y + torch.log(-torch.expm1(-y)) 26 | 27 | def exp_linear_10_inverse(y): 28 | return torch.where(y > 1, y, torch.log(y) + 1) 29 | 30 | def exp_linear_11_inverse(y): 31 | return torch.where(y > 1.1, y, (torch.log(y) + 0.904689820196) / 0.909090909091) 32 | 33 | def exp_linear_20_inverse(y): 34 | return torch.where(y > 2.0, y, (torch.log(y) + 0.30685281944) / 0.5) 35 | 36 | def smooth_clamp_max(x, max_val): 37 | return max_val - torch.nn.functional.softplus(max_val - x) 38 | 39 | def density2alpha(density, interval): 40 | return 1 - torch.exp(-STEP_SZ_SCALE * interval * density) 41 | 42 | def alpha2density(alpha, interval): 43 | return torch.log(1 - alpha) / (-STEP_SZ_SCALE * interval) 44 | 45 | def rgb2shzero(x): 46 | return (x - 0.5) / 0.28209479177387814 47 | 48 | def shzero2rgb(x): 49 | return x * 0.28209479177387814 + 0.5 50 | -------------------------------------------------------------------------------- /src/utils/bounding_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
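A small self-check of the activation helpers in src/utils/activation_utils.py above (illustrative; importing the module requires the built `svraster_cuda` extension because of the `STEP_SZ_SCALE` import):

    import torch
    from src.utils.activation_utils import (
        exp_linear_10, exp_linear_10_inverse, density2alpha, alpha2density)

    x = torch.linspace(-3.0, 3.0, steps=13)
    # exp_linear_10_inverse undoes exp_linear_10 on both sides of the joint at 1.0
    print(torch.allclose(exp_linear_10_inverse(exp_linear_10(x)), x, atol=1e-6))  # True

    alpha = torch.tensor([0.1, 0.5, 0.9])
    interval = 0.01
    # alpha <-> density conversion is a consistent round trip for a fixed interval
    print(torch.allclose(density2alpha(alpha2density(alpha, interval), interval), alpha))  # True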
8 | 9 | import numpy as np 10 | 11 | 12 | def decide_main_bounding(bound_mode="default", 13 | forward_dist_scale=1.0, # For "forward" mode 14 | pcd_density_rate=0.1, # For "pcd" mode 15 | bound_scale=1.0, # Scaling of the bounding 16 | tr_cams=None, # Cameras 17 | pcd=None, # Point cloud 18 | suggested_bounding=None): 19 | if bound_mode == "default" and suggested_bounding is not None: 20 | print("Use suggested bounding") 21 | center = suggested_bounding.mean(0) 22 | radius = (suggested_bounding[1] - suggested_bounding[0]) * 0.5 23 | elif bound_mode in ["camera_max", "camera_median"]: 24 | center, radius = main_scene_bound_camera_heuristic( 25 | cams=tr_cams, bound_mode=bound_mode) 26 | elif bound_mode == "forward": 27 | center, radius = main_scene_bound_forward_heuristic( 28 | cams=tr_cams, forward_dist_scale=forward_dist_scale) 29 | elif bound_mode == "pcd": 30 | center, radius = main_scene_bound_pcd_heuristic( 31 | pcd=pcd, pcd_density_rate=pcd_density_rate) 32 | elif bound_mode == "default": 33 | cam_lookats = np.stack([cam.lookat.tolist() for cam in tr_cams]) 34 | lookat_dots = (cam_lookats[:,None] * cam_lookats).sum(-1) 35 | is_forward_facing = lookat_dots.min() > 0 36 | 37 | if is_forward_facing: 38 | center, radius = main_scene_bound_forward_heuristic( 39 | cams=tr_cams, forward_dist_scale=forward_dist_scale) 40 | else: 41 | center, radius = main_scene_bound_camera_heuristic( 42 | cams=tr_cams, bound_mode="camera_median") 43 | else: 44 | raise NotImplementedError 45 | 46 | radius = radius * bound_scale 47 | 48 | bounding = np.array([ 49 | center - radius, 50 | center + radius, 51 | ], dtype=np.float32) 52 | return bounding 53 | 54 | 55 | def main_scene_bound_camera_heuristic(cams, bound_mode): 56 | print("Heuristic bounding:", bound_mode) 57 | cam_positions = np.stack([cam.position.tolist() for cam in cams]) 58 | center = cam_positions.mean(0) 59 | dists = np.linalg.norm(cam_positions - center, axis=1) 60 | if bound_mode == "camera_max": 61 | radius = np.max(dists) 62 | elif bound_mode == "camera_median": 63 | radius = np.median(dists) 64 | else: 65 | raise NotImplementedError 66 | return center, radius 67 | 68 | 69 | def main_scene_bound_forward_heuristic(cams, forward_dist_scale): 70 | print("Heuristic bounding: forward") 71 | positions = np.stack([cam.position.tolist() for cam in cams]) 72 | cam_center = positions.mean(0) 73 | cam_lookat = np.stack([cam.lookat.tolist() for cam in cams]).mean(0) 74 | cam_lookat /= np.linalg.norm(cam_lookat) 75 | cam_extent = 2 * np.linalg.norm(positions - cam_center, axis=1).max() 76 | 77 | center = cam_center + forward_dist_scale * cam_extent * cam_lookat 78 | radius = 0.8 * forward_dist_scale * cam_extent 79 | 80 | return center, radius 81 | 82 | 83 | def main_scene_bound_pcd_heuristic(pcd, pcd_density_rate): 84 | print("Heuristic bounding: pcd") 85 | center = np.median(pcd.points, axis=0) 86 | dist = np.abs(pcd.points - center).max(axis=1) 87 | dist = np.sort(dist) 88 | density = (1 + np.arange(len(dist))) * (dist > 0) / ((2 * dist) ** 3 + 1e-6) 89 | 90 | # Should cover at least 5% of the point 91 | begin_idx = round(len(density) * 0.05) 92 | 93 | # Find the radius with maximum point density 94 | max_idx = begin_idx + density[begin_idx:].argmax() 95 | 96 | # Find the smallest radius with point density equal to pcd_density_rate of maximum 97 | target_density = pcd_density_rate * density[max_idx] 98 | target_idx = max_idx + np.where(density[max_idx:] < target_density)[0][0] 99 | 100 | radius = dist[target_idx] 101 | 102 | return center, radius 
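A hedged sketch of driving `decide_main_bounding` above with a handful of camera positions. The tiny `FakeCam` class is a made-up stand-in for the project's camera objects; it only provides the `position` and `lookat` attributes that the heuristics read:

    import numpy as np
    from src.utils.bounding_utils import decide_main_bounding

    class FakeCam:  # hypothetical minimal camera stub
        def __init__(self, position, lookat):
            self.position = np.asarray(position, dtype=np.float32)
            self.lookat = np.asarray(lookat, dtype=np.float32)

    # Eight inward-facing cameras on a unit circle around the origin
    cams = [FakeCam([np.cos(t), 0.0, np.sin(t)], [-np.cos(t), 0.0, -np.sin(t)])
            for t in np.linspace(0, 2 * np.pi, 8, endpoint=False)]

    bounding = decide_main_bounding(bound_mode="camera_median", tr_cams=cams, bound_scale=2.0)
    print(bounding.shape)  # (2, 3): [scene_min, scene_max]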
103 | -------------------------------------------------------------------------------- /src/utils/camera_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | import numpy as np 10 | from scipy.interpolate import make_interp_spline 11 | 12 | 13 | def fov2focal(fov, pixels): 14 | return pixels / (2 * np.tan(0.5 * fov)) 15 | 16 | def focal2fov(focal, pixels): 17 | return 2 * np.arctan(pixels / (2 * focal)) 18 | 19 | 20 | def interpolate_poses(poses, n_frame, periodic=True): 21 | 22 | assert len(poses) > 1 23 | 24 | poses = list(poses) 25 | bc_type = None 26 | 27 | if periodic: 28 | poses.append(poses[0]) 29 | bc_type = "periodic" 30 | 31 | pos_lst = np.stack([pose[:3, 3] for pose in poses]) 32 | lookat_lst = np.stack([pose[:3, 2] for pose in poses]) 33 | right_lst = np.stack([pose[:3, 0] for pose in poses]) 34 | 35 | ts = np.linspace(0, 1, len(poses)) 36 | pos_interp_f = make_interp_spline(ts, pos_lst, bc_type=bc_type) 37 | lookat_interp_f = make_interp_spline(ts, lookat_lst, bc_type=bc_type) 38 | right_interp_f = make_interp_spline(ts, right_lst, bc_type=bc_type) 39 | 40 | samps = np.linspace(0, 1, n_frame+1)[:n_frame] 41 | pos_video = pos_interp_f(samps) 42 | lookat_video = lookat_interp_f(samps) 43 | right_video = right_interp_f(samps) 44 | interp_poses = [] 45 | for i in range(n_frame): 46 | pos = pos_video[i] 47 | lookat = lookat_video[i] / np.linalg.norm(lookat_video[i]) 48 | right_ = right_video[i] / np.linalg.norm(right_video[i]) 49 | down = np.cross(lookat, right_) 50 | right = np.cross(down, lookat) 51 | c2w = np.eye(4, dtype=np.float32) 52 | c2w[:3, 0] = right 53 | c2w[:3, 1] = down 54 | c2w[:3, 2] = lookat 55 | c2w[:3, 3] = pos 56 | interp_poses.append(c2w) 57 | 58 | return interp_poses 59 | 60 | 61 | def gen_circular_poses(radius, 62 | n_frame, 63 | starting=1.5 * np.pi, # Starting from -z 64 | ): 65 | poses = [] 66 | for rad in np.linspace(starting, starting + 2 * np.pi, n_frame): 67 | pos = radius * np.array([np.cos(rad), 0, np.sin(rad)]) 68 | lookat = -pos / np.linalg.norm(pos) 69 | down = np.array([0, 1, 0]) 70 | right = np.cross(down, lookat) 71 | right = right / np.linalg.norm(right) 72 | down = np.cross(lookat, right) 73 | c2w = np.eye(4, dtype=np.float32) 74 | c2w[:3, 0] = right 75 | c2w[:3, 1] = down 76 | c2w[:3, 2] = lookat 77 | c2w[:3, 3] = pos 78 | poses.append(c2w) 79 | return poses 80 | -------------------------------------------------------------------------------- /src/utils/colmap_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 
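An illustrative fly-through sketch built from the pose helpers in src/utils/camera_utils.py above (only NumPy and SciPy are assumed; the radius, frame counts, and field of view are arbitrary):

    import numpy as np
    from src.utils.camera_utils import gen_circular_poses, interpolate_poses, fov2focal

    key_poses = gen_circular_poses(radius=3.0, n_frame=4)         # four c2w keyframes on a circle
    video_poses = interpolate_poses(key_poses, n_frame=120, periodic=True)
    print(len(video_poses), video_poses[0].shape)                  # 120 (4, 4)

    focal = fov2focal(np.deg2rad(60.0), pixels=800)                # about 692.8 px for a 60 degree FoV
    print(round(focal, 1))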
8 | 9 | import pycolmap 10 | import numpy as np 11 | 12 | from typing import NamedTuple 13 | 14 | 15 | class PointCloud(NamedTuple): 16 | points: np.array 17 | colors: np.array 18 | errors: np.array 19 | corr: dict 20 | 21 | 22 | def parse_colmap_pts(sfm: pycolmap.Reconstruction, transform: np.array =None): 23 | """ 24 | Parse COLMAP points and correspondents. 25 | 26 | Input: 27 | @sfm Reconstruction from COLMAP. 28 | @transform 3x3 matrix to transform xyz. 29 | Output: 30 | @xyz Nx3 point positions. 31 | @rgb Nx3 point colors. 32 | @err N errors. 33 | @corr Dictionary from file name to point indices. 34 | """ 35 | 36 | xyz = [] 37 | rgb = [] 38 | err = [] 39 | points_id = [] 40 | for k, v in sfm.points3D.items(): 41 | points_id.append(k) 42 | xyz.append(v.xyz) 43 | rgb.append(v.color) 44 | err.append(v.error) 45 | if transform is not None: 46 | xyz[-1] = transform @ xyz[-1] 47 | 48 | xyz = np.array(xyz) 49 | rgb = np.array(rgb) 50 | err = np.array(err) 51 | points_id = np.array(points_id) 52 | 53 | points_idmap = np.full([points_id.max()+2], -1, dtype=np.int64) 54 | points_idmap[points_id] = np.arange(len(xyz)) 55 | 56 | corr = {} 57 | for image in sfm.images.values(): 58 | idx = np.array([p.point3D_id for p in image.points2D if p.has_point3D()]) 59 | corr[image.name] = points_idmap[idx] 60 | assert corr[image.name].min() >= 0 and corr[image.name].max() < len(xyz) 61 | 62 | return PointCloud(points=xyz, colors=rgb, errors=err, corr=corr) 63 | -------------------------------------------------------------------------------- /src/utils/fuser_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | ''' 10 | Reference: KinectFusion algorithm. 
11 | ''' 12 | 13 | import numpy as np 14 | 15 | import torch 16 | 17 | 18 | class Fuser: 19 | def __init__(self, 20 | xyz, 21 | bandwidth, 22 | use_trunc=True, 23 | fuse_tsdf=True, 24 | feat_dim=0, 25 | alpha_thres=0.5, 26 | crop_border=0.0, 27 | normal_weight=False, 28 | depth_weight=False, 29 | border_weight=False, 30 | max_norm_dist=10., 31 | use_half=False): 32 | assert len(xyz.shape) == 2 33 | assert xyz.shape[1] == 3 34 | self.xyz = xyz 35 | self.bandwidth = bandwidth 36 | self.use_trunc = use_trunc 37 | self.fuse_tsdf = fuse_tsdf 38 | self.feat_dim = feat_dim 39 | self.alpha_thres = alpha_thres 40 | self.crop_border = crop_border 41 | self.normal_weight = normal_weight 42 | self.depth_weight = depth_weight 43 | self.border_weight = border_weight 44 | self.max_norm_dist = max_norm_dist 45 | 46 | self.dtype = torch.float16 if use_half else torch.float32 47 | self.weight = torch.zeros([len(xyz), 1], dtype=self.dtype, device="cuda") 48 | self.feat = torch.zeros([len(xyz), feat_dim], dtype=self.dtype, device="cuda") 49 | if self.fuse_tsdf: 50 | self.sd_val = torch.zeros([len(xyz), 1], dtype=self.dtype, device="cuda") 51 | else: 52 | self.sd_val = None 53 | 54 | def integrate(self, cam, depth, feat=None, alpha=None): 55 | # Project grid points to image 56 | xyz_uv = cam.project(self.xyz) 57 | 58 | # Filter points projected outside 59 | filter_idx = torch.where((xyz_uv.abs() <= 1-self.crop_border).all(-1))[0] 60 | valid_idx = filter_idx 61 | valid_xyz = self.xyz[filter_idx] 62 | valid_uv = xyz_uv[filter_idx] 63 | 64 | # Compute projective sdf 65 | valid_frame_depth = torch.nn.functional.grid_sample( 66 | depth.view(1,1,*depth.shape[-2:]), 67 | valid_uv.view(1,1,-1,2), 68 | mode='bilinear', 69 | align_corners=False).flatten() 70 | valid_xyz_depth = (valid_xyz - cam.position) @ cam.lookat 71 | valid_sdf = valid_frame_depth - valid_xyz_depth 72 | 73 | if torch.is_tensor(self.bandwidth): 74 | bandwidth = self.bandwidth[valid_idx] 75 | else: 76 | bandwidth = self.bandwidth 77 | 78 | valid_sdf *= (1 / bandwidth) 79 | 80 | if self.use_trunc: 81 | # Filter occluded 82 | filter_idx = torch.where(valid_sdf >= -1)[0] 83 | valid_idx = valid_idx[filter_idx] 84 | valid_uv = valid_uv[filter_idx] 85 | valid_frame_depth = valid_frame_depth[filter_idx] 86 | valid_sdf = valid_sdf[filter_idx] 87 | valid_sdf = valid_sdf.clamp_(-1, 1) 88 | 89 | # Init weighting 90 | w = torch.ones_like(valid_frame_depth) 91 | else: 92 | norm_dist = valid_sdf.abs() 93 | w = torch.exp(-norm_dist.clamp_max(self.max_norm_dist)) 94 | 95 | # Alpha filtering 96 | if alpha is not None: 97 | valid_alpha = torch.nn.functional.grid_sample( 98 | alpha.view(1,1,*alpha.shape[-2:]), 99 | valid_uv.view(1,1,-1,2), 100 | mode='bilinear', 101 | align_corners=False).flatten() 102 | w *= valid_alpha 103 | 104 | filter_idx = torch.where(valid_alpha >= self.alpha_thres)[0] 105 | valid_idx = valid_idx[filter_idx] 106 | valid_uv = valid_uv[filter_idx] 107 | valid_frame_depth = valid_frame_depth[filter_idx] 108 | valid_sdf = valid_sdf[filter_idx] 109 | w = w[filter_idx] 110 | 111 | # Compute geometric weighting 112 | if self.depth_weight: 113 | w *= 1 / valid_frame_depth.clamp_min(0.1) 114 | 115 | if self.normal_weight: 116 | normal = cam.depth2normal(depth) 117 | rd = torch.nn.functional.normalize(cam.depth2pts(depth) - cam.position.view(3,1,1), dim=0) 118 | cos_theta = (normal * rd).sum(0).clamp_min(0) 119 | valid_cos_theta = torch.nn.functional.grid_sample( 120 | cos_theta.view(1,1,*cos_theta.shape[-2:]), 121 | valid_uv.view(1,1,-1,2), 122 | 
mode='bilinear', 123 | align_corners=False).flatten() 124 | w *= valid_cos_theta 125 | 126 | if self.border_weight: 127 | # The image center gets weight 1.0; the corners get 0.1 128 | w *= 1 / (1 + 9/np.sqrt(2) * valid_uv.square().sum(1).sqrt()) 129 | 130 | # Reshape integration weight 131 | w = w.unsqueeze(-1).to(self.dtype) 132 | 133 | # Integrate weight 134 | self.weight[valid_idx] += w 135 | 136 | # Integrate tsdf 137 | if self.fuse_tsdf: 138 | valid_sdf = valid_sdf.unsqueeze(-1).to(self.dtype) 139 | self.sd_val[valid_idx] += w * valid_sdf 140 | 141 | # Sample feature 142 | if self.feat_dim > 0: 143 | valid_feat = torch.nn.functional.grid_sample( 144 | feat.view(1,self.feat_dim,*feat.shape[-2:]).to(self.dtype), 145 | valid_uv.view(1,1,-1,2).to(self.dtype), 146 | mode='bilinear', 147 | align_corners=False)[0,:,0].T 148 | self.feat[valid_idx] += w * valid_feat 149 | 150 | @property 151 | def feature(self): 152 | return self.feat / self.weight 153 | 154 | @property 155 | def tsdf(self): 156 | return self.sd_val / self.weight 157 | 158 | 159 | @torch.no_grad() 160 | def rgb_fusion(voxel_model, cameras): 161 | 162 | from .octree_utils import level_2_vox_size 163 | 164 | # Define volume integrator 165 | finest_vox_size = level_2_vox_size(voxel_model.scene_extent, voxel_model.octlevel.max()).item() 166 | feat_volume = Fuser( 167 | xyz=voxel_model.vox_center, 168 | bandwidth=10 * finest_vox_size, 169 | use_trunc=False, 170 | fuse_tsdf=False, 171 | feat_dim=3, 172 | crop_border=0., 173 | normal_weight=False, 174 | depth_weight=False, 175 | border_weight=False, 176 | use_half=True) 177 | 178 | # Run RGB fusion over all cameras 179 | for cam in cameras: 180 | render_pkg = voxel_model.render(cam, color_mode="dontcare", output_depth=True) 181 | depth = render_pkg['depth'][2] 182 | feat_volume.integrate(cam=cam, feat=cam.image.cuda(), depth=depth) 183 | 184 | return feat_volume.feature.nan_to_num_(0.5).float() 185 | -------------------------------------------------------------------------------- /src/utils/image_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | 10 | import os 11 | import cv2 12 | import torch 13 | import numpy as np 14 | 15 | 16 | def im_tensor2np(x): 17 | if x.shape[0] == 1: 18 | x = x.squeeze(0) 19 | if len(x.shape) == 3: 20 | x = x.moveaxis(0, -1) 21 | return x.clamp(0, 1).mul(255).cpu().numpy().astype(np.uint8) 22 | 23 | def im_pil2tensor(x): 24 | return torch.from_numpy(np.array(x).astype(np.float32)).moveaxis(-1, 0) / 255 25 | 26 | def viz_tensordepth_histeq(x, alpha_mass=None): 27 | ''' 28 | Use histogram equalization for better depth visualization. 29 | By doing so, each color scale will contain a similar amount of pixels. 30 | The depth order is maintained but the scale does not reflect any actual distance.
31 | ''' 32 | if alpha_mass is not None: 33 | m = (alpha_mass>0.01) & (x>0) 34 | else: 35 | m = (x>0) 36 | 37 | x = x.cpu().numpy() 38 | m = m.cpu().numpy() 39 | n_valid = m.sum() 40 | if alpha_mass is not None: 41 | mass = alpha_mass.cpu().numpy()[m] 42 | else: 43 | mass = np.ones([n_valid]) 44 | order = np.argsort(x[m]) 45 | cdf = np.cumsum(mass[order]) / mass.sum() 46 | hist = np.empty([n_valid]) 47 | hist[order] = 1 + 254 * (cdf ** 2) 48 | x[~m] = 0 49 | x[m] = np.clip(hist, 1, 255) 50 | viz = cv2.applyColorMap(x.astype(np.uint8), cv2.COLORMAP_VIRIDIS) 51 | viz = cv2.cvtColor(viz, cv2.COLOR_BGR2RGB) 52 | viz[~m] = 0 53 | return viz 54 | 55 | def viz_tensordepth_log(x, alpha_mass=None): 56 | if alpha_mass is not None: 57 | m = (alpha_mass>0.01) & (x>0) 58 | else: 59 | m = (x>0) 60 | 61 | x = x.cpu().numpy() 62 | m = m.cpu().numpy() 63 | dmin, dmax = np.quantile(x[m], q=[0.03, 0.97]) 64 | x = np.log(np.clip(1 + x - dmin, 1, 1e9)) 65 | x = x / np.log(1 + dmax - dmin) 66 | x = np.clip(x, 0, 1) * 255 67 | viz = cv2.applyColorMap(x.astype(np.uint8), cv2.COLORMAP_VIRIDIS) 68 | viz = cv2.cvtColor(viz, cv2.COLOR_BGR2RGB) 69 | viz[~m] = 0 70 | return viz 71 | 72 | 73 | def viz_tensordepth(x, alpha_mass=None, mode='log'): 74 | if mode == 'histeq': 75 | return viz_tensordepth_histeq(x, alpha_mass) 76 | elif mode == 'log': 77 | return viz_tensordepth_log(x) 78 | raise NotImplementedError 79 | 80 | def resize_rendering(render, size, mode='bilinear', align_corners=False): 81 | return torch.nn.functional.interpolate( 82 | render[None], size=size, mode=mode, align_corners=align_corners, antialias=True)[0] 83 | -------------------------------------------------------------------------------- /src/utils/mono_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 
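A short illustrative use of the depth visualization helpers in src/utils/image_utils.py above (assumes the torch / NumPy / OpenCV stack that module already imports; the sizes and depth range are arbitrary):

    import torch
    from src.utils.image_utils import viz_tensordepth, im_tensor2np

    depth = 1.0 + 4.0 * torch.rand(240, 320)                 # hypothetical depth map in meters
    alpha = torch.ones_like(depth)                            # fully opaque rendering
    viz_hist = viz_tensordepth(depth, alpha, mode='histeq')   # (240, 320, 3) uint8, RGB
    viz_log = viz_tensordepth(depth, mode='log')              # log-scaled variant
    print(viz_hist.shape, viz_hist.dtype)

    img = torch.rand(3, 240, 320)                             # CHW float image in [0, 1]
    print(im_tensor2np(img).shape)                            # (240, 320, 3)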
8 | 9 | import os 10 | import sys 11 | import cv2 12 | import tqdm 13 | import torch 14 | import numpy as np 15 | import imageio.v2 as iio 16 | from PIL import Image 17 | 18 | 19 | def depth_path(depth_root, cam): 20 | return os.path.join(depth_root, f"{cam.image_name}.png") 21 | 22 | def codebook_path(depth_root, cam): 23 | return os.path.join(depth_root, f"{cam.image_name}.npy") 24 | 25 | def gather_todo_list(depth_root, cameras, force_rerun=False): 26 | # Gather list of camera to estimate depth 27 | todo_indices = [] 28 | for i, cam in enumerate(cameras): 29 | if not os.path.exists(depth_path(depth_root, cam)) or force_rerun: 30 | todo_indices.append(i) 31 | return todo_indices 32 | 33 | def load_depth_to_camera(depth_root, cameras, depth_name): 34 | for cam in tqdm.tqdm(cameras): 35 | depth_np = iio.imread(depth_path(depth_root, cam)) 36 | codebook = np.load(codebook_path(depth_root, cam)) 37 | setattr(cam, depth_name, torch.tensor(codebook[depth_np])) 38 | 39 | def save_quantize_depth(depth_root, cam, depth): 40 | # Quantize depth map to 16 bit 41 | codebook = depth.quantile(torch.linspace(0, 1, 65536).cuda(), interpolation='nearest') 42 | depth_idx = torch.searchsorted(codebook, depth, side='right').clamp_max_(65535) 43 | depth_idx[(depth - codebook[depth_idx-1]).abs() < (depth - codebook[depth_idx]).abs()] -= 1 44 | assert depth_idx.max() <= 65535 45 | assert depth_idx.min() >= 0 46 | 47 | # Save result 48 | depth_np = depth_idx.cpu().numpy().astype(np.uint16) 49 | iio.imwrite(depth_path(depth_root, cam), depth_np) 50 | np.save(codebook_path(depth_root, cam), codebook.cpu().numpy().astype(np.float32)) 51 | 52 | def resize_maxres_divisible(im, len, divisible): 53 | max_res = max(im.shape[-2:]) 54 | target_size = ( 55 | divisible * round(len * im.shape[-2] / max_res / divisible), 56 | divisible * round(len * im.shape[-1] / max_res / divisible)) 57 | im = torch.nn.functional.interpolate(im, size=target_size, mode='bilinear', antialias=True) 58 | return im 59 | 60 | 61 | @torch.no_grad() 62 | def prepare_depthanythingv2(cameras, source_path, force_rerun=False): 63 | 64 | depth_root = os.path.join(source_path, "mono_priors", "depthanythingv2") 65 | os.makedirs(depth_root, exist_ok=True) 66 | 67 | todo_indices = gather_todo_list(depth_root, cameras, force_rerun=force_rerun) 68 | 69 | if len(todo_indices): 70 | print(f"Infer depth for {len(todo_indices)} images. Saved to {depth_root}.") 71 | 72 | # Load model 73 | from transformers import AutoImageProcessor, AutoModelForDepthEstimation 74 | image_processor = AutoImageProcessor.from_pretrained("depth-anything/Depth-Anything-V2-Large-hf") 75 | model = AutoModelForDepthEstimation.from_pretrained("depth-anything/Depth-Anything-V2-Large-hf").cuda() 76 | 77 | for i in tqdm.tqdm(todo_indices): 78 | cam = cameras[i] 79 | 80 | # Inference depth 81 | inputs = image_processor(images=cam.image, return_tensors="pt", do_rescale=False) 82 | inputs['pixel_values'] = inputs['pixel_values'].cuda() 83 | outputs = model(**inputs) 84 | depth = outputs['predicted_depth'].squeeze() 85 | 86 | # Save result 87 | save_quantize_depth(depth_root, cam, depth) 88 | 89 | # Load the estimated depth 90 | print("Load the estimated depths to cameras.") 91 | load_depth_to_camera(depth_root, cameras, 'depthanythingv2') 92 | 93 | 94 | @torch.no_grad() 95 | def _prepare_mast3r_metric_depth(cameras, depth_root, mast3r_repo_path): 96 | print(f"Infer depth for {len(cameras)} images. 
Saved to {depth_root}.") 97 | 98 | assert os.path.exists(mast3r_repo_path), "mast3r repo path: `{mast3r_repo_path}` not found" 99 | sys.path.insert(0, mast3r_repo_path) 100 | 101 | # Load model 102 | from mast3r.model import AsymmetricMASt3R 103 | from dust3r.inference import inference 104 | from dust3r.cloud_opt.pair_viewer import PairViewer 105 | 106 | model = AsymmetricMASt3R.from_pretrained('naver/MASt3R_ViTLarge_BaseDecoder_512_catmlpdpt_metric').cuda() 107 | 108 | depth_prev = None 109 | 110 | for i in tqdm.trange(len(cameras) - 1): 111 | cam1 = cameras[i] 112 | cam2 = cameras[i+1] 113 | 114 | # Inference depth 115 | image_1 = resize_maxres_divisible(cam1.image[None].cuda(), len=512, divisible=16) 116 | image_2 = resize_maxres_divisible(cam2.image[None].cuda(), len=512, divisible=16) 117 | true_shape = np.array([image_1.shape[-2:]], dtype=np.int32) 118 | 119 | image_dicts = [ 120 | dict(img=image_1 * 2 - 1, true_shape=true_shape, idx=0, instance='0'), 121 | dict(img=image_2 * 2 - 1, true_shape=true_shape, idx=1, instance='1') 122 | ] 123 | symmetric_pair = [(image_dicts[0], image_dicts[1]), (image_dicts[1], image_dicts[0])] 124 | output = inference(symmetric_pair, model, 'cuda', batch_size=1, verbose=False) 125 | 126 | view1, pred1 = output['view1'], output['pred1'] 127 | view2, pred2 = output['view2'], output['pred2'] 128 | pair = PairViewer(view1, view2, pred1, pred2, verbose=False).cuda() 129 | depth1, depth2 = pair.get_depthmaps() 130 | pose1, pose2 = pair.get_im_poses() 131 | 132 | rescale = (cam1.position - cam2.position).norm() / (pose1[:3, 3] - pose2[:3, 3]).norm() 133 | depth1 = (depth1 * rescale).detach().squeeze() 134 | depth2 = (depth2 * rescale).detach().squeeze() 135 | 136 | # Average with previous result 137 | if depth_prev is not None: 138 | depth1 = (depth_prev + depth1) * 0.5 139 | 140 | depth_prev = depth2 141 | 142 | # Save result 143 | save_quantize_depth(depth_root, cam1, depth1) 144 | 145 | # Save last frame 146 | save_quantize_depth(depth_root, cam2, depth2) 147 | 148 | 149 | @torch.no_grad() 150 | def prepare_mast3r_metric_depth(cameras, source_path, mast3r_repo_path): 151 | 152 | depth_root = os.path.join(source_path, "mono_priors", "mast3r_metric_depth") 153 | os.makedirs(depth_root, exist_ok=True) 154 | 155 | n_todo = len(gather_todo_list(depth_root, cameras, force_rerun=False)) 156 | if n_todo > 0: 157 | _prepare_mast3r_metric_depth(cameras, depth_root, mast3r_repo_path) 158 | 159 | # Load the estimated depth 160 | print("Load the estimated depths to cameras.") 161 | load_depth_to_camera(depth_root, cameras, 'mast3r_metric_depth') 162 | -------------------------------------------------------------------------------- /src/utils/system_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | import random 10 | import numpy as np 11 | import torch 12 | 13 | def seed_everything(seed): 14 | random.seed(seed) 15 | np.random.seed(seed) 16 | torch.manual_seed(seed) 17 | --------------------------------------------------------------------------------
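A minimal reproducibility sketch for the seeding helper in src/utils/system_utils.py above (the seed value is arbitrary):

    import torch
    from src.utils.system_utils import seed_everything

    seed_everything(0)          # seeds Python's random, NumPy, and PyTorch RNGs
    print(torch.rand(2))        # identical across runs that use the same seed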