├── .gitignore ├── LICENSE ├── LICENSE_inria.md ├── README.md ├── articles ├── model_creation_tutor.md ├── model_functions.md └── scannetpp_dataset.md ├── asset ├── fusing_radio.jpg ├── fusing_segformer.jpg └── teaser.jpg ├── cfg ├── deep_blending.yaml ├── dtu_mesh.yaml ├── mipnerf360.yaml ├── mipnerf360_fast_rend.yaml ├── mipnerf360_fast_train.yaml ├── scannetpp.yaml ├── synthetic_nerf.yaml └── tnt_mesh.yaml ├── cuda ├── binding.cpp ├── setup.py ├── src │ ├── adam_step.cu │ ├── adam_step.h │ ├── auxiliary.h │ ├── backward.cu │ ├── backward.h │ ├── config.h │ ├── forward.cu │ ├── forward.h │ ├── geo_params_gather.cu │ ├── geo_params_gather.h │ ├── preprocess.cu │ ├── preprocess.h │ ├── raster_state.cu │ ├── raster_state.h │ ├── sh_compute.cu │ ├── sh_compute.h │ ├── tv_compute.cu │ ├── tv_compute.h │ ├── utils.cu │ └── utils.h └── svraster_cuda │ ├── __init__.py │ ├── grid_loss_bw.py │ ├── meta.py │ ├── renderer.py │ ├── sparse_adam.py │ └── utils.py ├── eval.py ├── extract_mesh.py ├── notebooks ├── demo_segformer.ipynb ├── demo_vfm_radio.ipynb └── example.ipynb ├── render.py ├── render_by_trace_sdf.py ├── render_fly_through.py ├── requirements.txt ├── scripts ├── dtu_clean_for_eval.py ├── dtu_eval │ ├── Offical_DTU_Dataset │ └── eval.py ├── dtu_preproc.py ├── dtu_run.sh ├── dtu_stat.py ├── eval_tnt │ ├── README.md │ ├── compute_bbox_for_mesh.py │ ├── config.py │ ├── cull_mesh.py │ ├── evaluate_single_scene.py │ ├── evaluation.py │ ├── help_func.py │ ├── plot.py │ ├── registration.py │ ├── requirements.txt │ ├── run.py │ ├── trajectory_io.py │ └── util.py ├── mipnerf360_run.sh ├── mipnerf360_stat.py ├── scannetpp_preproc.py ├── scannetpp_run.sh ├── scannetpp_stat.py ├── synthetic_nerf_run.sh ├── synthetic_nerf_stat.py ├── tandt_db_run.sh ├── tandt_db_stat.py ├── tnt_run.sh └── tnt_stat.py ├── src ├── cameras.py ├── config.py ├── dataloader │ ├── data_pack.py │ ├── reader_colmap_dataset.py │ └── reader_nerf_dataset.py ├── sparse_voxel_gears │ ├── adaptive.py │ ├── constructor.py │ ├── io.py │ ├── pooling.py │ ├── properties.py │ └── renderer.py ├── sparse_voxel_model.py └── utils │ ├── activation_utils.py │ ├── bounding_utils.py │ ├── camera_utils.py │ ├── colmap_utils.py │ ├── fuser_utils.py │ ├── image_utils.py │ ├── loss_utils.py │ ├── marching_cubes_utils.py │ ├── mono_utils.py │ ├── octree_utils.py │ └── system_utils.py ├── train.py └── viz.py /.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | *.so 3 | __pycache__/ 4 | *.egg-info/ 5 | *.egg 6 | eggs/ 7 | .eggs/ 8 | 9 | .ipynb_checkpoints/ 10 | 11 | log 12 | logs 13 | ckpt 14 | ckpts 15 | output 16 | outputs 17 | result 18 | results 19 | data 20 | datas 21 | 22 | 23 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | # NVIDIA Source Code License for SVRaster 2 | 3 | Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 4 | 5 | ## 1. Definitions 6 | 7 | “Licensor” means any person or entity that distributes its Work. 8 | 9 | “Work” means (a) the original work of authorship made available under this license, which may include software, documentation, or other files, and (b) any additions to or derivative works thereof that are made available under this license. 10 | 11 | The terms “reproduce,” “reproduction,” “derivative works,” and “distribution” have the meaning as provided under U.S. 
copyright law; provided, however, that for the purposes of this license, derivative works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work. 12 | 13 | Works are “made available” under this license by including in or with the Work either (a) a copyright notice referencing the applicability of this license to the Work, or (b) a copy of this license. 14 | 15 | ## 2. License Grant 16 | 17 | ### 2.1 Copyright Grant 18 | Subject to the terms and conditions of this license, each Licensor grants to you a perpetual, worldwide, non-exclusive, royalty-free, copyright license to use, reproduce, prepare derivative works of, publicly display, publicly perform, sublicense and distribute its Work and any resulting derivative works in any form. 19 | 20 | ## 3. Limitations 21 | 22 | ### 3.1 Redistribution 23 | You may reproduce or distribute the Work only if (a) you do so under this license, (b) you include a complete copy of this license with your distribution, and (c) you retain without modification any copyright, patent, trademark, or attribution notices that are present in the Work. 24 | 25 | ### 3.2 Derivative Works 26 | You may specify that additional or different terms apply to the use, reproduction, and distribution of your derivative works of the Work (“Your Terms”) only if (a) Your Terms provide that the use limitation in Section 3.3 applies to your derivative works, and (b) you identify the specific derivative works that are subject to Your Terms. Notwithstanding Your Terms, this license (including the redistribution requirements in Section 3.1) will continue to apply to the Work itself. 27 | 28 | ### 3.3 Use Limitation 29 | The Work includes files from [GitHub - graphdeco-inria/gaussian-splatting: Original reference implementation of "3D Gaussian Splatting for Real-Time Radiance Field Rendering"](https://github.com/graphdeco-inria/gaussian-splatting) and [GitHub - graphdeco-inria/diff-gaussian-rasterization at 9c5c2028f6fbee2be239bc4c9421ff894fe4fbe0](https://github.com/graphdeco-inria/diff-gaussian-rasterization/tree/9c5c2028f6fbee2be239bc4c9421ff894fe4fbe0), which are subject to the terms of the Gaussian-Splatting License (a copy the license is available at [diff-gaussian-rasterization/LICENSE.md at 9c5c2028f6fbee2be239bc4c9421ff894fe4fbe0 · graphdeco-inria/diff-gaussian-rasterization · GitHub](https://github.com/graphdeco-inria/diff-gaussian-rasterization/blob/9c5c2028f6fbee2be239bc4c9421ff894fe4fbe0/LICENSE.md)). The Work and any derivative works thereof only may be used or intended for use non-commercially. As used herein, “non-commercially” means for research or evaluation purposes only. 30 | 31 | ### 3.4 Patent Claims 32 | If you bring or threaten to bring a patent claim against any Licensor (including any claim, cross-claim or counterclaim in a lawsuit) to enforce any patents that you allege are infringed by any Work, then your rights under this license from such Licensor (including the grant in Section 2.1) will terminate immediately. 33 | 34 | ### 3.5 Trademarks 35 | This license does not grant any rights to use any Licensor’s or its affiliates’ names, logos, or trademarks, except as necessary to reproduce the notices described in this license. 36 | 37 | ### 3.6 Termination 38 | If you violate any term of this license, then your rights under this license (including the grant in Section 2.1) will terminate immediately. 39 | 40 | ## 4. 
Disclaimer of Warranty 41 | THE WORK IS PROVIDED “AS IS” WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER THIS LICENSE. 42 | 43 | ## 5. Limitation of Liability 44 | EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK (INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION, LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 45 | -------------------------------------------------------------------------------- /LICENSE_inria.md: -------------------------------------------------------------------------------- 1 | Gaussian-Splatting License 2 | =========================== 3 | 4 | **Inria** and **the Max Planck Institut for Informatik (MPII)** hold all the ownership rights on the *Software* named **gaussian-splatting**. 5 | The *Software* is in the process of being registered with the Agence pour la Protection des 6 | Programmes (APP). 7 | 8 | The *Software* is still being developed by the *Licensor*. 9 | 10 | *Licensor*'s goal is to allow the research community to use, test and evaluate 11 | the *Software*. 12 | 13 | ## 1. Definitions 14 | 15 | *Licensee* means any person or entity that uses the *Software* and distributes 16 | its *Work*. 17 | 18 | *Licensor* means the owners of the *Software*, i.e Inria and MPII 19 | 20 | *Software* means the original work of authorship made available under this 21 | License ie gaussian-splatting. 22 | 23 | *Work* means the *Software* and any additions to or derivative works of the 24 | *Software* that are made available under this License. 25 | 26 | 27 | ## 2. Purpose 28 | This license is intended to define the rights granted to the *Licensee* by 29 | Licensors under the *Software*. 30 | 31 | ## 3. Rights granted 32 | 33 | For the above reasons Licensors have decided to distribute the *Software*. 34 | Licensors grant non-exclusive rights to use the *Software* for research purposes 35 | to research users (both academic and industrial), free of charge, without right 36 | to sublicense.. The *Software* may be used "non-commercially", i.e., for research 37 | and/or evaluation purposes only. 38 | 39 | Subject to the terms and conditions of this License, you are granted a 40 | non-exclusive, royalty-free, license to reproduce, prepare derivative works of, 41 | publicly display, publicly perform and distribute its *Work* and any resulting 42 | derivative works in any form. 43 | 44 | ## 4. Limitations 45 | 46 | **4.1 Redistribution.** You may reproduce or distribute the *Work* only if (a) you do 47 | so under this License, (b) you include a complete copy of this License with 48 | your distribution, and (c) you retain without modification any copyright, 49 | patent, trademark, or attribution notices that are present in the *Work*. 
50 | 51 | **4.2 Derivative Works.** You may specify that additional or different terms apply 52 | to the use, reproduction, and distribution of your derivative works of the *Work* 53 | ("Your Terms") only if (a) Your Terms provide that the use limitation in 54 | Section 2 applies to your derivative works, and (b) you identify the specific 55 | derivative works that are subject to Your Terms. Notwithstanding Your Terms, 56 | this License (including the redistribution requirements in Section 3.1) will 57 | continue to apply to the *Work* itself. 58 | 59 | **4.3** Any other use without of prior consent of Licensors is prohibited. Research 60 | users explicitly acknowledge having received from Licensors all information 61 | allowing to appreciate the adequacy between of the *Software* and their needs and 62 | to undertake all necessary precautions for its execution and use. 63 | 64 | **4.4** The *Software* is provided both as a compiled library file and as source 65 | code. In case of using the *Software* for a publication or other results obtained 66 | through the use of the *Software*, users are strongly encouraged to cite the 67 | corresponding publications as explained in the documentation of the *Software*. 68 | 69 | ## 5. Disclaimer 70 | 71 | THE USER CANNOT USE, EXPLOIT OR DISTRIBUTE THE *SOFTWARE* FOR COMMERCIAL PURPOSES 72 | WITHOUT PRIOR AND EXPLICIT CONSENT OF LICENSORS. YOU MUST CONTACT INRIA FOR ANY 73 | UNAUTHORIZED USE: stip-sophia.transfert@inria.fr . ANY SUCH ACTION WILL 74 | CONSTITUTE A FORGERY. THIS *SOFTWARE* IS PROVIDED "AS IS" WITHOUT ANY WARRANTIES 75 | OF ANY NATURE AND ANY EXPRESS OR IMPLIED WARRANTIES, WITH REGARDS TO COMMERCIAL 76 | USE, PROFESSIONNAL USE, LEGAL OR NOT, OR OTHER, OR COMMERCIALISATION OR 77 | ADAPTATION. UNLESS EXPLICITLY PROVIDED BY LAW, IN NO EVENT, SHALL INRIA OR THE 78 | AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 79 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 80 | GOODS OR SERVICES, LOSS OF USE, DATA, OR PROFITS OR BUSINESS INTERRUPTION) 81 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 82 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING FROM, OUT OF OR 83 | IN CONNECTION WITH THE *SOFTWARE* OR THE USE OR OTHER DEALINGS IN THE *SOFTWARE*. 84 | 85 | ## 6. Files subject to permissive licenses 86 | The contents of the file ```utils/loss_utils.py``` are based on publicly available code authored by Evan Su, which falls under the permissive MIT license. 87 | 88 | Title: pytorch-ssim\ 89 | Project code: https://github.com/Po-Hsun-Su/pytorch-ssim\ 90 | Copyright Evan Su, 2017\ 91 | License: https://github.com/Po-Hsun-Su/pytorch-ssim/blob/master/LICENSE.txt (MIT) -------------------------------------------------------------------------------- /articles/model_functions.md: -------------------------------------------------------------------------------- 1 | # Model functions 2 | 3 | We cover the functions to create a model instance in [model_creation_tutor.md](./model_creation_tutor.md). We describe the other functions in the following. 4 | 5 | ## Properties 6 | - `n_samp_per_vox`: Number of samples per voxel when rendering. 7 | - `ss`: Super-sampling scale. 8 | - We render higher-resolution image (`[H * ss, W * ss]`) and apply anti-aliasing downsampling. 9 | - `white_background`: Indicate if the background is white. 10 | - `black_background`: Indicate if the background is black. 
11 |   - The background will be the averaged color if neither black nor white is set.
12 | - `max_sh_degree`: Maximum SH degree. Degrees `0~3` are supported.
13 |   - This number should be fixed after the voxel parameters are allocated.
14 | 
15 | ## Derived properties
16 | - `num_voxels`: Number of voxels.
17 | - `num_grid_pts`: Number of grid points.
18 |   - Recall that a voxel has 8 corner grid points and a grid point can be shared by adjacent voxels. This is the number of unique grid points.
19 | - `scene_min`: Minimum coordinate of the entire scene.
20 | - `scene_max`: Maximum coordinate of the entire scene.
21 | - `inside_min`: Minimum coordinate of the main foreground region.
22 |   - It's valid when the model is created by `model_init` with `outside_level > 0`, which preserves Octree levels outside the main foreground bounding box.
23 | - `inside_max`: Maximum coordinate of the main foreground region.
24 | - `inside_mask`: A mask indicating whether each voxel lies within `inside_min` and `inside_max`.
25 | - `subdivision_priority`: The model automatically tracks and accumulates subdivision priority during the rendering backward pass.
26 |   - A larger value means higher priority.
27 |   - Reset by `reset_subdivision_priority()`.
28 | 
29 | The following properties are lazily computed the first time you access them. They are recomputed automatically when a change in the voxel allocation is detected (e.g., after pruning or subdivision).
30 | - `vox_center`: Voxel center position in world space.
31 | - `vox_size`: Voxel size.
32 | - `vox_key`: Index into the unique grid points, with shape `[num_voxels, 8]`.
33 | - `grid_pts_xyz`: The world-space positions of the unique grid points.
34 | 
35 | ## Parameters
36 | - `_sh0`: Base color as the zero-degree SH component. The shape is `[num_voxels, 3]`.
37 | - `_shs`: Higher-degree SH components for view-dependent color. The shape is `[num_voxels, (max_sh_degree+1)**2 - 1, 3]`.
38 |   - It's the dominant factor in the total number of parameters.
39 | - `_geo_grid_pts`: The density values at the grid points. The shape is `[num_grid_pts, 1]`.
40 |   - When rendering, it's gathered into a `[num_voxels, 8]` tensor defining each voxel's trilinear density field for the CUDA rasterizer.
41 | 
42 | ## Core functions
43 | - `render_pkg = render(camera, track_max_w=False, output_depth=False, output_normal=False, output_T=False)`
44 |   - Renders a view.
45 |   - `track_max_w`: whether to track the maximum blending weight of each voxel. Access it via `render_pkg['max_w']`.
46 |   - `output_depth`: whether to render depth. Access it via `render_pkg['depth']` or `render_pkg['raw_depth']`.
47 |   - `output_normal`: whether to render normals. Access them via `render_pkg['normal']` or `render_pkg['raw_normal']`.
48 |   - `output_T`: whether to output transmittance. Access it via `render_pkg['T']` or `render_pkg['raw_T']`.
49 |   - The outputs with the `raw_` prefix are the results without anti-aliasing downsampling.
50 |   - The depth and normal are not normalized by alpha.
51 |   - The output depth has shape `[3, H, W]`, holding the mean depth, distortion cache, and median depth. Only the mean depth supports backpropagation.
52 | - `pruning(mask)`
53 |   - Removes the voxels indicated by the given mask.
54 | - `subdividing(mask)`
55 |   - Subdivides the voxels indicated by the given mask into their eight octants. The source parent voxels are removed after subdivision.
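The following is a minimal sketch of how these core functions fit together. It is not the actual optimization loop in `train.py`; it assumes `model` is a sparse voxel model instance and `camera` / `cameras` come from the data loader, that the per-voxel quantities are 1-D tensors, and that the thresholds below are illustrative only.

```python
# Render one view and read back the documented outputs.
render_pkg = model.render(camera, track_max_w=True, output_depth=True, output_normal=True)
depth = render_pkg['depth']    # [3, H, W]: mean depth, distortion cache, median depth
normal = render_pkg['normal']  # not normalized by alpha
max_w = render_pkg['max_w']    # per-voxel maximum blending weight for this view

# Pruning: drop voxels whose blending weight stays negligible. In practice the statistic
# would be aggregated over all training views, e.g. via compute_training_stat(cameras).
model.pruning(max_w < 1e-4)

# Subdivision: split the voxels with the highest accumulated priority into their octants.
priority = model.subdivision_priority       # accumulated during rendering backward passes
k = max(1, int(0.05 * model.num_voxels))    # illustrative: top ~5% of voxels
threshold = priority.topk(k).values.min()
model.subdividing(priority >= threshold)
model.reset_subdivision_priority()
```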
56 | 
57 | ## Useful functions
58 | - `compute_training_stat(camera_lst)`
59 |   - Compute per-voxel statistics from the given cameras, including `max_w` for the maximum blending weight, `min_samp_interval` for the inverse of the maximum sampling rate, and `view_cnt` for the visible camera count.
60 | - `reset_sh_from_cameras(camera_lst)`
61 |   - Reset shs to zero.
62 |   - Reset sh0 to yield the colors averaged from the given images.
63 | - `apply_tv_on_density_field(lambda_tv_density)`
64 |   - Add the gradient of the total variation loss to the `_geo_grid_pts` parameter.
65 | - `save(path, quantize=False)`
66 |   - Save the model to the given path. You can optionally apply 8-bit quantization to the parameters, which saves about 70% of disk space with a minor quality difference.
67 | - `load(path)`
68 |   - Load a checkpoint from the given path.
69 | - `load_iteration(model_path, iteration=-1)`
70 |   - Load a checkpoint from a model output path at the given iteration. By default, the latest iteration is loaded.
--------------------------------------------------------------------------------
/articles/scannetpp_dataset.md:
--------------------------------------------------------------------------------
1 | # SVR for ScanNet++ dataset
2 | 
3 | We now support the ScanNet++ dataset. The [benchmark results](https://kaldir.vc.in.tum.de/scannetpp/benchmark/nvs) on the 3rd-party-evaluated hidden set are (as of Mar 8, 2025):
4 | scannet++ benchmark
5 | 
6 | https://github.com/user-attachments/assets/85f55a12-b4bb-4581-924e-925a38f6a748
7 | 
8 | More result details (averaged over 50 scenes):
9 | - Per-scene optimization time: `12 mins`.
10 | - FPS: `197` at `1752 x 1168` image resolution. As we use `ss=1.5`, the actual rendering resolution is `2628 x 1752`.
11 | - Voxel size distribution:
12 |   | <3mm | 3mm-5mm | 5mm-1cm | 1cm-2cm | 2cm-3cm | >3cm |
13 |   | :-: | :-: | :-: | :-: | :-: | :-: |
14 |   | 13.61% | 19.25% | 32.43% | 23.31% | 6.66% | 4.73% |
15 | - Sparse points from COLMAP are not used in the submitted version. We later found the sparse-point loss helpful for geometry, and it slightly improves quality on the public set. Activate it with `--lambda_sparse_depth 1e-2` when running `train.py`.
16 | 
17 | ### Data preparation
18 | 1. Download the source data following the procedure on the [scannet++ official site](https://kaldir.vc.in.tum.de/scannetpp/).
19 | 2. Run `python scripts/scannetpp_preproc.py --indir $PATH_TO_SOURCE_DATA --outdir data/scannetpp_nvs --ids $SEQUENCE_OF_SCENE_ID`.
20 | 
21 | ### Optimization configuration
22 | The config file is provided in `cfg/scannetpp.yaml`. We detail the settings as follows.
23 | 
24 | **Scene bound heuristic.**
25 | As this is a fully indoor dataset, we set `outside_level` to zero and assume the entire scene is inside the main scene bound. The world center is set to the centroid of the training cameras, and the scene radius is set to two times the maximum distance from the world center to the cameras (a small sketch of this computation is shown below).
26 | 
27 | **SH reset trick.**
28 | We find that the view-dependent color from SH does not generalize well, so we reset the sh components near the end of optimization. This trick improves quality on view "extrapolation" tasks like the ScanNet++ dataset, while it slightly reduces quality on view "interpolation" tasks like mipnerf360.
29 | 
30 | **Density ascending regularizer.**
31 | It encourages the normal derived from the density field to point toward the camera side. It improves geometry qualitatively and slightly improves quantitative results.
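Below is a small sketch of the scene bound heuristic described above (`bound_mode: camera_max` with `bound_scale: 2.0` in `cfg/scannetpp.yaml`). `cam_positions` is an assumed `[N, 3]` tensor of training-camera centers in world space; the repository's actual bounding logic (see `src/utils/bounding_utils.py`) may differ in details, and the cubic min/max bound at the end is illustrative only.

```python
import torch

# Assumed input: cam_positions is an [N, 3] tensor of training-camera centers (world space).
world_center = cam_positions.mean(dim=0)                          # centroid of training cameras
max_cam_dist = (cam_positions - world_center).norm(dim=-1).max()  # farthest camera from center
scene_radius = 2.0 * max_cam_dist                                 # bound_scale = 2.0

# Illustrative cubic scene bound derived from the center and radius.
scene_min = world_center - scene_radius
scene_max = world_center + scene_radius
```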
32 | 33 | **Sparse point depth loss.** 34 | It's not used in the submitted version. On the public set, it improves geometry qualitatively and novel-view results quantitatively. 35 | 36 | scannet++ benchmark 37 | -------------------------------------------------------------------------------- /asset/fusing_radio.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/svraster/1413d346f6c2d9e548e41956c9ea3a5dc22beb9e/asset/fusing_radio.jpg -------------------------------------------------------------------------------- /asset/fusing_segformer.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/svraster/1413d346f6c2d9e548e41956c9ea3a5dc22beb9e/asset/fusing_segformer.jpg -------------------------------------------------------------------------------- /asset/teaser.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/svraster/1413d346f6c2d9e548e41956c9ea3a5dc22beb9e/asset/teaser.jpg -------------------------------------------------------------------------------- /cfg/deep_blending.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | # See `src/config.py` for the base setup. 10 | data: 11 | eval: True 12 | 13 | bounding: 14 | bound_mode: camera_max 15 | bound_scale: 1.5 16 | outside_level: 0 # No outside region is observed. 17 | 18 | regularizer: 19 | lambda_T_inside: 0.01 20 | -------------------------------------------------------------------------------- /cfg/dtu_mesh.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | # See `src/config.py` for the base setup. 10 | data: 11 | eval: False 12 | skip_blend_alpha: True 13 | res_downscale: 2.0 14 | 15 | bounding: 16 | outside_level: 1 17 | 18 | model: 19 | n_samp_per_vox: 3 20 | black_background: True 21 | 22 | optimizer: 23 | geo_lr: 0.05 24 | 25 | regularizer: 26 | lambda_T_concen: 0.01 27 | lambda_R_concen: 0.1 28 | lambda_normal_dmean: 0.001 29 | lambda_normal_dmed: 0.001 30 | 31 | init: 32 | init_out_ratio: 0.01 # Use very few voxels for the mostly black background 33 | -------------------------------------------------------------------------------- /cfg/mipnerf360.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 
2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | # See `src/config.py` for the base setup. 10 | data: 11 | eval: True 12 | 13 | regularizer: 14 | lambda_T_inside: 0.01 15 | -------------------------------------------------------------------------------- /cfg/mipnerf360_fast_rend.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | # See `src/config.py` for the base setup. 10 | data: 11 | eval: True 12 | 13 | model: 14 | ss: 1.1 15 | 16 | regularizer: 17 | lambda_T_inside: 0.01 18 | 19 | procedure: 20 | prune_thres_final: 0.15 21 | -------------------------------------------------------------------------------- /cfg/mipnerf360_fast_train.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | # See `src/config.py` for the base setup. 10 | data: 11 | eval: True 12 | 13 | regularizer: 14 | lambda_T_inside: 0.01 15 | 16 | procedure: 17 | sche_mult: 0.3 18 | -------------------------------------------------------------------------------- /cfg/scannetpp.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | # See `src/config.py` for the base setup. 10 | data: 11 | eval: True 12 | res_downscale: 1.0 # Use source image resolution 13 | 14 | bounding: 15 | bound_mode: camera_max 16 | bound_scale: 2.0 17 | outside_level: 0 # No background region 18 | 19 | regularizer: 20 | lambda_T_inside: 0.01 21 | 22 | lambda_dist: 0.01 23 | dist_from: 3000 24 | 25 | lambda_ascending: 0.01 26 | 27 | procedure: 28 | reset_sh_ckpt: [15000] 29 | -------------------------------------------------------------------------------- /cfg/synthetic_nerf.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 
2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | # See `src/config.py` for the base setup. 10 | data: 11 | eval: True 12 | 13 | bounding: 14 | outside_level: 0 15 | 16 | model: 17 | white_background: True 18 | 19 | regularizer: 20 | lambda_T_concen: 0.1 21 | 22 | init: 23 | sh_degree_init: 0 24 | 25 | procedure: 26 | subdivide_all_until: 1000 # Subdivide all at the first time 27 | subdivide_samp_thres: 0.5 # Enable smaller voxels 28 | -------------------------------------------------------------------------------- /cfg/tnt_mesh.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | # See `src/config.py` for the base setup. 10 | model: 11 | n_samp_per_vox: 3 12 | 13 | data: 14 | eval: False 15 | res_downscale: 2.0 16 | 17 | regularizer: 18 | lambda_T_inside: 0.01 19 | lambda_R_concen: 0.1 20 | lambda_normal_dmean: 0.001 21 | lambda_normal_dmed: 0.001 22 | -------------------------------------------------------------------------------- /cuda/binding.cpp: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 3 | 4 | NVIDIA CORPORATION and its licensors retain all intellectual property 5 | and proprietary rights in and to this software, related documentation 6 | and any modifications thereto. Any use, reproduction, disclosure or 7 | distribution of this software and related documentation without an express 8 | license agreement from NVIDIA CORPORATION is strictly prohibited. 
9 | *************************************************************************/ 10 | 11 | #include 12 | #include "src/config.h" 13 | #include "src/raster_state.h" 14 | #include "src/preprocess.h" 15 | #include "src/forward.h" 16 | #include "src/backward.h" 17 | #include "src/sh_compute.h" 18 | #include "src/tv_compute.h" 19 | #include "src/geo_params_gather.h" 20 | #include "src/utils.h" 21 | #include "src/adam_step.h" 22 | 23 | 24 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 25 | m.def("rasterize_preprocess", &PREPROCESS::rasterize_preprocess); 26 | m.def("rasterize_voxels", &FORWARD::rasterize_voxels); 27 | m.def("rasterize_voxels_backward", &BACKWARD::rasterize_voxels_backward); 28 | m.def("filter_geomState", &RASTER_STATE::filter_geomState); 29 | m.def("unpack_ImageState", &RASTER_STATE::unpack_ImageState); 30 | 31 | m.def("gather_triinterp_geo_params", &GEO_PARAMS_GATHER::gather_triinterp_geo_params); 32 | m.def("gather_triinterp_geo_params_bw", &GEO_PARAMS_GATHER::gather_triinterp_geo_params_bw); 33 | m.def("gather_triinterp_feat_params", &GEO_PARAMS_GATHER::gather_triinterp_feat_params); 34 | m.def("gather_triinterp_feat_params_bw", &GEO_PARAMS_GATHER::gather_triinterp_feat_params_bw); 35 | 36 | m.def("sh_compute", &SH_COMPUTE::sh_compute); 37 | m.def("sh_compute_bw", &SH_COMPUTE::sh_compute_bw); 38 | 39 | m.def("total_variation_bw", &TV_COMPUTE::total_variation_bw); 40 | 41 | m.def("is_in_cone", &UTILS::is_in_cone); 42 | m.def("compute_rd", &UTILS::compute_rd); 43 | m.def("depth2pts", &UTILS::depth2pts); 44 | m.def("voxel_order_rank", &UTILS::voxel_order_rank); 45 | m.def("ijk_2_octpath", &UTILS::ijk_2_octpath); 46 | m.def("octpath_2_ijk", &UTILS::octpath_2_ijk); 47 | 48 | m.def("unbiased_adam_step", &ADAM_STEP::unbiased_adam_step); 49 | m.def("biased_adam_step", &ADAM_STEP::biased_adam_step); 50 | 51 | // Some readonly constant 52 | m.attr("MAX_NUM_LEVELS") = pybind11::int_(MAX_NUM_LEVELS); 53 | m.attr("STEP_SZ_SCALE") = pybind11::float_(STEP_SZ_SCALE); 54 | m.attr("MAX_N_SAMP") = pybind11::int_(MAX_N_SAMP); 55 | } 56 | -------------------------------------------------------------------------------- /cuda/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 
8 | 9 | import os 10 | from setuptools import setup 11 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 12 | 13 | setup( 14 | name="svraster_cuda", 15 | packages=["svraster_cuda"], 16 | ext_modules=[ 17 | CUDAExtension( 18 | name="svraster_cuda._C", 19 | sources=[ 20 | "src/raster_state.cu", 21 | "src/preprocess.cu", 22 | "src/forward.cu", 23 | "src/backward.cu", 24 | "src/geo_params_gather.cu", 25 | "src/sh_compute.cu", 26 | "src/tv_compute.cu", 27 | "src/utils.cu", 28 | "src/adam_step.cu", 29 | "binding.cpp" 30 | ], 31 | # extra_compile_args={"nvcc": ["--use_fast_math"]}, 32 | ) 33 | ], 34 | cmdclass={ 35 | "build_ext": BuildExtension 36 | } 37 | ) 38 | -------------------------------------------------------------------------------- /cuda/src/adam_step.cu: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 3 | 4 | NVIDIA CORPORATION and its licensors retain all intellectual property 5 | and proprietary rights in and to this software, related documentation 6 | and any modifications thereto. Any use, reproduction, disclosure or 7 | distribution of this software and related documentation without an express 8 | license agreement from NVIDIA CORPORATION is strictly prohibited. 9 | *************************************************************************/ 10 | 11 | #include "adam_step.h" 12 | 13 | #include 14 | #include 15 | 16 | #include 17 | namespace cg = cooperative_groups; 18 | 19 | namespace ADAM_STEP { 20 | 21 | __forceinline__ __device__ float lerp(float v0, float v1, float t) 22 | { 23 | // Compute (1-t) * v0 + t * v1 24 | return fmaf(t, v1, fmaf(-t, v0, v0)); 25 | } 26 | 27 | 28 | template 29 | __global__ void unbiased_adam_step_cuda_kernel( 30 | const int numel, 31 | float* __restrict__ param, 32 | const float* __restrict__ grad, 33 | float* __restrict__ exp_avg, 34 | float* __restrict__ exp_avg_sq, 35 | const float step_size, const float beta1, const float beta2, 36 | const float rsqrt_bias_correction2, const float eps) 37 | { 38 | const int idx = cg::this_grid().thread_rank(); 39 | if (idx >= numel) 40 | return; 41 | 42 | // Check gradient 43 | const float grad_val = grad[idx]; 44 | if (sparse && grad_val == 0.0f) 45 | return; 46 | 47 | // Load parameters 48 | float exp_avg_val = exp_avg[idx]; 49 | float exp_avg_sq_val = exp_avg_sq[idx]; 50 | 51 | // Adam step 52 | // beta1 * exp_avg_val + (1.0f - beta1) * grad_val 53 | // beta2 * exp_avg_sq_val + (1.0f - beta2) * grad_val * grad_val 54 | exp_avg_val = lerp(grad_val, exp_avg_val, beta1); 55 | exp_avg_sq_val = lerp(grad_val * grad_val, exp_avg_sq_val, beta2); 56 | 57 | const float denom = fmaf(sqrtf(exp_avg_sq_val), rsqrt_bias_correction2, eps); 58 | const float param_step = step_size * (exp_avg_val / denom); 59 | 60 | // Save back the new results 61 | param[idx] -= param_step; 62 | exp_avg[idx] = exp_avg_val; 63 | exp_avg_sq[idx] = exp_avg_sq_val; 64 | } 65 | 66 | 67 | template 68 | __global__ void biased_adam_step_cuda_kernel( 69 | const int numel, 70 | float* __restrict__ param, 71 | const float* __restrict__ grad, 72 | float* __restrict__ exp_avg, 73 | float* __restrict__ exp_avg_sq, 74 | const float lr, const float beta1, const float beta2, const float eps) 75 | { 76 | const int idx = cg::this_grid().thread_rank(); 77 | if (idx >= numel) 78 | return; 79 | 80 | // Check gradient 81 | const float grad_val = grad[idx]; 82 | if (sparse && 
grad_val == 0.0f) 83 | return; 84 | 85 | // Load parameters 86 | float exp_avg_val = exp_avg[idx]; 87 | float exp_avg_sq_val = exp_avg_sq[idx]; 88 | 89 | // Adam step 90 | // beta1 * exp_avg_val + (1.0f - beta1) * grad_val 91 | // beta2 * exp_avg_sq_val + (1.0f - beta2) * grad_val * grad_val 92 | exp_avg_val = lerp(grad_val, exp_avg_val, beta1); 93 | exp_avg_sq_val = lerp(grad_val * grad_val, exp_avg_sq_val, beta2); 94 | 95 | const float denom = sqrtf(exp_avg_sq_val) + eps; 96 | const float param_step = lr * (exp_avg_val / denom); 97 | 98 | // Save back the new results 99 | param[idx] -= param_step; 100 | exp_avg[idx] = exp_avg_val; 101 | exp_avg_sq[idx] = exp_avg_sq_val; 102 | } 103 | 104 | 105 | 106 | void unbiased_adam_step( 107 | bool sparse, 108 | torch::Tensor& param, 109 | const torch::Tensor& grad, 110 | torch::Tensor& exp_avg, 111 | torch::Tensor& exp_avg_sq, 112 | const double step, 113 | const double lr, const double beta1, const double beta2, const float eps) 114 | { 115 | const int numel = param.numel(); 116 | 117 | const double bias_correction1 = 1.0 - pow(beta1, step); 118 | const double bias_correction2 = 1.0 - pow(beta2, step); 119 | 120 | const double step_size = lr / bias_correction1; 121 | 122 | const double rsqrt_bias_correction2 = rsqrt(bias_correction2); 123 | 124 | auto kernel_func = sparse ? unbiased_adam_step_cuda_kernel : 125 | unbiased_adam_step_cuda_kernel; 126 | 127 | kernel_func <<<(numel + 255) / 256, 256>>>( 128 | numel, 129 | param.contiguous().data_ptr(), 130 | grad.contiguous().data_ptr(), 131 | exp_avg.contiguous().data_ptr(), 132 | exp_avg_sq.contiguous().data_ptr(), 133 | step_size, beta1, beta2, rsqrt_bias_correction2, eps 134 | ); 135 | } 136 | 137 | void biased_adam_step( 138 | bool sparse, 139 | torch::Tensor& param, 140 | const torch::Tensor& grad, 141 | torch::Tensor& exp_avg, 142 | torch::Tensor& exp_avg_sq, 143 | const float lr, const float beta1, const float beta2, const float eps) 144 | { 145 | const int numel = param.numel(); 146 | 147 | auto kernel_func = sparse ? biased_adam_step_cuda_kernel : 148 | biased_adam_step_cuda_kernel; 149 | 150 | kernel_func <<<(numel + 255) / 256, 256>>>( 151 | numel, 152 | param.contiguous().data_ptr(), 153 | grad.contiguous().data_ptr(), 154 | exp_avg.contiguous().data_ptr(), 155 | exp_avg_sq.contiguous().data_ptr(), 156 | lr, beta1, beta2, eps 157 | ); 158 | } 159 | 160 | } 161 | -------------------------------------------------------------------------------- /cuda/src/adam_step.h: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 3 | 4 | NVIDIA CORPORATION and its licensors retain all intellectual property 5 | and proprietary rights in and to this software, related documentation 6 | and any modifications thereto. Any use, reproduction, disclosure or 7 | distribution of this software and related documentation without an express 8 | license agreement from NVIDIA CORPORATION is strictly prohibited. 9 | *************************************************************************/ 10 | 11 | #ifndef ADAM_STEP_H_INCLUDED 12 | #define ADAM_STEP_H_INCLUDED 13 | 14 | #include 15 | 16 | namespace ADAM_STEP { 17 | 18 | // Python interface to run adam optimization step. 
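// Both variants update `param` in place from `grad` and the Adam moment buffers
// `exp_avg` (m) and `exp_avg_sq` (v):
//   m <- beta1 * m + (1 - beta1) * grad
//   v <- beta2 * v + (1 - beta2) * grad^2
// The "unbiased" variant applies the standard Adam bias correction using `step`:
//   param <- param - (lr / (1 - beta1^step)) * m / (sqrt(v) / sqrt(1 - beta2^step) + eps)
// The "biased" variant skips bias correction:
//   param <- param - lr * m / (sqrt(v) + eps)
// When `sparse` is true, entries whose gradient is exactly zero are left untouched.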
19 | void unbiased_adam_step( 20 | bool sparse, 21 | torch::Tensor& param, 22 | const torch::Tensor& grad, 23 | torch::Tensor& exp_avg, 24 | torch::Tensor& exp_avg_sq, 25 | const double step, 26 | const double lr, const double beta1, const double beta2, const float eps); 27 | 28 | void biased_adam_step( 29 | bool sparse, 30 | torch::Tensor& param, 31 | const torch::Tensor& grad, 32 | torch::Tensor& exp_avg, 33 | torch::Tensor& exp_avg_sq, 34 | const float lr, const float beta1, const float beta2, const float eps); 35 | 36 | } 37 | 38 | #endif 39 | -------------------------------------------------------------------------------- /cuda/src/backward.h: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 3 | 4 | NVIDIA CORPORATION and its licensors retain all intellectual property 5 | and proprietary rights in and to this software, related documentation 6 | and any modifications thereto. Any use, reproduction, disclosure or 7 | distribution of this software and related documentation without an express 8 | license agreement from NVIDIA CORPORATION is strictly prohibited. 9 | *************************************************************************/ 10 | 11 | #ifndef RASTERIZER_BACKWARD_H_INCLUDED 12 | #define RASTERIZER_BACKWARD_H_INCLUDED 13 | 14 | #include 15 | 16 | namespace BACKWARD 17 | { 18 | 19 | // Interface for python to run backward pass of voxel rasterization. 20 | std::tuple 21 | rasterize_voxels_backward( 22 | const int R, 23 | const int n_samp_per_vox, 24 | const int image_width, const int image_height, 25 | const float tan_fovx, const float tan_fovy, 26 | const float cx, const float cy, 27 | const torch::Tensor& w2c_matrix, 28 | const torch::Tensor& c2w_matrix, 29 | const float bg_color, 30 | 31 | const torch::Tensor& octree_paths, 32 | const torch::Tensor& vox_centers, 33 | const torch::Tensor& vox_lengths, 34 | const torch::Tensor& geos, 35 | const torch::Tensor& rgbs, 36 | 37 | const torch::Tensor& geomBuffer, 38 | const torch::Tensor& binningBuffer, 39 | const torch::Tensor& imageBuffer, 40 | const torch::Tensor& out_T, 41 | 42 | const torch::Tensor& dL_dout_color, 43 | const torch::Tensor& dL_dout_depth, 44 | const torch::Tensor& dL_dout_normal, 45 | const torch::Tensor& dL_dout_T, 46 | 47 | const float lambda_R_concen, 48 | const torch::Tensor& gt_color, 49 | const float lambda_ascending, 50 | const float lambda_dist, 51 | const bool need_depth, 52 | const bool need_normal, 53 | const torch::Tensor& out_D, 54 | const torch::Tensor& out_N, 55 | 56 | const bool debug); 57 | 58 | } 59 | 60 | #endif 61 | -------------------------------------------------------------------------------- /cuda/src/config.h: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 3 | 4 | NVIDIA CORPORATION and its licensors retain all intellectual property 5 | and proprietary rights in and to this software, related documentation 6 | and any modifications thereto. Any use, reproduction, disclosure or 7 | distribution of this software and related documentation without an express 8 | license agreement from NVIDIA CORPORATION is strictly prohibited. 
9 | *************************************************************************/ 10 | 11 | #ifndef RASTERIZER_CONFIG_H_INCLUDED 12 | #define RASTERIZER_CONFIG_H_INCLUDED 13 | 14 | #define BLOCK_X 16 15 | #define BLOCK_Y 16 16 | #define MAX_NUM_LEVELS 16 17 | #define MAX_ALPHA 0.99999f 18 | #define MIN_ALPHA 0.00001f 19 | #define EARLY_STOP_T 0.0001f 20 | 21 | #define STEP_SZ_SCALE 100.f 22 | 23 | #define MAX_N_SAMP 3 24 | 25 | // Below are the derived term from above 26 | #define BLOCK_SIZE (BLOCK_X * BLOCK_Y) 27 | #define NUM_BIT_ORDER_RANK (3 * MAX_NUM_LEVELS) 28 | #define NUM_BIT_TILE_ID (64 - NUM_BIT_ORDER_RANK) 29 | 30 | #endif 31 | -------------------------------------------------------------------------------- /cuda/src/forward.h: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 3 | 4 | NVIDIA CORPORATION and its licensors retain all intellectual property 5 | and proprietary rights in and to this software, related documentation 6 | and any modifications thereto. Any use, reproduction, disclosure or 7 | distribution of this software and related documentation without an express 8 | license agreement from NVIDIA CORPORATION is strictly prohibited. 9 | *************************************************************************/ 10 | 11 | #ifndef RASTERIZER_FORWARD_H_INCLUDED 12 | #define RASTERIZER_FORWARD_H_INCLUDED 13 | 14 | #include 15 | 16 | namespace FORWARD { 17 | 18 | // Interface for python to run forward rasterization. 19 | std::tuple 20 | rasterize_voxels( 21 | const int n_samp_per_vox, 22 | const int image_width, const int image_height, 23 | const float tan_fovx, const float tan_fovy, 24 | const float cx, const float cy, 25 | const torch::Tensor& w2c_matrix, 26 | const torch::Tensor& c2w_matrix, 27 | const float bg_color, 28 | const bool need_depth, 29 | const bool need_distortion, 30 | const bool need_normal, 31 | const bool track_max_w, 32 | 33 | const torch::Tensor& octree_paths, 34 | const torch::Tensor& vox_centers, 35 | const torch::Tensor& vox_lengths, 36 | const torch::Tensor& geos, 37 | const torch::Tensor& rgbs, 38 | 39 | const torch::Tensor& geomBuffer, 40 | 41 | const bool debug); 42 | 43 | } 44 | 45 | #endif 46 | -------------------------------------------------------------------------------- /cuda/src/geo_params_gather.h: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 3 | 4 | NVIDIA CORPORATION and its licensors retain all intellectual property 5 | and proprietary rights in and to this software, related documentation 6 | and any modifications thereto. Any use, reproduction, disclosure or 7 | distribution of this software and related documentation without an express 8 | license agreement from NVIDIA CORPORATION is strictly prohibited. 9 | *************************************************************************/ 10 | 11 | #ifndef GEO_PARAMS_GATHER_H_INCLUDED 12 | #define GEO_PARAMS_GATHER_H_INCLUDED 13 | 14 | #include 15 | 16 | namespace GEO_PARAMS_GATHER { 17 | 18 | // Python interface for gather grid points value into each voxel. 
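// gather_triinterp_geo_params gathers, for each voxel listed in `care_idx`, the values of its
// 8 corner grid points (looked up through `vox_key`, shaped [num_voxels, 8]) so each voxel's
// trilinear density field can be evaluated. The `_bw` variants scatter the incoming per-voxel
// gradients back onto the shared grid points (hence the `num_grid_pts` argument). The `feat`
// versions do the same for multi-channel per-grid-point features.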
19 | torch::Tensor gather_triinterp_geo_params( 20 | const torch::Tensor& vox_key, 21 | const torch::Tensor& care_idx, 22 | const torch::Tensor& grid_pts); 23 | 24 | torch::Tensor gather_triinterp_geo_params_bw( 25 | const torch::Tensor& vox_key, 26 | const torch::Tensor& care_idx, 27 | const int num_grid_pts, 28 | const torch::Tensor& dL_dgeo_params); 29 | 30 | torch::Tensor gather_triinterp_feat_params( 31 | const torch::Tensor& vox_key, 32 | const torch::Tensor& care_idx, 33 | const torch::Tensor& grid_pts); 34 | 35 | torch::Tensor gather_triinterp_feat_params_bw( 36 | const torch::Tensor& vox_key, 37 | const torch::Tensor& care_idx, 38 | const int num_grid_pts, 39 | const torch::Tensor& dL_dfeat_params); 40 | 41 | } 42 | 43 | #endif 44 | -------------------------------------------------------------------------------- /cuda/src/preprocess.h: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 3 | 4 | NVIDIA CORPORATION and its licensors retain all intellectual property 5 | and proprietary rights in and to this software, related documentation 6 | and any modifications thereto. Any use, reproduction, disclosure or 7 | distribution of this software and related documentation without an express 8 | license agreement from NVIDIA CORPORATION is strictly prohibited. 9 | *************************************************************************/ 10 | 11 | #ifndef RASTERIZER_PREPROCESS_H_INCLUDED 12 | #define RASTERIZER_PREPROCESS_H_INCLUDED 13 | 14 | #include 15 | 16 | namespace PREPROCESS { 17 | 18 | // Interface for python to find the voxel to render and compute some init values. 19 | std::tuple 20 | rasterize_preprocess( 21 | const int image_width, const int image_height, 22 | const float tan_fovx, const float tan_fovy, 23 | const float cx, const float cy, 24 | const torch::Tensor& w2c_matrix, 25 | const torch::Tensor& c2w_matrix, 26 | const float near, 27 | 28 | const torch::Tensor& octree_paths, 29 | const torch::Tensor& vox_centers, 30 | const torch::Tensor& vox_lengths, 31 | 32 | const bool debug); 33 | 34 | } 35 | 36 | #endif 37 | -------------------------------------------------------------------------------- /cuda/src/raster_state.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2023, Inria 3 | * GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | * All rights reserved. 5 | * 6 | * This software is free for non-commercial, research and evaluation use 7 | * under the terms of the LICENSE.md file. 8 | * 9 | * For inquiries contact george.drettakis@inria.fr 10 | */ 11 | 12 | /************************************************************************* 13 | Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 14 | 15 | NVIDIA CORPORATION and its licensors retain all intellectual property 16 | and proprietary rights in and to this software, related documentation 17 | and any modifications thereto. Any use, reproduction, disclosure or 18 | distribution of this software and related documentation without an express 19 | license agreement from NVIDIA CORPORATION is strictly prohibited. 
20 | *************************************************************************/ 21 | 22 | #ifndef RASTER_STATE_H_INCLUDED 23 | #define RASTER_STATE_H_INCLUDED 24 | 25 | #include 26 | #include 27 | 28 | namespace RASTER_STATE { 29 | 30 | std::function resizeFunctional(torch::Tensor& t); 31 | 32 | template 33 | static void obtain(char*& chunk, T*& ptr, std::size_t count, std::size_t alignment); 34 | 35 | template 36 | size_t required(size_t P); 37 | 38 | template 39 | size_t required(size_t P, size_t Q); 40 | 41 | struct GeometryState 42 | { 43 | // Voxel duplication related variables. 44 | // A voxel is duplicated by the # of touched tile times the # of camera quadrants. 45 | // We need to calculate the prefix sum (scan) for organizing the BinningState. 46 | uint32_t* n_duplicates; // <==> tiles_touched 47 | uint32_t* n_duplicates_scan; // <==> point_offsets; 48 | size_t scan_size; 49 | char* scanning_temp_space; 50 | uint2* bboxes; // The bbox region enclosing a projected voxel. 51 | 52 | // Voxel sorting related variables. 53 | // uint64_t* order_ranks; // <=> float* depths; // The ranking of the rendering order. 54 | uint32_t* cam_quadrant_bitsets; // The camera quadrants a voxel can reach. 55 | 56 | static GeometryState fromChunk(char*& chunk, size_t P); 57 | }; 58 | 59 | struct ImageState 60 | { 61 | uint2* ranges; 62 | uint32_t* tile_last; 63 | uint32_t* n_contrib; 64 | 65 | static ImageState fromChunk(char*& chunk, size_t N, size_t n_tiles); 66 | }; 67 | 68 | struct BinningState 69 | { 70 | size_t sorting_size; 71 | uint64_t* vox_list_keys_unsorted; 72 | uint64_t* vox_list_keys; 73 | uint32_t* vox_list_unsorted; 74 | uint32_t* vox_list; 75 | char* list_sorting_space; 76 | 77 | static BinningState fromChunk(char*& chunk, size_t P); 78 | }; 79 | 80 | std::tuple 81 | unpack_ImageState( 82 | const int image_width, const int image_height, 83 | const torch::Tensor& imageBuffer); 84 | 85 | torch::Tensor filter_geomState( 86 | const int ori_P, 87 | const torch::Tensor& indices, 88 | const torch::Tensor& geomState); 89 | 90 | } 91 | 92 | #endif -------------------------------------------------------------------------------- /cuda/src/sh_compute.h: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 3 | 4 | NVIDIA CORPORATION and its licensors retain all intellectual property 5 | and proprietary rights in and to this software, related documentation 6 | and any modifications thereto. Any use, reproduction, disclosure or 7 | distribution of this software and related documentation without an express 8 | license agreement from NVIDIA CORPORATION is strictly prohibited. 9 | *************************************************************************/ 10 | 11 | #ifndef SH_COMPUTE_H_INCLUDED 12 | #define SH_COMPUTE_H_INCLUDED 13 | 14 | #include 15 | 16 | namespace SH_COMPUTE { 17 | 18 | // Python interface for spherical harmonic computation. 
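// sh_compute evaluates the view-dependent color of the voxels selected by `idx`: spherical
// harmonics up to degree D are evaluated along the direction from the camera position
// `cam_pos` to each voxel center and combined with the per-voxel coefficients `sh0`
// (degree 0) and `shs` (higher degrees). sh_compute_bw returns the gradients with respect
// to sh0 and shs given the incoming color gradients `dL_drgbs`.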
19 | torch::Tensor sh_compute( 20 | const int D, 21 | const torch::Tensor& idx, 22 | const torch::Tensor& vox_centers, 23 | const torch::Tensor& cam_pos, 24 | const torch::Tensor& sh0, 25 | const torch::Tensor& shs); 26 | 27 | std::tuple sh_compute_bw( 28 | const int D, const int M, 29 | const torch::Tensor& idx, 30 | const torch::Tensor& vox_centers, 31 | const torch::Tensor& cam_pos, 32 | const torch::Tensor& rgbs, 33 | const torch::Tensor& dL_drgbs); 34 | 35 | } 36 | 37 | #endif 38 | -------------------------------------------------------------------------------- /cuda/src/tv_compute.cu: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 3 | 4 | NVIDIA CORPORATION and its licensors retain all intellectual property 5 | and proprietary rights in and to this software, related documentation 6 | and any modifications thereto. Any use, reproduction, disclosure or 7 | distribution of this software and related documentation without an express 8 | license agreement from NVIDIA CORPORATION is strictly prohibited. 9 | *************************************************************************/ 10 | 11 | #include "tv_compute.h" 12 | #include "auxiliary.h" 13 | 14 | #include 15 | #include 16 | 17 | #include 18 | namespace cg = cooperative_groups; 19 | 20 | namespace TV_COMPUTE { 21 | 22 | template 23 | __global__ void total_variation_bw_cuda( 24 | const int N, const int C, const int NC, 25 | const float* __restrict__ grid_pts, 26 | const int64_t* __restrict__ vox_key, 27 | const float weight, 28 | const float* __restrict__ vox_size_inv, 29 | float* __restrict__ grid_pts_grad) 30 | { 31 | auto idx = cg::this_grid().thread_rank(); 32 | if (idx >= NC) 33 | return; 34 | const int iN = idx / C; 35 | const int iC = idx % C; 36 | 37 | // Load from global memory. 38 | int i_book[8]; 39 | #pragma unroll 40 | for (int i=0, k=iN*8; i<8; ++i, ++k) 41 | i_book[i] = vox_key[k]; 42 | 43 | if (tv_sparse) 44 | { 45 | bool valid = false; 46 | for (int i=0; i<8; ++i) 47 | valid |= (grid_pts_grad[i_book[i] * C + iC] != 0.f); 48 | if (!valid) 49 | return; 50 | } 51 | 52 | float vlst[8]; 53 | #pragma unroll 54 | for (int i=0; i<8; ++i) 55 | vlst[i] = grid_pts[i_book[i] * C + iC]; 56 | 57 | float w = weight; 58 | if (!no_tv_s) 59 | w *= 0.01f * vox_size_inv[iN]; 60 | 61 | // Compute gradient wrt total variation loss 62 | int glst[8] = {0}; 63 | #pragma unroll 64 | for (int i=0; i<8; ++i) 65 | { 66 | glst[i] += (vlst[i] > vlst[i^0b001]) * 2 - 1; 67 | glst[i] += (vlst[i] > vlst[i^0b010]) * 2 - 1; 68 | glst[i] += (vlst[i] > vlst[i^0b100]) * 2 - 1; 69 | } 70 | 71 | float dtv_dgrid_pts[8]; 72 | #pragma unroll 73 | for (int i=0; i<8; ++i) 74 | dtv_dgrid_pts[i] = w * ((float)glst[i]); 75 | 76 | // Write back 77 | #pragma unroll 78 | for (int i=0; i<8; ++i) 79 | atomicAdd(grid_pts_grad + i_book[i] * C + iC, dtv_dgrid_pts[i]); 80 | } 81 | 82 | 83 | // Python interface to directly write the gradient of tv loss. 84 | void total_variation_bw( 85 | const torch::Tensor& grid_pts, 86 | const torch::Tensor& vox_key, 87 | const float weight, 88 | const torch::Tensor& vox_size_inv, 89 | const bool no_tv_s, 90 | const bool tv_sparse, 91 | const torch::Tensor& grid_pts_grad) 92 | { 93 | const int N = vox_key.size(0); 94 | const int C = grid_pts.size(1); 95 | const int NC = N * C; 96 | 97 | auto tv_kernel = 98 | (no_tv_s & tv_sparse) ? 
total_variation_bw_cuda : 99 | (no_tv_s) ? total_variation_bw_cuda : 100 | (tv_sparse) ? total_variation_bw_cuda : 101 | total_variation_bw_cuda ; 102 | 103 | if (N > 0) 104 | tv_kernel <<<(NC + 255) / 256, 256>>> ( 105 | N, C, NC, 106 | grid_pts.contiguous().data_ptr(), 107 | vox_key.contiguous().data_ptr(), 108 | weight, 109 | vox_size_inv.contiguous().data_ptr(), 110 | grid_pts_grad.contiguous().data_ptr()); 111 | } 112 | 113 | } 114 | -------------------------------------------------------------------------------- /cuda/src/tv_compute.h: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 3 | 4 | NVIDIA CORPORATION and its licensors retain all intellectual property 5 | and proprietary rights in and to this software, related documentation 6 | and any modifications thereto. Any use, reproduction, disclosure or 7 | distribution of this software and related documentation without an express 8 | license agreement from NVIDIA CORPORATION is strictly prohibited. 9 | *************************************************************************/ 10 | 11 | #ifndef TV_COMPUTE_H_INCLUDED 12 | #define TV_COMPUTE_H_INCLUDED 13 | 14 | #include 15 | 16 | namespace TV_COMPUTE { 17 | 18 | // Python interface to directly write the gradient of tv loss. 19 | void total_variation_bw( 20 | const torch::Tensor& grid_pts, 21 | const torch::Tensor& vox_key, 22 | const float weight, 23 | const torch::Tensor& vox_size_inv, 24 | const bool no_tv_s, 25 | const bool tv_sparse, 26 | const torch::Tensor& grid_pts_grad); 27 | 28 | } 29 | 30 | #endif 31 | -------------------------------------------------------------------------------- /cuda/src/utils.h: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 3 | 4 | NVIDIA CORPORATION and its licensors retain all intellectual property 5 | and proprietary rights in and to this software, related documentation 6 | and any modifications thereto. Any use, reproduction, disclosure or 7 | distribution of this software and related documentation without an express 8 | license agreement from NVIDIA CORPORATION is strictly prohibited. 
9 | *************************************************************************/ 10 | 11 | #ifndef UTILS_H_INCLUDED 12 | #define UTILS_H_INCLUDED 13 | 14 | #include <torch/extension.h> 15 | 16 | namespace UTILS { 17 | 18 | torch::Tensor is_in_cone( 19 | const float tanfovx, 20 | const float tanfovy, 21 | const float near, 22 | const torch::Tensor& w2c_matrix, 23 | const torch::Tensor& pts); 24 | 25 | torch::Tensor compute_rd( 26 | const int width, const int height, 27 | const float cx, const float cy, 28 | const float tanfovx, const float tanfovy, 29 | const torch::Tensor& c2w_matrix); 30 | 31 | torch::Tensor depth2pts( 32 | const int width, const int height, 33 | const float cx, const float cy, 34 | const float tanfovx, const float tanfovy, 35 | const torch::Tensor& c2w_matrix, 36 | const torch::Tensor& depth); 37 | 38 | torch::Tensor voxel_order_rank( 39 | const torch::Tensor& octree_paths); 40 | 41 | torch::Tensor ijk_2_octpath(const torch::Tensor& ijk, const torch::Tensor& octlevel); 42 | 43 | torch::Tensor octpath_2_ijk(const torch::Tensor& octpath, const torch::Tensor& octlevel); 44 | 45 | } 46 | 47 | #endif 48 | -------------------------------------------------------------------------------- /cuda/svraster_cuda/__init__.py: -------------------------------------------------------------------------------- 1 | from . import meta 2 | from . import utils 3 | from . import renderer 4 | from . import sparse_adam 5 | from . import grid_loss_bw 6 | -------------------------------------------------------------------------------- /cuda/svraster_cuda/grid_loss_bw.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | import torch 10 | from . import _C 11 | 12 | 13 | def total_variation(grid_pts, vox_key, weight, vox_size_inv, no_tv_s, tv_sparse, grid_pts_grad): 14 | assert grid_pts.shape == grid_pts_grad.shape 15 | assert len(vox_key.shape) == 2 and vox_key.shape[1] == 8 16 | assert vox_key.shape[0] == vox_size_inv.numel() 17 | _C.total_variation_bw(grid_pts, vox_key, weight, vox_size_inv, no_tv_s, tv_sparse, grid_pts_grad) 18 | -------------------------------------------------------------------------------- /cuda/svraster_cuda/meta.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | import torch 10 | from . import _C 11 | 12 | 13 | MAX_NUM_LEVELS = _C.MAX_NUM_LEVELS 14 | STEP_SZ_SCALE = _C.STEP_SZ_SCALE 15 | -------------------------------------------------------------------------------- /cuda/svraster_cuda/sparse_adam.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION.
All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | import torch 10 | from . import _C 11 | 12 | 13 | class SparseAdam(torch.optim.Optimizer): 14 | 15 | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-15, biased=False, sparse=False): 16 | if not 0.0 <= lr: 17 | raise ValueError("Invalid learning rate: {}".format(lr)) 18 | if not 0.0 <= eps: 19 | raise ValueError("Invalid epsilon value: {}".format(eps)) 20 | if not 0.0 <= betas[0] < 1.0: 21 | raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0])) 22 | if not 0.0 <= betas[1] < 1.0: 23 | raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1])) 24 | defaults = dict(lr=lr, betas=betas, eps=eps) 25 | super(SparseAdam, self).__init__(params, defaults) 26 | self.biased = biased 27 | self.sparse = sparse 28 | 29 | def __setstate__(self, state): 30 | super(SparseAdam, self).__setstate__(state) 31 | 32 | @torch.no_grad() 33 | def step(self): 34 | 35 | for group in self.param_groups: 36 | lr = group['lr'] 37 | beta1, beta2 = group['betas'] 38 | eps = group['eps'] 39 | 40 | for param in group['params']: 41 | if param.grad is not None: 42 | state = self.state[param] 43 | # Lazy state initialization 44 | if len(state) == 0: 45 | # Number of time each param is visited 46 | state['step'] = 0 47 | # Exponential moving average of gradient values 48 | state['exp_avg'] = torch.zeros_like(param, memory_format=torch.preserve_format) 49 | # Exponential moving average of squared gradient values 50 | state['exp_avg_sq'] = torch.zeros_like(param, memory_format=torch.preserve_format) 51 | 52 | state['step'] += 1 53 | 54 | if self.biased: 55 | _C.biased_adam_step( 56 | self.sparse, 57 | param, 58 | param.grad, 59 | state['exp_avg'], 60 | state['exp_avg_sq'], 61 | lr, beta1, beta2, eps 62 | ) 63 | else: 64 | _C.unbiased_adam_step( 65 | self.sparse, 66 | param, 67 | param.grad, 68 | state['exp_avg'], 69 | state['exp_avg_sq'], 70 | state['step'], 71 | lr, beta1, beta2, eps 72 | ) 73 | -------------------------------------------------------------------------------- /cuda/svraster_cuda/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | import torch 10 | from . import _C 11 | 12 | 13 | def voxel_order_rank(octree_paths): 14 | # Compute the eight possible voxel rendering orders. 
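The `SparseAdam` optimizer above delegates the actual update to fused CUDA kernels. As a point of reference, a plain PyTorch version of the standard bias-corrected Adam step that `_C.unbiased_adam_step` presumably implements is sketched below (the `biased` variant would skip the two bias-correction terms, and `sparse=True` is expected to skip entries whose gradient is exactly zero); this is the textbook update under those assumptions, not the kernel itself.

```python
import torch

@torch.no_grad()
def adam_step_reference(param, grad, exp_avg, exp_avg_sq, step, lr, beta1, beta2, eps):
    exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)                # first-moment EMA
    exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)   # second-moment EMA
    bias_c1 = 1 - beta1 ** step                                    # bias corrections
    bias_c2 = 1 - beta2 ** step
    denom = (exp_avg_sq / bias_c2).sqrt_().add_(eps)
    param.addcdiv_(exp_avg / bias_c1, denom, value=-lr)
```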
15 | order_ranks = _C.voxel_order_rank(octree_paths) 16 | return order_ranks 17 | 18 | 19 | def is_in_cone(pts, cam): 20 | assert torch.is_tensor(pts) 21 | assert pts.device == cam.w2c.device 22 | assert len(pts.shape) == 2 23 | assert pts.shape[1] == 3 24 | return _C.is_in_cone( 25 | cam.tanfovx, 26 | cam.tanfovy, 27 | cam.near, 28 | cam.w2c, 29 | pts) 30 | 31 | 32 | def compute_rd(width, height, cx, cy, tanfovx, tanfovy, c2w_matrix): 33 | assert torch.is_tensor(c2w_matrix) 34 | return _C.compute_rd(width, height, cx, cy, tanfovx, tanfovy, c2w_matrix) 35 | 36 | 37 | def depth2pts(width, height, cx, cy, tanfovx, tanfovy, c2w_matrix, depth): 38 | assert torch.is_tensor(c2w_matrix) 39 | assert depth.device == c2w_matrix.device 40 | assert depth.numel() == width * height 41 | return _C.depth2pts(width, height, cx, cy, tanfovx, tanfovy, c2w_matrix, depth) 42 | 43 | 44 | def ijk_2_octpath(ijk, octlevel): 45 | assert torch.is_tensor(ijk) and torch.is_tensor(octlevel) 46 | assert len(ijk.shape) == 2 and ijk.shape[1] == 3 47 | assert ijk.numel() == octlevel.numel() * 3 48 | assert ijk.dtype == torch.int64 49 | assert octlevel.dtype == torch.int8 50 | return _C.ijk_2_octpath(ijk, octlevel) 51 | 52 | 53 | def octpath_2_ijk(octpath, octlevel): 54 | assert torch.is_tensor(octpath) and torch.is_tensor(octlevel) 55 | assert octpath.numel() == octlevel.numel() 56 | assert octpath.dtype == torch.int64 57 | assert octlevel.dtype == torch.int8 58 | return _C.octpath_2_ijk(octpath, octlevel) 59 | -------------------------------------------------------------------------------- /eval.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 13 | # 14 | # NVIDIA CORPORATION and its licensors retain all intellectual property 15 | # and proprietary rights in and to this software, related documentation 16 | # and any modifications thereto. Any use, reproduction, disclosure or 17 | # distribution of this software and related documentation without an express 18 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 
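A hypothetical round-trip check for the `ijk_2_octpath` / `octpath_2_ijk` helpers above: the dtypes and shapes follow the assertions in the wrappers, but the bit layout of an octpath is internal to the CUDA kernels, so this only illustrates the inverse relationship the pair is expected to satisfy.

```python
import torch
from svraster_cuda import utils as cu

octlevel = torch.full((4,), 3, dtype=torch.int8, device="cuda")    # four voxels at octree level 3
ijk = torch.tensor([[0, 0, 0], [1, 2, 3], [7, 7, 7], [4, 0, 5]],
                   dtype=torch.int64, device="cuda")               # grid coordinates at that level
octpath = cu.ijk_2_octpath(ijk, octlevel)
assert torch.equal(cu.octpath_2_ijk(octpath, octlevel), ijk)       # expected to round-trip
```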
19 | 20 | 21 | import os 22 | import json 23 | import numpy as np 24 | from PIL import Image 25 | from tqdm import trange 26 | from pathlib import Path 27 | 28 | import torch 29 | 30 | from src.utils.image_utils import im_pil2tensor 31 | from src.utils.loss_utils import psnr_score, ssim_score, lpips_loss, correct_lpips_loss 32 | 33 | 34 | def read_pairs(renders_dir, gt_dir): 35 | renders = [] 36 | gts = [] 37 | image_names = [] 38 | for fname in os.listdir(renders_dir): 39 | render = Image.open(renders_dir / fname) 40 | gt = Image.open(gt_dir / fname) 41 | renders.append(im_pil2tensor(render).unsqueeze(0)) 42 | gts.append(im_pil2tensor(gt).unsqueeze(0)) 43 | image_names.append(fname) 44 | return renders, gts, image_names 45 | 46 | 47 | def evaluate(model_paths, split): 48 | full_dict = {} 49 | per_view_dict = {} 50 | 51 | for scene_dir in model_paths: 52 | print("Scene:", scene_dir) 53 | full_dict[scene_dir] = {} 54 | per_view_dict[scene_dir] = {} 55 | 56 | test_dir = scene_dir / split 57 | 58 | for method in os.listdir(test_dir): 59 | method_dir = test_dir / method 60 | if not method_dir.is_dir(): 61 | continue 62 | print("Method:", method) 63 | 64 | renders, gts, image_names = read_pairs( 65 | renders_dir=method_dir / "renders", 66 | gt_dir=method_dir/ "gt") 67 | 68 | ssims = [] 69 | psnrs = [] 70 | lpipss = [] 71 | correct_lpipss = [] 72 | 73 | for idx in trange(len(renders)): 74 | im_render = renders[idx].cuda() 75 | im_gt = gts[idx].cuda() 76 | ssims.append(ssim_score(im_render, im_gt).item()) 77 | psnrs.append(psnr_score(im_render, im_gt).item()) 78 | lpipss.append(lpips_loss(im_render, im_gt).item()) 79 | correct_lpipss.append(correct_lpips_loss(im_render, im_gt).item()) 80 | del im_render, im_gt 81 | torch.cuda.empty_cache() 82 | 83 | avg_ssim = np.mean(ssims) 84 | avg_psnr = np.mean(psnrs) 85 | avg_lpips = np.mean(lpipss) 86 | avg_correct_lpips = np.mean(correct_lpipss) 87 | 88 | print(f" SSIM : {avg_ssim:>12.7f}") 89 | print(f" PSNR : {avg_psnr:>12.7f}") 90 | print(f" LPIPS: {avg_lpips:>12.7f}") 91 | print(f" LPIPS: {avg_correct_lpips:>12.7f} (corrected)") 92 | print("") 93 | 94 | full_dict[scene_dir][method] = { 95 | "SSIM": avg_ssim, 96 | "PSNR": avg_psnr, 97 | "LPIPS": avg_lpips, 98 | "LPIPS-corrected": avg_correct_lpips, 99 | } 100 | per_view_dict[scene_dir][method] = { 101 | "SSIM": {name: ssim for ssim, name in zip(ssims, image_names)}, 102 | "PSNR": {name: psnr for psnr, name in zip(psnrs, image_names)}, 103 | "LPIPS": {name: lp for lp, name in zip(lpipss, image_names)}, 104 | "LPIPS-corrected": {name: lp for lp, name in zip(correct_lpipss, image_names)}, 105 | } 106 | 107 | with open(scene_dir / "results.json", 'w') as f: 108 | json.dump(full_dict[scene_dir], f, indent=True) 109 | with open(scene_dir / "per_view.json", 'w') as f: 110 | json.dump(per_view_dict[scene_dir], f, indent=True) 111 | print("Saved to", scene_dir / "results.json") 112 | print("Saved to", scene_dir / "per_view.json") 113 | 114 | 115 | if __name__ == "__main__": 116 | 117 | import argparse 118 | parser = argparse.ArgumentParser(description="Quantitative evaluation of the rendered images.") 119 | parser.add_argument('--split', type=str, default="test") 120 | parser.add_argument('model_paths', nargs=argparse.REMAINDER, type=Path) 121 | args = parser.parse_args() 122 | 123 | assert len(args.model_paths) > 0 124 | evaluate(args.model_paths, args.split) 125 | -------------------------------------------------------------------------------- /render_by_trace_sdf.py: 
-------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Inria 3 | # GRAPHDECO research group, https://team.inria.fr/graphdeco 4 | # All rights reserved. 5 | # 6 | # This software is free for non-commercial, research and evaluation use 7 | # under the terms of the LICENSE.md file. 8 | # 9 | # For inquiries contact george.drettakis@inria.fr 10 | # 11 | 12 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 13 | # 14 | # NVIDIA CORPORATION and its licensors retain all intellectual property 15 | # and proprietary rights in and to this software, related documentation 16 | # and any modifications thereto. Any use, reproduction, disclosure or 17 | # distribution of this software and related documentation without an express 18 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 19 | 20 | import os 21 | import time 22 | import numpy as np 23 | from tqdm import tqdm 24 | from os import makedirs 25 | import imageio 26 | 27 | import torch 28 | 29 | from src.config import cfg, update_argparser, update_config 30 | 31 | from src.dataloader.data_pack import DataPack 32 | from src.sparse_voxel_model import SparseVoxelModel 33 | from src.utils.image_utils import im_tensor2np, viz_tensordepth 34 | from src.utils.fuser_utils import Fuser 35 | 36 | 37 | @torch.no_grad() 38 | def render_set(name, iteration, suffix, args, views, voxel_model): 39 | 40 | render_path = os.path.join(args.model_path, name, f"ours_{iteration}{suffix}_trace_by_sdf", "renders") 41 | makedirs(render_path, exist_ok=True) 42 | print(f'render_path: {render_path}') 43 | print(f'ss =: {voxel_model.ss}') 44 | print(f'vox_geo_mode =: {voxel_model.vox_geo_mode}') 45 | print(f'density_mode =: {voxel_model.density_mode}') 46 | 47 | if args.eval_fps: 48 | torch.cuda.empty_cache() 49 | torch.cuda.reset_peak_memory_stats() 50 | 51 | if args.eval_fps: 52 | # Warmup 53 | voxel_model.render_trace_sdf(views[0]) 54 | 55 | eps_time = time.perf_counter() 56 | psnr_lst = [] 57 | for idx, view in enumerate(tqdm(views, desc="Rendering progress")): 58 | hit_depth, hit_vox_id = voxel_model.render_trace_sdf(view) 59 | if not args.eval_fps: 60 | rendering = voxel_model._sh0[hit_vox_id].moveaxis(-1, 0) 61 | rendering *= (hit_vox_id != -1) 62 | gt = view.image.cuda() 63 | mse = (rendering.clip(0,1) - gt.clip(0,1)).square().mean() 64 | psnr = -10 * torch.log10(mse) 65 | psnr_lst.append(psnr.item()) 66 | fname = view.image_name 67 | 68 | # RGB 69 | imageio.imwrite( 70 | os.path.join(render_path, fname + (".jpg" if args.use_jpg else ".png")), 71 | im_tensor2np(rendering) 72 | ) 73 | torch.cuda.synchronize() 74 | eps_time = time.perf_counter() - eps_time 75 | peak_mem = torch.cuda.memory_stats()["allocated_bytes.all.peak"] / 1024 ** 3 76 | if args.eval_fps: 77 | print(f'Eps time: {eps_time:.3f} sec') 78 | print(f"Peak mem: {peak_mem:.2f} GB") 79 | print(f'FPS : {len(views)/eps_time:.0f}') 80 | outtxt = os.path.join(args.model_path, name, "ours_{}{}.txt".format(iteration, suffix)) 81 | with open(outtxt, 'w') as f: 82 | f.write(f"n={len(views):.6f}\n") 83 | f.write(f"eps={eps_time:.6f}\n") 84 | f.write(f"peak_mem={peak_mem:.2f}\n") 85 | f.write(f"fps={len(views)/eps_time:.6f}\n") 86 | else: 87 | print('PSNR:', np.mean(psnr_lst)) 88 | 89 | 90 | if __name__ == "__main__": 91 | # Parse arguments 92 | import argparse 93 | parser = argparse.ArgumentParser( 94 | description="Sparse voxels raster rendering.") 95 | parser.add_argument('model_path') 96 | parser.add_argument("--iteration", 
default=-1, type=int) 97 | parser.add_argument("--skip_train", action="store_true") 98 | parser.add_argument("--skip_test", action="store_true") 99 | parser.add_argument("--eval_fps", action="store_true") 100 | parser.add_argument("--clear_res_down", action="store_true") 101 | parser.add_argument("--suffix", default="", type=str) 102 | parser.add_argument("--use_jpg", action="store_true") 103 | parser.add_argument("--overwrite_ss", default=None, type=float) 104 | parser.add_argument("--overwrite_vox_geo_mode", default=None) 105 | args = parser.parse_args() 106 | print("Rendering " + args.model_path) 107 | 108 | # Load config 109 | update_config(os.path.join(args.model_path, 'config.yaml')) 110 | 111 | if args.clear_res_down: 112 | cfg.data.res_downscale = 0 113 | cfg.data.res_width = 0 114 | 115 | # Load data 116 | data_pack = DataPack(cfg.data, cfg.model.white_background, camera_params_only=False) 117 | 118 | # Load model 119 | voxel_model = SparseVoxelModel(cfg.model) 120 | loaded_iter = voxel_model.load_iteration(args.model_path, args.iteration) 121 | 122 | # Output path suffix 123 | suffix = args.suffix 124 | if not args.suffix: 125 | if cfg.data.res_downscale > 0: 126 | suffix += f"_r{cfg.data.res_downscale}" 127 | if cfg.data.res_width > 0: 128 | suffix += f"_w{cfg.data.res_width}" 129 | 130 | if args.overwrite_ss: 131 | voxel_model.ss = args.overwrite_ss 132 | if not args.suffix: 133 | suffix += f"_ss{args.overwrite_ss:.2f}" 134 | 135 | if args.overwrite_vox_geo_mode: 136 | voxel_model.vox_geo_mode = args.overwrite_vox_geo_mode 137 | if not args.suffix: 138 | suffix += f"_{args.overwrite_vox_geo_mode}" 139 | 140 | # Fuse sdf and rgb 141 | volume = Fuser( 142 | xyz=voxel_model.grid_pts_xyz, 143 | bandwidth=voxel_model.vox_size.min().item() * 20, 144 | # bandwidth=torch.zeros([len(voxel_model.grid_pts_xyz)], dtype=torch.float32, device="cuda").index_reduce_( 145 | # dim=0, 146 | # index=voxel_model.vox_key.flatten(), 147 | # source=voxel_model.vox_size.repeat(1, 8).flatten(), 148 | # reduce="amax") * 3, 149 | use_trunc=True, 150 | fuse_tsdf=True, 151 | feat_dim=3) 152 | 153 | for cam in tqdm(data_pack.get_train_cameras()): 154 | median_depth, median_idx = voxel_model.render_median(cam) 155 | volume.integrate(cam=cam, feat=cam.image.cuda(), depth=median_depth) 156 | 157 | voxel_model._shs.data.fill_(0) 158 | voxel_model._sh0.data.copy_( 159 | volume.feature.nan_to_num_()[voxel_model.vox_key].mean(dim=1)) 160 | voxel_model._geo_grid_pts.data.copy_( 161 | volume.tsdf.nan_to_num_()) 162 | 163 | del volume 164 | torch.cuda.empty_cache() 165 | 166 | # Start rendering 167 | voxel_model.freeze_vox_geo() 168 | 169 | if not args.skip_train: 170 | render_set( 171 | "train", loaded_iter, suffix, args, 172 | data_pack.get_train_cameras(), voxel_model) 173 | 174 | if not args.skip_test: 175 | render_set( 176 | "test", loaded_iter, suffix, args, 177 | data_pack.get_test_cameras(), voxel_model) 178 | -------------------------------------------------------------------------------- /render_fly_through.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. 
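In `render_by_trace_sdf.py` above, a `Fuser` (from `src/utils/fuser_utils.py`, not listed here) integrates per-view median depth and color into a truncated SDF at the grid points before tracing. The sketch below is the classic per-point TSDF update such a fuser typically performs, shown only to clarify the `integrate()` loop; the repository's implementation is batched on the GPU and may differ in details.

```python
def tsdf_update(tsdf, weight, signed_dist, trunc, obs_weight=1.0):
    # Truncate the observed signed distance and fold it into a running weighted average.
    d = max(-1.0, min(1.0, signed_dist / trunc))
    new_tsdf = (tsdf * weight + d * obs_weight) / (weight + obs_weight)
    return new_tsdf, weight + obs_weight
```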
Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | import os 10 | import time 11 | import numpy as np 12 | from tqdm import tqdm 13 | from os import makedirs 14 | import imageio 15 | 16 | import torch 17 | 18 | from src.config import cfg, update_argparser, update_config 19 | 20 | from src.dataloader.data_pack import DataPack 21 | from src.sparse_voxel_model import SparseVoxelModel 22 | from src.cameras import MiniCam 23 | from src.utils.image_utils import im_tensor2np, viz_tensordepth 24 | from src.utils.camera_utils import interpolate_poses 25 | 26 | 27 | if __name__ == "__main__": 28 | # Parse arguments 29 | import argparse 30 | parser = argparse.ArgumentParser( 31 | description="Sparse voxels raster rendering.") 32 | parser.add_argument('model_path') 33 | parser.add_argument("--iteration", default=-1, type=int) 34 | parser.add_argument("--n_frames", default=300, type=int) 35 | parser.add_argument("--save_scale", default=1.0, type=float) 36 | 37 | # Manually select which frames to interpolate 38 | parser.add_argument("--ids", default=[], type=int, nargs='*') 39 | 40 | # Use farthest point sampling to select key frame 41 | parser.add_argument("--starting_id", default=0, type=int) 42 | 43 | # Other tweaking 44 | parser.add_argument("--step_forward", default=0, type=float) 45 | 46 | args = parser.parse_args() 47 | print("Rendering " + args.model_path) 48 | 49 | # Load config 50 | update_config(os.path.join(args.model_path, 'config.yaml')) 51 | 52 | # Load data 53 | data_pack = DataPack( 54 | source_path=cfg.data.source_path, 55 | image_dir_name=cfg.data.image_dir_name, 56 | res_downscale=cfg.data.res_downscale, 57 | res_width=cfg.data.res_width, 58 | skip_blend_alpha=cfg.data.skip_blend_alpha, 59 | alpha_is_white=cfg.model.white_background, 60 | data_device=cfg.data.data_device, 61 | use_test=cfg.data.eval, 62 | test_every=cfg.data.test_every, 63 | camera_params_only=True, 64 | ) 65 | 66 | # Interpolate poses 67 | cams = data_pack.get_train_cameras() 68 | if len(args.ids): 69 | key_poses = [cams[i].c2w.cpu().numpy() for i in args.ids] 70 | else: 71 | cam_pos = torch.stack([cam.position for cam in cams]) 72 | ids = [args.starting_id] 73 | for _ in range(3): 74 | farthest_id = torch.cdist(cam_pos[ids], cam_pos).amin(0).argmax().item() 75 | ids.append(farthest_id) 76 | ids[1], ids[2] = ids[2], ids[1] 77 | key_poses = [cams[i].c2w.cpu().numpy() for i in ids] 78 | 79 | if args.step_forward != 0: 80 | for i in range(len(key_poses)): 81 | lookat = key_poses[i][:3, 2] 82 | key_poses[i][:3, 3] += args.step_forward * lookat 83 | 84 | interp_poses = interpolate_poses(key_poses, n_frame=args.n_frames, periodic=True) 85 | 86 | # Load model 87 | voxel_model = SparseVoxelModel( 88 | n_samp_per_vox=cfg.model.n_samp_per_vox, 89 | sh_degree=cfg.model.sh_degree, 90 | ss=cfg.model.ss, 91 | white_background=cfg.model.white_background, 92 | black_background=cfg.model.black_background, 93 | ) 94 | loaded_iter = voxel_model.load_iteration(args.model_path, args.iteration) 95 | voxel_model.freeze_vox_geo() 96 | 97 | # Rendering 98 | fovx = cams[0].fovx 99 | fovy = cams[0].fovy 100 | width = cams[0].image_width 101 | height = cams[0].image_height 102 | 103 | video = [] 104 | for pose in tqdm(interp_poses, desc="Rendering progress"): 105 | 106 | cam = MiniCam( 107 | c2w=pose, 108 | fovx=fovx, fovy=fovy, 109 | width=width, height=height) 110 | 111 | with torch.no_grad(): 112 
| render_pkg = voxel_model.render(cam) 113 | rendering = render_pkg['color'] 114 | 115 | if args.save_scale != 0: 116 | rendering = torch.nn.functional.interpolate( 117 | rendering[None], 118 | scale_factor=args.save_scale, 119 | mode="bilinear", 120 | antialias=True)[0] 121 | 122 | video.append(im_tensor2np(rendering)) 123 | 124 | outpath = os.path.join(args.model_path, "render_fly_through.mp4") 125 | imageio.mimwrite(outpath, video, fps=30) 126 | print("Save to", outpath) 127 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | einops 3 | opencv-python==4.8.0.74 4 | opencv-contrib-python==4.8.0.74 5 | 6 | yacs 7 | tqdm 8 | natsort 9 | argparse 10 | pillow 11 | imageio 12 | imageio-ffmpeg 13 | scikit-image 14 | pycolmap 15 | 16 | plyfile 17 | shapely 18 | trimesh==4.0.4 19 | open3d==0.18.0 20 | gpytoolbox 21 | 22 | lpips 23 | pytorch-msssim 24 | git+https://github.com/rahul-goel/fused-ssim/ 25 | -------------------------------------------------------------------------------- /scripts/dtu_clean_for_eval.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | import os 10 | import cv2 11 | import glob 12 | import trimesh 13 | import numpy as np 14 | from tqdm import trange 15 | 16 | 17 | if __name__ == '__main__': 18 | # Parse arguments 19 | import argparse 20 | parser = argparse.ArgumentParser(description="Clean mesh for evaluation.") 21 | parser.add_argument('data_dir') 22 | parser.add_argument('mesh_path') 23 | args = parser.parse_args() 24 | 25 | # Read mesh 26 | mesh = trimesh.load(args.mesh_path) 27 | print("Loaded mesh:", mesh) 28 | 29 | # Start cleaning 30 | print('Running DTU_clean_mesh_by_mask...') 31 | verts = np.copy(mesh.vertices[:]) 32 | faces = np.copy(mesh.faces[:]) 33 | cameras = np.load(f'{args.data_dir}/cameras_sphere.npz') 34 | mask_lis = sorted(glob.glob(f'{args.data_dir}/mask/*.png')) 35 | 36 | n_images = len(mask_lis) 37 | mask = np.ones(len(verts), dtype=bool) 38 | for i in trange(n_images): 39 | P = cameras[f'world_mat_{i}'] 40 | pts_image = np.matmul(P[None, :3, :3], verts[:, :, None]).squeeze() + P[None, :3, 3] 41 | pts_image = pts_image / pts_image[:, 2:] 42 | pts_image = np.round(pts_image).astype(np.int32) + 1 43 | mask_image = cv2.imread(mask_lis[i]) 44 | kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (25, 25)) 45 | mask_image = cv2.dilate(mask_image, kernel, iterations=1) 46 | mask_image = (mask_image[:, :, 0] > 128) 47 | mask_image = np.concatenate([np.ones([1, 1600]), mask_image, np.ones([1, 1600])], axis=0) 48 | mask_image = np.concatenate([np.ones([1202, 1]), mask_image, np.ones([1202, 1])], axis=1) 49 | curr_mask = mask_image[(pts_image[:, 1].clip(0, 1201), pts_image[:, 0].clip(0, 1601))] 50 | mask &= curr_mask.astype(bool) 51 | 52 | print('Valid vertices ratio:', mask.mean()) 53 | 54 | indexes = np.full(len(verts), -1, dtype=np.int64) 55 | indexes[np.where(mask)] = np.arange(len(np.where(mask)[0])) 56 | 57 | faces_mask = 
mask[faces[:, 0]] & mask[faces[:, 1]] & mask[faces[:, 2]] 58 | new_faces = faces[np.where(faces_mask)] 59 | new_faces[:, 0] = indexes[new_faces[:, 0]] 60 | new_faces[:, 1] = indexes[new_faces[:, 1]] 61 | new_faces[:, 2] = indexes[new_faces[:, 2]] 62 | new_vertices = verts[np.where(mask)] 63 | 64 | mesh = trimesh.Trimesh(new_vertices, new_faces) 65 | try: 66 | print('Kept only the largest CC') 67 | meshes = mesh.split(only_watertight=False) 68 | mesh = meshes[np.argmax([len(mesh.faces) for mesh in meshes])] 69 | except: 70 | print('Failed') 71 | outdir, outfname = os.path.split(args.mesh_path) 72 | outfname = outfname[:-4] + '_cleaned_for_eval.ply' 73 | mesh.export(os.path.join(outdir, outfname)) 74 | -------------------------------------------------------------------------------- /scripts/dtu_eval/Offical_DTU_Dataset: -------------------------------------------------------------------------------- 1 | /ssd/chengs/DTU/Offical_DTU_Dataset -------------------------------------------------------------------------------- /scripts/dtu_preproc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | import os 10 | import subprocess 11 | import json 12 | from argparse import ArgumentParser 13 | import glob 14 | import numpy as np 15 | import cv2 16 | from natsort import natsorted 17 | import math 18 | from tqdm import tqdm 19 | from PIL import Image 20 | 21 | def fov2focal(fov, pixels): 22 | return pixels / (2 * math.tan(fov / 2)) 23 | 24 | def focal2fov(focal, pixels): 25 | return 2*math.atan(pixels/(2*focal)) 26 | 27 | def load_K_Rt_from_P(filename, P=None): 28 | # This function is borrowed from IDR: https://github.com/lioryariv/idr 29 | if P is None: 30 | lines = open(filename).read().splitlines() 31 | if len(lines) == 4: 32 | lines = lines[1:] 33 | lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] 34 | P = np.asarray(lines).astype(np.float32).squeeze() 35 | 36 | out = cv2.decomposeProjectionMatrix(P) 37 | K = out[0] 38 | R = out[1] 39 | t = out[2] 40 | 41 | K = K / K[2, 2] 42 | intrinsics = np.eye(4) 43 | intrinsics[:3, :3] = K 44 | 45 | pose = np.eye(4, dtype=np.float32) 46 | pose[:3, :3] = R.transpose() 47 | pose[:3, 3] = (t[:3] / t[3])[:, 0] 48 | 49 | return intrinsics, pose 50 | 51 | 52 | parser = ArgumentParser(description="Training script parameters") 53 | parser.add_argument('dataset_root') 54 | args = parser.parse_args() 55 | 56 | for scene in os.listdir(args.dataset_root): 57 | scene_path = os.path.join(args.dataset_root, scene) 58 | if not os.path.isdir(scene_path) or 'scan' not in scene: 59 | continue 60 | 61 | camera_param = dict(np.load(os.path.join(scene_path, 'cameras_sphere.npz'))) 62 | images_lis = sorted(glob.glob(os.path.join(scene_path, 'image/*.png'))) 63 | 64 | train = dict(camera_angle_x=0, frames=[]) 65 | test = dict(camera_angle_x=0, frames=[]) 66 | for idx, image in enumerate(images_lis): 67 | image = os.path.basename(image) 68 | stem = os.path.splitext(image)[0] 69 | 70 | world_mat = camera_param['world_mat_%d' % idx] 71 | scale_mat = camera_param['scale_mat_%d' % 
idx] 72 | 73 | # scale and decompose 74 | P = world_mat @ scale_mat 75 | P = P[:3, :4] 76 | intrinsic_param, c2w = load_K_Rt_from_P(None, P) 77 | 78 | fx = float(intrinsic_param[0][0]) 79 | fy = float(intrinsic_param[1][1]) 80 | cx = float(intrinsic_param[0][2]) 81 | cy = float(intrinsic_param[1][2]) 82 | w, h = Image.open(os.path.join(scene_path, 'image', image)).size 83 | camera_angle_x = focal2fov(fx, w) 84 | camera_angle_y = focal2fov(fy, h) 85 | 86 | # To synthetic blender format 87 | c2w[:3, 1:3] *= -1 88 | 89 | frame = { 90 | "file_path": 'image/' + stem, 91 | "mask_path": f'mask/{int(stem):03d}.png', 92 | "camera_angle_x": camera_angle_x, 93 | "camera_angle_y": camera_angle_y, 94 | "cx_p": cx / w, 95 | "cy_p": cy / h, 96 | "transform_matrix": c2w.tolist() 97 | } 98 | if idx % 8 == 0: 99 | test['frames'].append(frame) 100 | else: 101 | train['frames'].append(frame) 102 | 103 | out_train_path = os.path.join(scene_path, 'transforms_train.json') 104 | out_test_path = os.path.join(scene_path, 'transforms_test.json') 105 | with open(out_train_path, 'w') as f: 106 | json.dump(train, f, indent=4) 107 | 108 | with open(out_test_path, 'w') as f: 109 | json.dump(test, f, indent=4) 110 | 111 | # Write down scene bound 112 | out_bound_path = os.path.join(scene_path, 'nerf_normalization.json') 113 | with open(out_bound_path, 'w') as f: 114 | json.dump({"center": [0.,0.,0.], "radius": 1.0}, f, indent=4) 115 | 116 | np.savetxt( 117 | os.path.join(scene_path, 'to_world_matrix.txt'), 118 | camera_param['scale_mat_0']) 119 | -------------------------------------------------------------------------------- /scripts/dtu_run.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 
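A note on the `c2w[:3, 1:3] *= -1` step in `dtu_preproc.py` above: `cv2.decomposeProjectionMatrix` yields an OpenCV-style pose (x right, y down, z forward), whereas the synthetic-NeRF/Blender transforms format expects x right, y up, z backward, so the camera y and z basis vectors are negated. The same conversion as a standalone helper:

```python
import numpy as np

def opencv_to_blender_c2w(c2w_opencv):
    # Negate the camera-frame y and z axes (columns 1 and 2 of the rotation block).
    c2w = np.asarray(c2w_opencv, dtype=np.float64).copy()
    c2w[:3, 1:3] *= -1
    return c2w
```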
8 | 9 | DATA_ROOT=data/dtu_preproc 10 | PATH_TO_OFFICIAL_DTU="scripts/dtu_eval/Offical_DTU_Dataset/" 11 | 12 | lanuch_exp() { 13 | local scene_id="$1" 14 | shift 15 | local output_dir="$1" 16 | shift 17 | local exp_args="$*" 18 | 19 | local scene_name=scan"$scene_id" 20 | 21 | python train.py --cfg_files cfg/dtu_mesh.yaml --source_path $DATA_ROOT/dtu_"$scene_name"/ --model_path $output_dir/$scene_name $exp_args 22 | python render.py $output_dir/$scene_name --skip_test --eval_fps 23 | python render.py $output_dir/$scene_name --skip_test --rgb_only --use_jpg 24 | python render_fly_through.py $output_dir/$scene_name/ 25 | 26 | python extract_mesh.py $output_dir/$scene_name/ --save_gpu --use_vert_color --init_lv 8 --final_lv 10 --mesh_fname mesh_dense 27 | 28 | mkdir -p $output_dir/$scene_name/mesh/latest/evaluation 29 | python scripts/dtu_clean_for_eval.py $DATA_ROOT/dtu_"$scene_name"/ \ 30 | $output_dir/$scene_name/mesh/latest/mesh_dense.ply 31 | python scripts/dtu_eval/eval.py \ 32 | --data $output_dir/$scene_name/mesh/latest/mesh_dense_cleaned_for_eval.ply \ 33 | --scan $scene_id --dataset_dir $PATH_TO_OFFICIAL_DTU \ 34 | --vis_out_dir $output_dir/$scene_name/mesh/latest/evaluation 35 | rm -r $output_dir/$scene_name/checkpoints/ 36 | } 37 | 38 | 39 | for scene in 24 37 40 55 63 65 69 83 97 105 106 110 114 118 122 40 | do 41 | echo "============ start " $scene " ============" 42 | lanuch_exp $scene $1 "${@:2}" 43 | echo "============ end " $scene " ============" 44 | done 45 | -------------------------------------------------------------------------------- /scripts/dtu_stat.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 
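Usage of `dtu_run.sh` above, as inferred from its argument handling: the first argument becomes the experiment output directory and any remaining arguments are forwarded to `train.py`, e.g. `bash scripts/dtu_run.sh outputs/dtu_exp` optionally followed by extra training flags (the output path here is only an example).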
8 | 9 | import os 10 | import subprocess 11 | import json 12 | from argparse import ArgumentParser 13 | import glob 14 | import pandas as pd 15 | 16 | parser = ArgumentParser(description="Training script parameters") 17 | parser.add_argument('result_root') 18 | args = parser.parse_args() 19 | 20 | scenes = [ 21 | 'scan24', 'scan37', 'scan40', 'scan55', 'scan63', 'scan65', 'scan69', 'scan83', 'scan97', 'scan105', 'scan106', 'scan110', 'scan114', 'scan118', 'scan122' 22 | ] 23 | 24 | cf = [] 25 | tr_time = [] 26 | fps = [] 27 | n_voxels = [] 28 | 29 | for scene in scenes: 30 | eval_path = sorted(glob.glob(f'{args.result_root}/{scene}/test_stat/iter*.json')) 31 | if len(eval_path): 32 | eval_path = eval_path[-1] 33 | with open(eval_path) as f: 34 | ret = json.load(f) 35 | tr_time.append(ret['elapsed'] / 1000) 36 | n_voxels.append(ret['n_voxels']) 37 | else: 38 | tr_time.append(0) 39 | n_voxels.append(0) 40 | 41 | eval_path = sorted(glob.glob(f'{args.result_root}/{scene}/train/*.txt')) 42 | if len(eval_path): 43 | eval_path = eval_path[-1] 44 | with open(eval_path) as f: 45 | fps.append(float([line.strip().split('=')[1] for line in f if line.startswith('fps')][-1])) 46 | else: 47 | fps.append(0) 48 | 49 | eval_path = f'{args.result_root}/{scene}/mesh/latest/mesh_dense_cleaned_for_eval.ply.json' 50 | if os.path.isfile(eval_path): 51 | with open(eval_path) as f: 52 | ret = json.load(f) 53 | cf.append(ret['overall']) 54 | else: 55 | cf.append(10) 56 | 57 | 58 | 59 | def format_df_string(df): 60 | df = df.copy() 61 | df['scene'] = df['scene'].map(lambda s: s.rjust(15)) 62 | df['cf-dist'] = df['cf-dist'].round(2) 63 | df['tr-mins'] = (df['tr-mins'] / 60).round(1) 64 | df['fps'] = df['fps'].round(1) 65 | df['#vox(M)'] = (df['#vox(M)'] / 1_000_000).round(1) 66 | return df.to_string(index=False) 67 | 68 | def add_avg_row(df): 69 | df_avg = df.mean(axis=0, numeric_only=True).to_frame().transpose() 70 | df_avg['scene'] = 'AVG' 71 | return pd.concat([df, df_avg], ignore_index=True) 72 | 73 | df = pd.DataFrame({ 74 | 'scene': scenes, 75 | 'cf-dist': cf, 76 | 'tr-mins': tr_time, 77 | 'fps': fps, 78 | '#vox(M)': n_voxels, 79 | }) 80 | df = add_avg_row(df) 81 | 82 | print(format_df_string(df)) 83 | 84 | -------------------------------------------------------------------------------- /scripts/eval_tnt/README.md: -------------------------------------------------------------------------------- 1 | # Python Toolbox for Evaluation 2 | 3 | This Python script evaluates **training** dataset of TanksAndTemples benchmark. 4 | The script requires ``Open3D`` and a few Python packages such as ``matplotlib``, ``json``, and ``numpy``. 5 | 6 | ## How to use: 7 | **Step 0**. Reconstruct 3D models and recover camera poses from the training dataset. 8 | The raw videos of the training dataset can be found from: 9 | https://tanksandtemples.org/download/ 10 | 11 | **Step 1**. Download evaluation data (ground truth geometry + reference reconstruction) using 12 | [this link](https://drive.google.com/open?id=1UoKPiUUsKa0AVHFOrnMRhc5hFngjkE-t). In this example, we regard ``TanksAndTemples/evaluation/data/`` as a dataset folder. 13 | 14 | **Step 2**. Install Open3D. Follow instructions in http://open3d.org/docs/getting_started.html 15 | 16 | **Step 3**. Run the evaluation script and grab some coffee. 
17 | ``` 18 | # firstly, run cull_mesh.py to cull mesh and then 19 | ./run.sh Barn 20 | ``` 21 | Output (evaluation of Ignatius): 22 | ``` 23 | =========================== 24 | Evaluating Ignatius 25 | =========================== 26 | path/to/TanksAndTemples/evaluation/data/Ignatius/Ignatius_COLMAP.ply 27 | Reading PLY: [========================================] 100% 28 | Read PointCloud: 6929586 vertices. 29 | path/to/TanksAndTemples/evaluation/data/Ignatius/Ignatius.ply 30 | Reading PLY: [========================================] 100% 31 | : 32 | ICP Iteration #0: Fitness 0.9980, RMSE 0.0044 33 | ICP Iteration #1: Fitness 0.9980, RMSE 0.0043 34 | ICP Iteration #2: Fitness 0.9980, RMSE 0.0043 35 | ICP Iteration #3: Fitness 0.9980, RMSE 0.0043 36 | ICP Iteration #4: Fitness 0.9980, RMSE 0.0042 37 | ICP Iteration #5: Fitness 0.9980, RMSE 0.0042 38 | ICP Iteration #6: Fitness 0.9979, RMSE 0.0042 39 | ICP Iteration #7: Fitness 0.9979, RMSE 0.0042 40 | ICP Iteration #8: Fitness 0.9979, RMSE 0.0042 41 | ICP Iteration #9: Fitness 0.9979, RMSE 0.0042 42 | ICP Iteration #10: Fitness 0.9979, RMSE 0.0042 43 | [EvaluateHisto] 44 | Cropping geometry: [========================================] 100% 45 | Pointcloud down sampled from 6929586 points to 1449840 points. 46 | Pointcloud down sampled from 1449840 points to 1365628 points. 47 | path/to/TanksAndTemples/evaluation/data/Ignatius/evaluation//Ignatius.precision.ply 48 | Cropping geometry: [========================================] 100% 49 | Pointcloud down sampled from 5016769 points to 4957123 points. 50 | Pointcloud down sampled from 4957123 points to 4181506 points. 51 | [compute_point_cloud_to_point_cloud_distance] 52 | [compute_point_cloud_to_point_cloud_distance] 53 | : 54 | [ViewDistances] Add color coding to visualize error 55 | [ViewDistances] Add color coding to visualize error 56 | : 57 | [get_f1_score_histo2] 58 | ============================== 59 | evaluation result : Ignatius 60 | ============================== 61 | distance tau : 0.003 62 | precision : 0.7679 63 | recall : 0.7937 64 | f-score : 0.7806 65 | ============================== 66 | ``` 67 | 68 | **Step 5**. Go to the evaluation folder. ``TanksAndTemples/evaluation/data/Ignatius/evaluation/`` will have the following outputs. 69 | 70 | 71 | 72 | ``PR_Ignatius_@d_th_0_0030.pdf`` (Precision and recall curves with a F-score) 73 | 74 | | | | 75 | |--|--| 76 | | ``Ignatius.precision.ply`` | ``Ignatius.recall.ply`` | 77 | 78 | (3D visualization of precision and recall. 
Each mesh is color coded using hot colormap) 79 | 80 | # Requirements 81 | 82 | - Python 3 83 | - open3d v0.9.0 84 | - matplotlib 85 | -------------------------------------------------------------------------------- /scripts/eval_tnt/config.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------------- 2 | # - TanksAndTemples Website Toolbox - 3 | # - http://www.tanksandtemples.org - 4 | # ---------------------------------------------------------------------------- 5 | # The MIT License (MIT) 6 | # 7 | # Copyright (c) 2017 8 | # Arno Knapitsch 9 | # Jaesik Park 10 | # Qian-Yi Zhou 11 | # Vladlen Koltun 12 | # 13 | # Permission is hereby granted, free of charge, to any person obtaining a copy 14 | # of this software and associated documentation files (the "Software"), to deal 15 | # in the Software without restriction, including without limitation the rights 16 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 17 | # copies of the Software, and to permit persons to whom the Software is 18 | # furnished to do so, subject to the following conditions: 19 | # 20 | # The above copyright notice and this permission notice shall be included in 21 | # all copies or substantial portions of the Software. 22 | # 23 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 24 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 25 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 26 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 27 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 28 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 29 | # THE SOFTWARE. 30 | # ---------------------------------------------------------------------------- 31 | 32 | # some global parameters - do not modify 33 | scenes_tau_dict = { 34 | "Barn": 0.01, 35 | "Caterpillar": 0.005, 36 | "Church": 0.025, 37 | "Courthouse": 0.025, 38 | "Ignatius": 0.003, 39 | "Meetingroom": 0.01, 40 | "Truck": 0.005, 41 | } 42 | -------------------------------------------------------------------------------- /scripts/eval_tnt/evaluate_single_scene.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import cv2 5 | import numpy as np 6 | import os 7 | import glob 8 | from skimage.morphology import binary_dilation, disk 9 | import argparse 10 | 11 | import trimesh 12 | from pathlib import Path 13 | import subprocess 14 | import sys 15 | import json 16 | 17 | 18 | if __name__ == "__main__": 19 | 20 | parser = argparse.ArgumentParser( 21 | description='Arguments to evaluate the mesh.' 
22 | ) 23 | 24 | parser.add_argument('--input_mesh', type=str, help='path to the mesh to be evaluated') 25 | parser.add_argument('--scene', type=str, help='scan id of the input mesh') 26 | parser.add_argument('--output_dir', type=str, default='evaluation_results_single', help='path to the output folder') 27 | parser.add_argument('--TNT', type=str, default='Offical_DTU_Dataset', help='path to the GT DTU point clouds') 28 | args = parser.parse_args() 29 | 30 | 31 | TNT_Dataset = args.TNT 32 | out_dir = args.output_dir 33 | Path(out_dir).mkdir(parents=True, exist_ok=True) 34 | scene = args.scene 35 | ply_file = args.input_mesh 36 | result_mesh_file = os.path.join(out_dir, "culled_mesh.ply") 37 | # read scene.json 38 | f"python run.py --dataset-dir {ply_file} --traj-path {TNT_Dataset}/{scene}/{scene}_COLMAP_SfM.log --ply-path {TNT_Dataset}/{scene}/{scene}_COLMAP.ply" -------------------------------------------------------------------------------- /scripts/eval_tnt/help_func.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf-8 3 | import torch 4 | 5 | def rotation_matrix(a, b): 6 | """Compute the rotation matrix that rotates vector a to vector b. 7 | 8 | Args: 9 | a: The vector to rotate. 10 | b: The vector to rotate to. 11 | Returns: 12 | The rotation matrix. 13 | """ 14 | a = a / torch.linalg.norm(a) 15 | b = b / torch.linalg.norm(b) 16 | v = torch.cross(a, b) 17 | c = torch.dot(a, b) 18 | # If vectors are exactly opposite, we add a little noise to one of them 19 | if c < -1 + 1e-8: 20 | eps = (torch.rand(3) - 0.5) * 0.01 21 | return rotation_matrix(a + eps, b) 22 | s = torch.linalg.norm(v) 23 | skew_sym_mat = torch.Tensor( 24 | [ 25 | [0, -v[2], v[1]], 26 | [v[2], 0, -v[0]], 27 | [-v[1], v[0], 0], 28 | ] 29 | ) 30 | return torch.eye(3) + skew_sym_mat + skew_sym_mat @ skew_sym_mat * ((1 - c) / (s**2 + 1e-8)) 31 | 32 | 33 | def auto_orient_and_center_poses( 34 | poses, method="up", center_poses=True 35 | ): 36 | """Orients and centers the poses. We provide two methods for orientation: pca and up. 37 | 38 | pca: Orient the poses so that the principal component of the points is aligned with the axes. 39 | This method works well when all of the cameras are in the same plane. 40 | up: Orient the poses so that the average up vector is aligned with the z axis. 41 | This method works well when images are not at arbitrary angles. 42 | 43 | 44 | Args: 45 | poses: The poses to orient. 46 | method: The method to use for orientation. 47 | center_poses: If True, the poses are centered around the origin. 48 | 49 | Returns: 50 | The oriented poses. 
51 | """ 52 | 53 | translation = poses[..., :3, 3] 54 | 55 | mean_translation = torch.mean(translation, dim=0) 56 | translation_diff = translation - mean_translation 57 | 58 | if center_poses: 59 | translation = mean_translation 60 | else: 61 | translation = torch.zeros_like(mean_translation) 62 | 63 | if method == "pca": 64 | _, eigvec = torch.linalg.eigh(translation_diff.T @ translation_diff) 65 | eigvec = torch.flip(eigvec, dims=(-1,)) 66 | 67 | if torch.linalg.det(eigvec) < 0: 68 | eigvec[:, 2] = -eigvec[:, 2] 69 | 70 | transform = torch.cat([eigvec, eigvec @ -translation[..., None]], dim=-1) 71 | oriented_poses = transform @ poses 72 | 73 | if oriented_poses.mean(axis=0)[2, 1] < 0: 74 | oriented_poses[:, 1:3] = -1 * oriented_poses[:, 1:3] 75 | elif method == "up": 76 | up = torch.mean(poses[:, :3, 1], dim=0) 77 | up = up / torch.linalg.norm(up) 78 | 79 | rotation = rotation_matrix(up, torch.Tensor([0, 0, 1])) 80 | transform = torch.cat([rotation, rotation @ -translation[..., None]], dim=-1) 81 | oriented_poses = transform @ poses 82 | elif method == "none": 83 | transform = torch.eye(4) 84 | transform[:3, 3] = -translation 85 | transform = transform[:3, :] 86 | oriented_poses = transform @ poses 87 | 88 | return oriented_poses, transform 89 | 90 | 91 | -------------------------------------------------------------------------------- /scripts/eval_tnt/plot.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------------- 2 | # - TanksAndTemples Website Toolbox - 3 | # - http://www.tanksandtemples.org - 4 | # ---------------------------------------------------------------------------- 5 | # The MIT License (MIT) 6 | # 7 | # Copyright (c) 2017 8 | # Arno Knapitsch 9 | # Jaesik Park 10 | # Qian-Yi Zhou 11 | # Vladlen Koltun 12 | # 13 | # Permission is hereby granted, free of charge, to any person obtaining a copy 14 | # of this software and associated documentation files (the "Software"), to deal 15 | # in the Software without restriction, including without limitation the rights 16 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 17 | # copies of the Software, and to permit persons to whom the Software is 18 | # furnished to do so, subject to the following conditions: 19 | # 20 | # The above copyright notice and this permission notice shall be included in 21 | # all copies or substantial portions of the Software. 22 | # 23 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 24 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 25 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 26 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 27 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 28 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 29 | # THE SOFTWARE. 
30 | # ---------------------------------------------------------------------------- 31 | # 32 | # This python script is for downloading dataset from www.tanksandtemples.org 33 | # The dataset has a different license, please refer to 34 | # https://tanksandtemples.org/license/ 35 | 36 | import matplotlib.pyplot as plt 37 | from cycler import cycler 38 | 39 | 40 | def plot_graph( 41 | scene, 42 | fscore, 43 | dist_threshold, 44 | edges_source, 45 | cum_source, 46 | edges_target, 47 | cum_target, 48 | plot_stretch, 49 | mvs_outpath, 50 | show_figure=False, 51 | ): 52 | f = plt.figure() 53 | plt_size = [14, 7] 54 | pfontsize = "medium" 55 | 56 | ax = plt.subplot(111) 57 | label_str = "precision" 58 | ax.plot( 59 | edges_source[1::], 60 | cum_source * 100, 61 | c="red", 62 | label=label_str, 63 | linewidth=2.0, 64 | ) 65 | 66 | label_str = "recall" 67 | ax.plot( 68 | edges_target[1::], 69 | cum_target * 100, 70 | c="blue", 71 | label=label_str, 72 | linewidth=2.0, 73 | ) 74 | 75 | ax.grid(True) 76 | plt.rcParams["figure.figsize"] = plt_size 77 | plt.rc("axes", prop_cycle=cycler("color", ["r", "g", "b", "y"])) 78 | plt.title("Precision and Recall: " + scene + ", " + "%02.2f f-score" % 79 | (fscore * 100)) 80 | plt.axvline(x=dist_threshold, c="black", ls="dashed", linewidth=2.0) 81 | 82 | plt.ylabel("# of points (%)", fontsize=15) 83 | plt.xlabel("Meters", fontsize=15) 84 | plt.axis([0, dist_threshold * plot_stretch, 0, 100]) 85 | ax.legend(shadow=True, fancybox=True, fontsize=pfontsize) 86 | # plt.axis([0, dist_threshold*plot_stretch, 0, 100]) 87 | 88 | plt.setp(ax.get_legend().get_texts(), fontsize=pfontsize) 89 | 90 | plt.legend(loc=2, borderaxespad=0.0, fontsize=pfontsize) 91 | plt.legend(loc=4) 92 | leg = plt.legend(loc="lower right") 93 | 94 | box = ax.get_position() 95 | ax.set_position([box.x0, box.y0, box.width * 0.8, box.height]) 96 | 97 | # Put a legend to the right of the current axis 98 | ax.legend(loc="center left", bbox_to_anchor=(1, 0.5)) 99 | plt.setp(ax.get_legend().get_texts(), fontsize=pfontsize) 100 | png_name = mvs_outpath + "/PR_{0}_@d_th_0_{1}.png".format( 101 | scene, "%04d" % (dist_threshold * 10000)) 102 | pdf_name = mvs_outpath + "/PR_{0}_@d_th_0_{1}.pdf".format( 103 | scene, "%04d" % (dist_threshold * 10000)) 104 | 105 | # save figure and display 106 | f.savefig(png_name, format="png", bbox_inches="tight") 107 | f.savefig(pdf_name, format="pdf", bbox_inches="tight") 108 | if show_figure: 109 | plt.show() 110 | -------------------------------------------------------------------------------- /scripts/eval_tnt/requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib>=1.3 2 | open3d==0.10 3 | -------------------------------------------------------------------------------- /scripts/eval_tnt/trajectory_io.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import open3d as o3d 3 | 4 | 5 | class CameraPose: 6 | 7 | def __init__(self, meta, mat): 8 | self.metadata = meta 9 | self.pose = mat 10 | 11 | def __str__(self): 12 | return ("Metadata : " + " ".join(map(str, self.metadata)) + "\n" + 13 | "Pose : " + "\n" + np.array_str(self.pose)) 14 | 15 | 16 | def convert_trajectory_to_pointcloud(traj): 17 | pcd = o3d.geometry.PointCloud() 18 | for t in traj: 19 | pcd.points.append(t.pose[:3, 3]) 20 | return pcd 21 | 22 | 23 | def read_trajectory(filename): 24 | traj = [] 25 | with open(filename, "r") as f: 26 | metastr = f.readline() 27 | while metastr: 28 | metadata 
= map(int, metastr.split()) 29 | mat = np.zeros(shape=(4, 4)) 30 | for i in range(4): 31 | matstr = f.readline() 32 | mat[i, :] = np.fromstring(matstr, dtype=float, sep=" \t") 33 | traj.append(CameraPose(metadata, mat)) 34 | metastr = f.readline() 35 | return traj 36 | 37 | 38 | def write_trajectory(traj, filename): 39 | with open(filename, "w") as f: 40 | for x in traj: 41 | p = x.pose.tolist() 42 | f.write(" ".join(map(str, x.metadata)) + "\n") 43 | f.write("\n".join( 44 | " ".join(map("{0:.12f}".format, p[i])) for i in range(4))) 45 | f.write("\n") 46 | -------------------------------------------------------------------------------- /scripts/eval_tnt/util.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | def make_dir(path): 5 | if not os.path.exists(path): 6 | os.makedirs(path) 7 | -------------------------------------------------------------------------------- /scripts/mipnerf360_run.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | DATA_ROOT=data/360_v2 10 | 11 | lanuch_exp() { 12 | local scene_name="$1" 13 | shift 14 | local output_dir="$1" 15 | shift 16 | local exp_args="$*" 17 | 18 | python train.py --cfg_files cfg/mipnerf360.yaml --source_path $DATA_ROOT/$scene_name/ --model_path $output_dir/$scene_name $exp_args 19 | python render.py $output_dir/$scene_name --skip_train --eval_fps 20 | python render.py $output_dir/$scene_name --skip_train 21 | python eval.py $output_dir/$scene_name/ 22 | python render_fly_through.py $output_dir/$scene_name 23 | rm -r $output_dir/$scene_name/checkpoints/ 24 | } 25 | 26 | 27 | for scene in bonsai counter kitchen room 28 | do 29 | echo "============ start " $scene " ============" 30 | lanuch_exp $scene $1 --image_dir_name images_2 "${@:2}" 31 | echo "============ end " $scene " ============" 32 | done 33 | 34 | for scene in bicycle garden stump treehill flowers 35 | do 36 | echo "============ start " $scene " ============" 37 | lanuch_exp $scene $1 --image_dir_name images_4 "${@:2}" 38 | echo "============ end " $scene " ============" 39 | done 40 | -------------------------------------------------------------------------------- /scripts/mipnerf360_stat.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 
8 | 9 | import os 10 | import subprocess 11 | import json 12 | from argparse import ArgumentParser 13 | import glob 14 | import pandas as pd 15 | 16 | parser = ArgumentParser(description="Training script parameters") 17 | parser.add_argument('result_root') 18 | parser.add_argument('--suffix', default='') 19 | args = parser.parse_args() 20 | 21 | indoor_scenes = ['bonsai', 'counter', 'kitchen', 'room'] 22 | outdoor_scenes = ['bicycle', 'garden', 'stump', 'treehill', 'flowers'] 23 | scenes = indoor_scenes + outdoor_scenes 24 | 25 | indoor_psnr = [] 26 | indoor_ssim = [] 27 | indoor_lpips = [] 28 | indoor_lpips_corr = [] 29 | indoor_tr_time = [] 30 | indoor_fps = [] 31 | indoor_n_voxels = [] 32 | for scene in indoor_scenes: 33 | path = glob.glob(os.path.join(f'{args.result_root}/{scene}/test_stat/iter*.json')) 34 | if len(path) == 0: 35 | print(f'{scene:10s}: failed !!??') 36 | continue 37 | path = sorted(path)[-1] 38 | with open(path) as f: 39 | ret = json.load(f) 40 | tmp_psnr = ret['psnr'] 41 | indoor_tr_time.append(ret['elapsed'] / 1000) 42 | # indoor_fps.append(ret['fps']) 43 | indoor_n_voxels.append(ret['n_voxels']) 44 | n_iter = int(os.path.split(path)[1].replace('iter', '').replace('.json', '')) 45 | fps_path = f'{args.result_root}/{scene}/test/ours_{n_iter}{args.suffix}.txt' 46 | with open(fps_path) as f: 47 | fps = float(f.read().strip().split()[-1].split('=')[1]) 48 | indoor_fps.append(fps) 49 | eval_path = f'{args.result_root}/{scene}/results.json' 50 | if os.path.exists(eval_path): 51 | with open(os.path.join(eval_path)) as f: 52 | ret = json.load(f) 53 | ret = ret[f"ours_{n_iter}{args.suffix}"] 54 | indoor_psnr.append(ret['PSNR']) 55 | indoor_ssim.append(ret['SSIM']) 56 | indoor_lpips.append(ret['LPIPS']) 57 | indoor_lpips_corr.append(ret.get('LPIPS-corrected', 1)) 58 | else: 59 | indoor_psnr.append(tmp_psnr) 60 | indoor_ssim.append(0) 61 | indoor_lpips.append(0) 62 | indoor_lpips_corr.append(1) 63 | 64 | outdoor_psnr = [] 65 | outdoor_ssim = [] 66 | outdoor_lpips = [] 67 | outdoor_lpips_corr = [] 68 | outdoor_tr_time = [] 69 | outdoor_fps = [] 70 | outdoor_n_voxels = [] 71 | for scene in outdoor_scenes: 72 | path = glob.glob(os.path.join(f'{args.result_root}/{scene}/test_stat/iter*.json')) 73 | if len(path) == 0: 74 | print(f'{scene:10s}: failed !!??') 75 | continue 76 | path = sorted(path)[-1] 77 | with open(path) as f: 78 | ret = json.load(f) 79 | tmp_psnr = ret['psnr'] 80 | outdoor_tr_time.append(ret['elapsed'] / 1000) 81 | # outdoor_fps.append(ret['fps']) 82 | outdoor_n_voxels.append(ret['n_voxels']) 83 | n_iter = int(os.path.split(path)[1].replace('iter', '').replace('.json', '')) 84 | fps_path = f'{args.result_root}/{scene}/test/ours_{n_iter}{args.suffix}.txt' 85 | with open(fps_path) as f: 86 | fps = float(f.read().strip().split()[-1].split('=')[1]) 87 | outdoor_fps.append(fps) 88 | eval_path = f'{args.result_root}/{scene}/results.json' 89 | if os.path.exists(eval_path): 90 | with open(os.path.join(eval_path)) as f: 91 | ret = json.load(f) 92 | ret = ret[f"ours_{n_iter}{args.suffix}"] 93 | outdoor_psnr.append(ret['PSNR']) 94 | outdoor_ssim.append(ret['SSIM']) 95 | outdoor_lpips.append(ret['LPIPS']) 96 | outdoor_lpips_corr.append(ret.get('LPIPS-corrected', 1)) 97 | else: 98 | outdoor_psnr.append(tmp_psnr) 99 | outdoor_ssim.append(0) 100 | outdoor_lpips.append(0) 101 | outdoor_lpips_corr.append(1) 102 | 103 | 104 | 105 | def format_df_string(df): 106 | df = df.copy() 107 | df['scene'] = df['scene'].map(lambda s: s.rjust(15)) 108 | df['psnr'] = df['psnr'].round(2) 109 | 
df['ssim'] = df['ssim'].round(3) 110 | df['lpips'] = df['lpips'].round(3) 111 | df['lpips*'] = df['lpips*'].round(3) 112 | df['tr-mins'] = (df['tr-mins'] / 60).round(1) 113 | df['fps'] = df['fps'].round(1) 114 | df['#vox(M)'] = (df['#vox(M)'] / 1_000_000).round(1) 115 | return df.to_string(index=False) 116 | 117 | def add_avg_row(df): 118 | df_avg = df.mean(axis=0, numeric_only=True).to_frame().transpose() 119 | df_avg['scene'] = 'AVG' 120 | return pd.concat([df, df_avg], ignore_index=True) 121 | 122 | df_indoor = pd.DataFrame({ 123 | 'scene': indoor_scenes, 124 | 'psnr': indoor_psnr, 125 | 'ssim': indoor_ssim, 126 | 'lpips': indoor_lpips, 127 | 'lpips*': indoor_lpips_corr, 128 | 'tr-mins': indoor_tr_time, 129 | 'fps': indoor_fps, 130 | '#vox(M)': indoor_n_voxels, 131 | }) 132 | 133 | df_outdoor = pd.DataFrame({ 134 | 'scene': outdoor_scenes, 135 | 'psnr': outdoor_psnr, 136 | 'ssim': outdoor_ssim, 137 | 'lpips': outdoor_lpips, 138 | 'lpips*': outdoor_lpips_corr, 139 | 'tr-mins': outdoor_tr_time, 140 | 'fps': outdoor_fps, 141 | '#vox(M)': outdoor_n_voxels, 142 | }) 143 | 144 | df = pd.concat([df_indoor, df_outdoor], ignore_index=True) 145 | 146 | df_indoor = add_avg_row(df_indoor) 147 | df_outdoor = add_avg_row(df_outdoor) 148 | df = add_avg_row(df) 149 | 150 | print(format_df_string(df_indoor)) 151 | print() 152 | print(format_df_string(df_outdoor)) 153 | print() 154 | print(format_df_string(df)) 155 | 156 | -------------------------------------------------------------------------------- /scripts/scannetpp_preproc.py: -------------------------------------------------------------------------------- 1 | import os 2 | import math 3 | import json 4 | import argparse 5 | from tqdm import tqdm 6 | 7 | 8 | def focal2fov(focal, pixels): 9 | return 2*math.atan(pixels/(2*focal)) 10 | 11 | 12 | if __name__ == '__main__': 13 | 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('--indir', default='data/scannetpp_v2/data') 16 | parser.add_argument('--split_dir', default='data/scannetpp_v2/splits') 17 | 18 | parser.add_argument('--splits', default=[], nargs='*') 19 | 20 | parser.add_argument('--ids', default=[], nargs='*') 21 | # parser.add_argument('--ids', default=['08bbbdcc3d'], nargs='*') 22 | # parser.add_argument('--ids', default=['7b6477cb95', 'c50d2d1d42', 'cc5237fd77', 'acd95847c5', 'fb5a96b1a2', 'a24f64f7fb', '1ada7a0617', '5eb31827b7', '3e8bba0176', '3f15a9266d', '21d970d8de', '5748ce6f01', 'c4c04e6d6c', '7831862f02', 'bde1e479ad', '38d58a7a31', '5ee7c22ba0', 'f9f95681fd', '3864514494', '40aec5fffa', '13c3e046d7', 'e398684d27', 'a8bf42d646', '45b0dac5e3', '31a2c91c43', 'e7af285f7d', '286b55a2bf', '7bc286c1b6', 'f3685d06a9', 'b0a08200c9', '825d228aec', 'a980334473', 'f2dc06b1d2', '5942004064', '25f3b7a318', 'bcd2436daf', 'f3d64c30f8', '0d2ee665be', '3db0a1c8f3', 'ac48a9b736', 'c5439f4607', '578511c8a9', 'd755b3d9d8', '99fa5c25e1', '09c1414f1b', '5f99900f09', '9071e139d9', '6115eddb86', '27dd4da69e', 'c49a8c6cff'], nargs='*') 23 | 24 | parser.add_argument('--is_test_hidden', default=False, action='store_true') 25 | # parser.add_argument('--ids', default=['ca0e09014e', 'beb802368c', 'ebff4de90b', 'd228e2d9dd', '9e019d8be1', '11b696efba', '471cc4ba84', 'f20e7b5640', 'dfe9cbd72a', 'ccdc33dc2a', '124974734e', 'c0cbb1fea1', '047fb766c4', '7b37cccb03', '8283161f1b', 'c3e279be54', '5a14f9da39', 'cd7973d92b', '5298ec174f', 'e0e83b4ca3', '64ea6b73c2', 'f00bd5fa8a', '02a980c994', 'be91f7884d', '1c876c250f', '15155a88fb', '633f9a9f06', 'd6419f6478', 'f0b0a42ba3', 'a46b21d949', '74ff105c0d', 
'77596f5d2a', 'ecb5d01065', 'c9bf4c8b62', 'b074ca565a', '49c758655e', 'd4d2019f5d', '319787e6ec', '84b48f2614', 'bee11d6a41', '9a9e32c768', '9b365a9b68', '54e7ffaea3', '7d72f01865', '252652d5ba', '651dc6b4f1', '03f7a0e617', 'fe94fc30cf', 'd1b9dff904', '4bc04e0cde'], nargs='*') 26 | args = parser.parse_args() 27 | 28 | if len(args.splits) > 0: 29 | args.ids = [] 30 | for split in args.splits: 31 | with open(os.path.join(args.split_dir, f"{split}.txt")) as f: 32 | args.ids.extend(f.read().strip().split()) 33 | print(args.ids) 34 | 35 | for scene_id in tqdm(args.ids): 36 | in_scene_dir = os.path.join(args.indir, scene_id, 'dslr') 37 | out_scene_dir = os.path.join(in_scene_dir, 'svraster_inputs') 38 | 39 | os.system(f'mkdir -p {out_scene_dir}') 40 | 41 | with open(os.path.join(in_scene_dir, 'nerfstudio', 'transforms_undistorted.json')) as f: 42 | meta = json.load(f) 43 | 44 | cx_p = meta['cx'] / meta['w'] 45 | cy_p = meta['cy'] / meta['h'] 46 | camera_angle_x = focal2fov(meta['fl_x'], meta['w']) 47 | camera_angle_y = focal2fov(meta['fl_y'], meta['h']) 48 | 49 | new_metas_lst = [] 50 | for key in ['frames', 'test_frames']: 51 | new_metas_lst.append(dict( 52 | camera_angle_x=0, 53 | colmap={ 54 | 'path': '../colmap', 55 | 'transform': [ 56 | [0, 1, 0], 57 | [1, 0, 0], 58 | [0, 0, -1], 59 | ], 60 | }, 61 | frames=[])) 62 | for frame in meta[key]: 63 | new_metas_lst[-1]['frames'].append({ 64 | 'camera_angle_x': camera_angle_x, 65 | 'camera_angle_y': camera_angle_y, 66 | 'cx_p': cx_p, 67 | 'cy_p': cy_p, 68 | 'file_path': f"../undistorted_images/{frame['file_path']}", 69 | 'depth_path': f"../undistorted_depths/{frame['file_path'].replace('.JPG', '.png')}", 70 | 'transform_matrix': frame['transform_matrix'], 71 | 'is_bad': frame['is_bad'], 72 | 'heldout': args.is_test_hidden and (key == 'test_frames'), 73 | 'w': meta['w'], 74 | 'h': meta['h'], 75 | }) 76 | 77 | new_train_meta, new_test_meta = new_metas_lst 78 | 79 | with open(os.path.join(out_scene_dir, 'transforms_train.json'), 'w') as f: 80 | json.dump(new_train_meta, f, indent=2) 81 | with open(os.path.join(out_scene_dir, 'transforms_test.json'), 'w') as f: 82 | json.dump(new_test_meta, f, indent=2) 83 | -------------------------------------------------------------------------------- /scripts/scannetpp_run.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 
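# Added usage note (illustrative, not part of the original script): the first argument
# is the output root and all remaining arguments are forwarded to train.py, e.g.
#   bash scripts/scannetpp_run.sh output/scannetpp
# Scenes that already have a results.json under the output root are skipped.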
8 | 9 | DATA_ROOT=data/scannetpp_nvs 10 | 11 | lanuch_exp() { 12 | local scene_name="$1" 13 | shift 14 | local output_dir="$1" 15 | shift 16 | local exp_args="$*" 17 | 18 | python train.py --source_path $DATA_ROOT/$scene_name --model_path $output_dir/$scene_name $exp_args 19 | python render.py $output_dir/$scene_name --skip_train --eval_fps 20 | python render.py $output_dir/$scene_name --skip_train 21 | python eval.py $output_dir/$scene_name 22 | python render_fly_through.py $output_dir/$scene_name 23 | rm -r $output_dir/$scene_name/checkpoints/ 24 | } 25 | 26 | ulimit -n 4096 # Increase maximum number of files the script can read 27 | 28 | for scene in 39f36da05b 5a269ba6fe dc263dfbf0 08bbbdcc3d 29 | do 30 | echo "============ start " $scene " ============" 31 | if [ ! -f $1/$scene/results.json ]; then 32 | # We use the source image resolution and prevent automatic downsampling. 33 | lanuch_exp $scene $1 --res_downscale 1.0 --cfg_files cfg/scannetpp.yaml "${@:2}" 34 | fi 35 | echo "============ end " $scene " ============" 36 | done 37 | -------------------------------------------------------------------------------- /scripts/scannetpp_stat.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | import os 10 | import subprocess 11 | import json 12 | from argparse import ArgumentParser 13 | import glob 14 | import pandas as pd 15 | 16 | parser = ArgumentParser(description="Training script parameters") 17 | parser.add_argument('result_root') 18 | parser.add_argument('--suffix', default='_r1.0') 19 | args = parser.parse_args() 20 | 21 | indoor_scenes = ['39f36da05b', '5a269ba6fe', 'dc263dfbf0', '08bbbdcc3d'] 22 | 23 | indoor_psnr = [] 24 | indoor_ssim = [] 25 | indoor_lpips = [] 26 | indoor_lpips_corr = [] 27 | indoor_tr_time = [] 28 | indoor_fps = [] 29 | indoor_n_voxels = [] 30 | for scene in indoor_scenes: 31 | path = glob.glob(os.path.join(f'{args.result_root}/{scene}/test_stat/iter*.json')) 32 | if len(path) == 0: 33 | print(f'{scene:10s}: failed !!??') 34 | continue 35 | path = sorted(path)[-1] 36 | with open(path) as f: 37 | ret = json.load(f) 38 | tmp_psnr = ret['psnr'] 39 | indoor_tr_time.append(ret['elapsed'] / 1000) 40 | indoor_fps.append(ret['fps']) 41 | indoor_n_voxels.append(ret['n_voxels']) 42 | n_iter = int(os.path.split(path)[1].replace('iter', '').replace('.json', '')) 43 | fps_path = f'{args.result_root}/{scene}/test/ours_{n_iter}{args.suffix}.txt' 44 | with open(fps_path) as f: 45 | fps = float(f.read().strip().split()[-1].split('=')[1]) 46 | #indoor_fps.append(fps) 47 | eval_path = f'{args.result_root}/{scene}/results.json' 48 | if os.path.exists(eval_path): 49 | with open(os.path.join(eval_path)) as f: 50 | ret = json.load(f) 51 | ret = ret[f"ours_{n_iter}{args.suffix}"] 52 | indoor_psnr.append(ret['PSNR']) 53 | indoor_ssim.append(ret['SSIM']) 54 | indoor_lpips.append(ret['LPIPS']) 55 | indoor_lpips_corr.append(ret.get('LPIPS-corrected', 1)) 56 | else: 57 | indoor_psnr.append(tmp_psnr) 58 | indoor_ssim.append(0) 59 | indoor_lpips.append(0) 60 | indoor_lpips_corr.append(1) 61 | 62 | 63 | def 
format_df_string(df): 64 | df = df.copy() 65 | df['scene'] = df['scene'].map(lambda s: s.rjust(15)) 66 | df['psnr'] = df['psnr'].round(2) 67 | df['ssim'] = df['ssim'].round(3) 68 | df['lpips'] = df['lpips'].round(3) 69 | df['lpips*'] = df['lpips*'].round(3) 70 | df['tr-mins'] = (df['tr-mins'] / 60).round(1) 71 | df['fps'] = df['fps'].round(1) 72 | df['#vox(M)'] = (df['#vox(M)'] / 1_000_000).round(1) 73 | return df.to_string(index=False) 74 | 75 | def add_avg_row(df): 76 | df_avg = df.mean(axis=0, numeric_only=True).to_frame().transpose() 77 | df_avg['scene'] = 'AVG' 78 | return pd.concat([df, df_avg], ignore_index=True) 79 | 80 | df = pd.DataFrame({ 81 | 'scene': indoor_scenes, 82 | 'psnr': indoor_psnr, 83 | 'ssim': indoor_ssim, 84 | 'lpips': indoor_lpips, 85 | 'lpips*': indoor_lpips_corr, 86 | 'tr-mins': indoor_tr_time, 87 | 'fps': indoor_fps, 88 | '#vox(M)': indoor_n_voxels, 89 | }) 90 | df = add_avg_row(df) 91 | 92 | print(format_df_string(df)) 93 | -------------------------------------------------------------------------------- /scripts/synthetic_nerf_run.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | DATA_ROOT=data/nerf_synthetic 10 | 11 | lanuch_exp() { 12 | local scene_name="$1" 13 | shift 14 | local output_dir="$1" 15 | shift 16 | local exp_args="$*" 17 | 18 | python train.py --cfg_files cfg/synthetic_nerf.yaml --source_path $DATA_ROOT/$scene_name --model_path $output_dir/$scene_name $exp_args 19 | python render.py $output_dir/$scene_name --skip_train --eval_fps 20 | python render.py $output_dir/$scene_name --skip_train 21 | python eval.py $output_dir/$scene_name/ 22 | python render_fly_through.py $output_dir/$scene_name/ 23 | rm -r $output_dir/$scene_name/checkpoints/ 24 | } 25 | 26 | 27 | for scene in chair drums ficus hotdog lego materials mic ship 28 | do 29 | echo "============ start " $scene " ============" 30 | lanuch_exp $scene $1 "${@:2}" 31 | echo "============ end " $scene " ============" 32 | done 33 | -------------------------------------------------------------------------------- /scripts/synthetic_nerf_stat.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 
8 | 9 | import os 10 | import subprocess 11 | import json 12 | from argparse import ArgumentParser 13 | import glob 14 | import pandas as pd 15 | 16 | parser = ArgumentParser(description="Training script parameters") 17 | parser.add_argument('result_root') 18 | args = parser.parse_args() 19 | 20 | scenes = ['chair', 'drums', 'ficus', 'hotdog', 'lego', 'materials', 'mic', 'ship'] 21 | 22 | psnr = [] 23 | ssim = [] 24 | lpips = [] 25 | tr_time = [] 26 | fps = [] 27 | n_voxels = [] 28 | max_iter = sorted(glob.glob(f'{args.result_root}/{scenes[0]}/test_stat/iter*.json'))[-1].split('/')[-1] 29 | for scene in scenes: 30 | eval_path = f'{args.result_root}/{scene}/test_stat/{max_iter}' 31 | if os.path.exists(eval_path): 32 | with open(eval_path) as f: 33 | ret = json.load(f) 34 | psnr.append(ret['psnr']) 35 | tr_time.append(ret['elapsed'] / 1000) 36 | fps.append(ret['fps']) 37 | n_voxels.append(ret['n_voxels']) 38 | else: 39 | psnr.append(0) 40 | tr_time.append(0) 41 | fps.append(0) 42 | n_voxels.append(0) 43 | 44 | eval_path = f'{args.result_root}/{scene}/results.json' 45 | if os.path.exists(eval_path): 46 | with open(os.path.join(eval_path)) as f: 47 | ret = json.load(f) 48 | ret = ret[sorted(ret.keys())[-1]] 49 | psnr[-1] = ret['PSNR'] 50 | ssim.append(ret['SSIM']) 51 | lpips.append(ret['LPIPS']) 52 | else: 53 | ssim.append(0) 54 | lpips.append(0) 55 | 56 | 57 | 58 | def format_df_string(df): 59 | df = df.copy() 60 | df['scene'] = df['scene'].map(lambda s: s.rjust(15)) 61 | df['psnr'] = df['psnr'].round(2) 62 | df['ssim'] = df['ssim'].round(3) 63 | df['lpips'] = df['lpips'].round(3) 64 | df['tr-mins'] = (df['tr-mins'] / 60).round(1) 65 | df['fps'] = df['fps'].round(1) 66 | df['#vox(M)'] = (df['#vox(M)'] / 1_000_000).round(1) 67 | return df.to_string(index=False) 68 | 69 | def add_avg_row(df): 70 | df_avg = df.mean(axis=0, numeric_only=True).to_frame().transpose() 71 | df_avg['scene'] = 'AVG' 72 | return pd.concat([df, df_avg], ignore_index=True) 73 | 74 | df = pd.DataFrame({ 75 | 'scene': scenes, 76 | 'psnr': psnr, 77 | 'ssim': ssim, 78 | 'lpips': lpips, 79 | 'tr-mins': tr_time, 80 | 'fps': fps, 81 | '#vox(M)': n_voxels, 82 | }) 83 | df = add_avg_row(df) 84 | 85 | print(format_df_string(df)) 86 | -------------------------------------------------------------------------------- /scripts/tandt_db_run.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 
8 | 9 | DATA_ROOT=data/tandt_db 10 | 11 | lanuch_exp() { 12 | local scene_name="$1" 13 | shift 14 | local output_dir="$1" 15 | shift 16 | local exp_args="$*" 17 | 18 | python train.py --source_path $DATA_ROOT/$scene_name/ --model_path $output_dir/$scene_name $exp_args 19 | python render.py $output_dir/$scene_name --skip_train --eval_fps 20 | python render.py $output_dir/$scene_name --skip_train 21 | python eval.py $output_dir/$scene_name/ 22 | python render_fly_through.py $output_dir/$scene_name/ 23 | rm -r $output_dir/$scene_name/checkpoints/ 24 | } 25 | 26 | 27 | for scene in train truck 28 | do 29 | echo "============ start " $scene " ============" 30 | lanuch_exp $scene $1 --res_downscale 1.0 --cfg_files cfg/mipnerf360.yaml "${@:2}" 31 | echo "============ end " $scene " ============" 32 | done 33 | 34 | for scene in drjohnson playroom 35 | do 36 | echo "============ start " $scene " ============" 37 | lanuch_exp $scene $1 --res_downscale 1.0 --cfg_files cfg/deep_blending.yaml "${@:2}" 38 | echo "============ end " $scene " ============" 39 | done 40 | -------------------------------------------------------------------------------- /scripts/tandt_db_stat.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | import os 10 | import subprocess 11 | import json 12 | from argparse import ArgumentParser 13 | import glob 14 | import pandas as pd 15 | 16 | parser = ArgumentParser(description="Training script parameters") 17 | parser.add_argument('result_root') 18 | args = parser.parse_args() 19 | 20 | tandt_scenes = ['train', 'truck'] 21 | db_scenes = ['drjohnson', 'playroom'] 22 | 23 | tandt_psnr = [] 24 | tandt_ssim = [] 25 | tandt_lpips = [] 26 | tandt_tr_time = [] 27 | tandt_fps = [] 28 | tandt_n_voxels = [] 29 | for scene in tandt_scenes: 30 | path = glob.glob(os.path.join(f'{args.result_root}/{scene}/test_stat/iter*.json')) 31 | if len(path) == 0: 32 | print(f'{scene:10s}: failed !!??') 33 | continue 34 | path = sorted(path)[-1] 35 | with open(path) as f: 36 | ret = json.load(f) 37 | tmp_psnr = ret['psnr'] 38 | tandt_tr_time.append(ret['elapsed'] / 1000) 39 | tandt_fps.append(ret['fps']) 40 | tandt_n_voxels.append(ret['n_voxels']) 41 | eval_path = f'{args.result_root}/{scene}/results.json' 42 | if os.path.exists(eval_path): 43 | with open(os.path.join(eval_path)) as f: 44 | ret = json.load(f) 45 | ret = ret[sorted(ret.keys())[-1]] 46 | tandt_psnr.append(ret['PSNR']) 47 | tandt_ssim.append(ret['SSIM']) 48 | tandt_lpips.append(ret['LPIPS']) 49 | else: 50 | tandt_psnr.append(tmp_psnr) 51 | tandt_ssim.append(0) 52 | tandt_lpips.append(0) 53 | 54 | db_psnr = [] 55 | db_ssim = [] 56 | db_lpips = [] 57 | db_tr_time = [] 58 | db_fps = [] 59 | db_n_voxels = [] 60 | for scene in db_scenes: 61 | path = glob.glob(os.path.join(f'{args.result_root}/{scene}/test_stat/iter*.json')) 62 | if len(path) == 0: 63 | print(f'{scene:10s}: failed !!??') 64 | continue 65 | path = sorted(path)[-1] 66 | with open(path) as f: 67 | ret = json.load(f) 68 | tmp_psnr = ret['psnr'] 69 | db_tr_time.append(ret['elapsed'] / 1000) 70 | 
db_fps.append(ret['fps']) 71 | db_n_voxels.append(ret['n_voxels']) 72 | eval_path = f'{args.result_root}/{scene}/results.json' 73 | if os.path.exists(eval_path): 74 | with open(os.path.join(eval_path)) as f: 75 | ret = json.load(f) 76 | ret = ret[sorted(ret.keys())[-1]] 77 | db_psnr.append(ret['PSNR']) 78 | db_ssim.append(ret['SSIM']) 79 | db_lpips.append(ret['LPIPS']) 80 | else: 81 | db_psnr.append(tmp_psnr) 82 | db_ssim.append(0) 83 | db_lpips.append(0) 84 | 85 | 86 | 87 | def format_df_string(df): 88 | df = df.copy() 89 | df['scene'] = df['scene'].map(lambda s: s.rjust(15)) 90 | df['psnr'] = df['psnr'].round(2) 91 | df['ssim'] = df['ssim'].round(3) 92 | df['lpips'] = df['lpips'].round(3) 93 | df['tr-mins'] = (df['tr-mins'] / 60).round(1) 94 | df['fps'] = df['fps'].round(1) 95 | df['#vox(M)'] = (df['#vox(M)'] / 1_000_000).round(1) 96 | return df.to_string(index=False) 97 | 98 | def add_avg_row(df): 99 | df_avg = df.mean(axis=0, numeric_only=True).to_frame().transpose() 100 | df_avg['scene'] = 'AVG' 101 | return pd.concat([df, df_avg], ignore_index=True) 102 | 103 | df_tandt = pd.DataFrame({ 104 | 'scene': tandt_scenes, 105 | 'psnr': tandt_psnr, 106 | 'ssim': tandt_ssim, 107 | 'lpips': tandt_lpips, 108 | 'tr-mins': tandt_tr_time, 109 | 'fps': tandt_fps, 110 | '#vox(M)': tandt_n_voxels, 111 | }) 112 | df_tandt = add_avg_row(df_tandt) 113 | 114 | df_db = pd.DataFrame({ 115 | 'scene': db_scenes, 116 | 'psnr': db_psnr, 117 | 'ssim': db_ssim, 118 | 'lpips': db_lpips, 119 | 'tr-mins': db_tr_time, 120 | 'fps': db_fps, 121 | '#vox(M)': db_n_voxels, 122 | }) 123 | df_db = add_avg_row(df_db) 124 | 125 | print(format_df_string(df_tandt)) 126 | print() 127 | print(format_df_string(df_db)) 128 | -------------------------------------------------------------------------------- /scripts/tnt_run.sh: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 
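# Added usage note (illustrative, not part of the original script):
#   bash scripts/tnt_run.sh output/tnt
# The first argument is the output root; extra arguments are forwarded to train.py.
# PATH_TO_OFFICIAL_TNT below should point at the official Tanks and Temples data
# (containing the <scene>_mesh_bbox.txt files) and PATH_TO_PREPROC_TNT at the
# preprocessed TNT_GOF training set; adjust both paths to your local layout.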
8 | 9 | PATH_TO_OFFICIAL_TNT="data/TnT" 10 | PATH_TO_PREPROC_TNT="data/TnT/TNT_GOF" 11 | 12 | lanuch_exp() { 13 | local scene_name="$1" 14 | shift 15 | local output_dir="$1" 16 | shift 17 | local exp_args="$*" 18 | 19 | python train.py --cfg_files cfg/tnt_mesh.yaml --source_path $PATH_TO_PREPROC_TNT/TrainingSet/$scene_name/ --model_path $output_dir/$scene_name $exp_args 20 | python render.py $output_dir/$scene_name --skip_test --eval_fps 21 | python render.py $output_dir/$scene_name --skip_test --rgb_only --use_jpg 22 | python render_fly_through.py $output_dir/$scene_name/ 23 | python extract_mesh.py $output_dir/$scene_name/ --save_gpu --bbox_path $PATH_TO_OFFICIAL_TNT/$scene_name/"$scene_name"_mesh_bbox.txt --use_vert_color --final_lv 11 --adaptive --mesh_fname mesh_svr 24 | python scripts/eval_tnt/run.py --dataset-dir $PATH_TO_OFFICIAL_TNT/$scene_name/ --traj-path $PATH_TO_PREPROC_TNT/TrainingSet/$scene_name/"$scene_name"_COLMAP_SfM.log --ply-path $output_dir/$scene_name/mesh/latest/mesh_svr.ply 25 | rm -r $output_dir/$scene_name/checkpoints/ 26 | } 27 | 28 | ulimit -n 2048 # Increase maximum number of files the script can read 29 | 30 | for scene in Barn Caterpillar Ignatius Truck Meetingroom Courthouse 31 | do 32 | echo "============ start " $scene " ============" 33 | lanuch_exp $scene $1 "${@:2}" 34 | echo "============ end " $scene " ============" 35 | done 36 | -------------------------------------------------------------------------------- /scripts/tnt_stat.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 
8 | 9 | import os 10 | import subprocess 11 | import json 12 | from argparse import ArgumentParser 13 | import glob 14 | import pandas as pd 15 | 16 | parser = ArgumentParser(description="Training script parameters") 17 | parser.add_argument('result_root') 18 | parser.add_argument('--suffix', default='_r2.0') 19 | args = parser.parse_args() 20 | 21 | all_scenes = ['Barn', 'Caterpillar', 'Courthouse', 'Ignatius', 'Meetingroom', 'Truck'] 22 | 23 | all_fscore = [] 24 | all_precision = [] 25 | all_recall = [] 26 | all_tr_time = [] 27 | all_fps = [] 28 | all_n_voxels = [] 29 | for scene in all_scenes: 30 | path = glob.glob(os.path.join(f'{args.result_root}/{scene}/test_stat/iter*.json')) 31 | if len(path) == 0: 32 | print(f'{scene:10s}: failed !!??') 33 | continue 34 | path = sorted(path)[-1] 35 | with open(path) as f: 36 | ret = json.load(f) 37 | all_tr_time.append(ret['elapsed'] / 1000) 38 | # all_fps.append(ret['fps']) 39 | all_n_voxels.append(ret['n_voxels']) 40 | n_iter = int(os.path.split(path)[1].replace('iter', '').replace('.json', '')) 41 | fps_path = f'{args.result_root}/{scene}/train/ours_{n_iter}{args.suffix}.txt' 42 | with open(fps_path) as f: 43 | fps = float(f.read().strip().split()[-1].split('=')[1]) 44 | all_fps.append(fps) 45 | eval_path = f'{args.result_root}/{scene}/mesh/latest/evaluation/result.json' 46 | if os.path.exists(eval_path): 47 | with open(os.path.join(eval_path)) as f: 48 | ret = json.load(f) 49 | all_fscore.append(ret['f-score']) 50 | all_precision.append(ret['precision']) 51 | all_recall.append(ret['recall']) 52 | else: 53 | all_fscore.append(0) 54 | all_precision.append(0) 55 | all_recall.append(0) 56 | 57 | 58 | 59 | def format_df_string(df): 60 | df = df.copy() 61 | df['scene'] = df['scene'].map(lambda s: s.rjust(15)) 62 | df['f-score'] = df['f-score'].round(2) 63 | df['prec.'] = df['prec.'].round(2) 64 | df['recall'] = df['recall'].round(2) 65 | df['tr. mins'] = (df['tr. mins'] / 60).round(1) 66 | df['fps'] = df['fps'].round(1) 67 | df['#vox (M)'] = (df['#vox (M)'] / 1_000_000).round(1) 68 | return df.to_string() 69 | 70 | def add_avg_row(df): 71 | df_avg = df.mean(axis=0, numeric_only=True).to_frame().transpose() 72 | df_avg['scene'] = 'AVG' 73 | return pd.concat([df, df_avg], ignore_index=True) 74 | 75 | df = pd.DataFrame({ 76 | 'scene': all_scenes, 77 | 'f-score': all_fscore, 78 | 'prec.': all_precision, 79 | 'recall': all_recall, 80 | 'tr. mins': all_tr_time, 81 | 'fps': all_fps, 82 | '#vox (M)': all_n_voxels, 83 | }) 84 | df = add_avg_row(df) 85 | 86 | print(format_df_string(df)) 87 | -------------------------------------------------------------------------------- /src/config.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 
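# Added note (illustrative, not part of the original file): every leaf key defined below
# is also exposed as a command-line flag by update_argparser(), so a value can come from
# the defaults here, from one or more --cfg_files YAML files, or from the command line,
# with later sources taking precedence, e.g.
#   python train.py --cfg_files cfg/mipnerf360.yaml --source_path data/360_v2/garden --sh_degree 2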
8 | 9 | import argparse 10 | from yacs.config import CfgNode 11 | 12 | 13 | cfg = CfgNode() 14 | 15 | cfg.model = CfgNode(dict( 16 | n_samp_per_vox = 1, # Number of sampled points per visited voxel 17 | sh_degree = 3, # Use 3 * (k+1)^2 params per voxels for view-dependent colors 18 | ss = 1.5, # Super-sampling rates for anti-aliasing 19 | white_background = False, # Assum white background 20 | black_background = False, # Assum black background 21 | )) 22 | 23 | cfg.data = CfgNode(dict( 24 | source_path = "", 25 | image_dir_name = "images", 26 | res_downscale = 0., 27 | res_width = 0, 28 | skip_blend_alpha = False, 29 | data_device = "cpu", 30 | eval = False, 31 | test_every = 8, 32 | )) 33 | 34 | cfg.bounding = CfgNode(dict( 35 | # Define the main (inside) region bounding box 36 | # The default use the suggested bounding if given by dataset. 37 | # Otherwise, it automatically chose from forward or camera_median modes. 38 | # See src/utils/bounding_utils.py for details. 39 | 40 | # default | camera_median | camera_max | forward | pcd 41 | bound_mode = "default", 42 | bound_scale = 1.0, # Scaling factor of the bound 43 | forward_dist_scale = 1.0, # For forward mode 44 | pcd_density_rate = 0.1, # For pcd mode 45 | 46 | # Number of Octree level outside the main foreground region 47 | outside_level = 5, 48 | )) 49 | 50 | cfg.optimizer = CfgNode(dict( 51 | geo_lr = 0.025, 52 | sh0_lr = 0.010, 53 | shs_lr = 0.00025, 54 | 55 | optim_beta1 = 0.1, 56 | optim_beta2 = 0.99, 57 | optim_eps = 1e-15, 58 | 59 | lr_decay_ckpt = [19000], 60 | lr_decay_mult = 0.1, 61 | )) 62 | 63 | cfg.regularizer = CfgNode(dict( 64 | # Main photometric loss 65 | lambda_photo = 1.0, 66 | use_l1 = False, 67 | use_huber = False, 68 | huber_thres = 0.03, 69 | 70 | # SSIM loss 71 | lambda_ssim = 0.02, 72 | 73 | # Sparse depth loss 74 | lambda_sparse_depth = 0.0, 75 | sparse_depth_until = 10_000, 76 | 77 | # Mask loss 78 | lambda_mask = 0.0, 79 | 80 | # Depthanything loss 81 | lambda_depthanythingv2 = 0.0, 82 | depthanythingv2_from = 3000, 83 | depthanythingv2_end = 20000, 84 | depthanythingv2_end_mult = 0.1, 85 | 86 | # Mast3r metrid loss 87 | lambda_mast3r_metric_depth = 0.0, 88 | mast3r_repo_path = '', 89 | mast3r_metric_depth_from = 0, 90 | mast3r_metric_depth_end = 20000, 91 | mast3r_metric_depth_end_mult = 0.01, 92 | 93 | # Final transmittance should concentrate to either 0 or 1 94 | lambda_T_concen = 0.0, 95 | 96 | # Final transmittance should be 0 97 | lambda_T_inside = 0.0, 98 | 99 | # Per-point rgb loss 100 | lambda_R_concen = 0.01, 101 | 102 | # Geometric regularization 103 | lambda_ascending = 0.0, 104 | ascending_from = 0, 105 | 106 | # Distortion loss (encourage distribution concentration on ray) 107 | lambda_dist = 0.1, 108 | dist_from = 10000, 109 | 110 | # Consistency loss of rendered normal and derived normal from expected depth 111 | lambda_normal_dmean = 0.0, 112 | n_dmean_from = 10_000, 113 | n_dmean_end = 20_000, 114 | n_dmean_ks = 3, 115 | n_dmean_tol_deg = 90.0, 116 | 117 | # Consistency loss of rendered normal and derived normal from median depth 118 | lambda_normal_dmed = 0.0, 119 | n_dmed_from=3000, 120 | n_dmed_end=20_000, 121 | 122 | # Total variation loss of density grid 123 | lambda_tv_density = 1e-10, 124 | tv_from = 0, 125 | tv_until = 10000, 126 | 127 | # Data augmentation 128 | ss_aug_max = 1.5, 129 | rand_bg = False, 130 | )) 131 | 132 | cfg.init = CfgNode(dict( 133 | # Voxel property initialization 134 | geo_init = -10.0, 135 | sh0_init = 0.5, 136 | shs_init = 0.0, 137 | 138 | sh_degree_init = 
3, 139 | 140 | # Init main inside region by dense voxels 141 | init_n_level = 6, # (2^6)^3 voxels 142 | 143 | # Number of voxel ratio for outside (background region) 144 | init_out_ratio = 2.0, 145 | )) 146 | 147 | cfg.procedure = CfgNode(dict( 148 | # Schedule 149 | n_iter = 20_000, 150 | sche_mult = 1.0, 151 | seed=3721, 152 | 153 | # Reset sh 154 | reset_sh_ckpt = [-1], 155 | 156 | # Adaptive general setup 157 | adapt_from = 1000, 158 | adapt_every = 1000, 159 | 160 | # Adaptive voxel pruning 161 | prune_until = 18000, 162 | prune_thres_init = 0.0001, 163 | prune_thres_final = 0.05, 164 | 165 | # Adaptive voxel pruning 166 | subdivide_until = 15000, 167 | subdivide_all_until = 0, 168 | subdivide_samp_thres = 1.0, # A voxel max sampling rate should larger than this. 169 | subdivide_prop = 0.05, 170 | subdivide_max_num = 10_000_000, 171 | )) 172 | 173 | cfg.auto_exposure = CfgNode(dict( 174 | enable = False, 175 | auto_exposure_upd_ckpt = [5000, 10000, 15000] 176 | )) 177 | 178 | for i_cfg in cfg.values(): 179 | i_cfg.set_new_allowed(True) 180 | 181 | 182 | def everytype2bool(v): 183 | if v.isnumeric(): 184 | return bool(int(v)) 185 | v = v.lower() 186 | if v in ['n', 'no', 'none', 'false']: 187 | return False 188 | return True 189 | 190 | 191 | def update_argparser(parser): 192 | for name in cfg.keys(): 193 | group = parser.add_argument_group(name) 194 | for key, value in getattr(cfg, name).items(): 195 | t = type(value) 196 | 197 | if t == bool: 198 | group.add_argument(f"--{key}", action='store_true' if t else 'store_false') 199 | elif t == list: 200 | group.add_argument(f"--{key}", default=value, type=type(value[0]), nargs="*") 201 | elif t == tuple: 202 | group.add_argument(f"--{key}", default=value, type=type(value[0]), nargs=len(value)) 203 | else: 204 | group.add_argument(f"--{key}", default=value, type=t) 205 | 206 | 207 | def update_config(cfg_files, cmd_lst=[]): 208 | # Update from config files 209 | if isinstance(cfg_files, str): 210 | cfg_files = [cfg_files] 211 | for cfg_path in cfg_files: 212 | cfg.merge_from_file(cfg_path) 213 | 214 | if len(cmd_lst) == 0: 215 | return 216 | 217 | # Parse the arguments from command line 218 | internal_parser = argparse.ArgumentParser() 219 | update_argparser(internal_parser) 220 | internal_args = internal_parser.parse_args(cmd_lst) 221 | 222 | # Update from command line args 223 | for name in cfg.keys(): 224 | cfg_subgroup = getattr(cfg, name) 225 | for key in cfg_subgroup.keys(): 226 | arg_val = getattr(internal_args, key) 227 | # Check if the default values is updated 228 | if internal_parser.get_default(key) != arg_val: 229 | cfg_subgroup[key] = arg_val 230 | -------------------------------------------------------------------------------- /src/dataloader/data_pack.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 
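# Minimal usage sketch (illustrative, not part of the original file):
#   data_pack = DataPack("data/nerf_synthetic/lego", res_downscale=2.0, use_test=True)
#   train_cams = data_pack.get_train_cameras()
#   test_cams = data_pack.get_test_cameras()
# The dataset format (COLMAP vs. NeRF-style JSON) is detected automatically from the
# files found under the given source path.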
8 | 9 | import os 10 | import random 11 | import numpy as np 12 | 13 | import torch 14 | 15 | from src.dataloader.reader_colmap_dataset import read_colmap_dataset 16 | from src.dataloader.reader_nerf_dataset import read_nerf_dataset 17 | 18 | from src.cameras import Camera, MiniCam 19 | 20 | 21 | class DataPack: 22 | 23 | def __init__(self, 24 | source_path, 25 | image_dir_name="images", 26 | res_downscale=0., 27 | res_width=0, 28 | skip_blend_alpha=False, 29 | alpha_is_white=False, 30 | data_device="cpu", 31 | use_test=False, 32 | test_every=8, 33 | camera_params_only=False): 34 | 35 | camera_creator = CameraCreator( 36 | res_downscale=res_downscale, 37 | res_width=res_width, 38 | skip_blend_alpha=skip_blend_alpha, 39 | alpha_is_white=alpha_is_white, 40 | data_device=data_device, 41 | camera_params_only=camera_params_only, 42 | ) 43 | 44 | sparse_path = os.path.join(source_path, "sparse") 45 | colmap_path = os.path.join(source_path, "colmap", "sparse") 46 | meta_path1 = os.path.join(source_path, "transforms_train.json") 47 | meta_path2 = os.path.join(source_path, "transforms.json") 48 | 49 | # TODO: read camera by multithreading 50 | 51 | if os.path.exists(sparse_path) or os.path.exists(colmap_path): 52 | print("Read dataset in COLMAP format.") 53 | dataset = read_colmap_dataset( 54 | source_path=source_path, 55 | image_dir_name=image_dir_name, 56 | use_test=use_test, 57 | test_every=test_every, 58 | camera_creator=camera_creator) 59 | elif os.path.exists(meta_path1) or os.path.exists(meta_path2): 60 | print("Read dataset in NeRF format.") 61 | dataset = read_nerf_dataset( 62 | source_path=source_path, 63 | use_test=use_test, 64 | test_every=test_every, 65 | camera_creator=camera_creator) 66 | else: 67 | raise Exception("Unknown scene type!") 68 | 69 | self._cameras = { 70 | 'train': dataset['train_cam_lst'], 71 | 'test': dataset['test_cam_lst'], 72 | } 73 | 74 | ############################## 75 | # Read additional dataset info 76 | ############################## 77 | # If the dataset suggested a scene bound 78 | self.suggested_bounding = dataset.get('suggested_bounding', None) 79 | 80 | # If the dataset provide a transformation to other coordinate 81 | self.to_world_matrix = None 82 | to_world_path = os.path.join(source_path, 'to_world_matrix.txt') 83 | if os.path.isfile(to_world_path): 84 | self.to_world_matrix = np.loadtxt(to_world_path) 85 | 86 | # If the dataset has a point cloud 87 | self.point_cloud = dataset.get('point_cloud', None) 88 | 89 | def get_train_cameras(self): 90 | return self._cameras['train'] 91 | 92 | def get_test_cameras(self): 93 | return self._cameras['test'] 94 | 95 | 96 | # Create a random sequence of image indices 97 | def compute_iter_idx(num_data, num_iter): 98 | tr_iter_idx = [] 99 | while len(tr_iter_idx) < num_iter: 100 | lst = list(range(num_data)) 101 | random.shuffle(lst) 102 | tr_iter_idx.extend(lst) 103 | return tr_iter_idx[:num_iter] 104 | 105 | 106 | # Function that create Camera instances while parsing dataset 107 | class CameraCreator: 108 | 109 | warned = False 110 | 111 | def __init__(self, 112 | res_downscale=0., 113 | res_width=0, 114 | skip_blend_alpha=False, 115 | alpha_is_white=False, 116 | data_device="cpu", 117 | camera_params_only=False): 118 | 119 | self.res_downscale = res_downscale 120 | self.res_width = res_width 121 | self.skip_blend_alpha = skip_blend_alpha 122 | self.alpha_is_white = alpha_is_white 123 | self.data_device = data_device 124 | self.camera_params_only = camera_params_only 125 | 126 | def __call__(self, 127 | image, 
128 | w2c, 129 | fovx, 130 | fovy, 131 | cx_p=0.5, 132 | cy_p=0.5, 133 | sparse_pt=None, 134 | image_name=""): 135 | 136 | if self.camera_params_only: 137 | return MiniCam( 138 | c2w=np.linalg.inv(w2c), 139 | fovx=fovx, fovy=fovy, 140 | cx_p=cx_p, cy_p=cy_p, 141 | width=image.size[0], 142 | height=image.size[1], 143 | image_name=image_name) 144 | 145 | # Determine target resolution 146 | if self.res_downscale > 0: 147 | downscale = self.res_downscale 148 | elif self.res_width > 0: 149 | downscale = image.size[0] / self.res_width 150 | else: 151 | downscale = 1 152 | 153 | total_pix = image.size[0] * image.size[1] 154 | if total_pix > 1200 ** 2 and not self.warned: 155 | self.warned = True 156 | suggest_ds = (total_pix ** 0.5) / 1200 157 | print(f"###################################################################") 158 | print(f"Image too large. Suggest to use `--res_downscale {suggest_ds:.1f}`.") 159 | print(f"###################################################################") 160 | 161 | # Resize image if needed 162 | if downscale != 1: 163 | image = image.resize(round(image.size[0] / downscale), round(image.size[1] / downscale)) 164 | 165 | # Convert image to tensor 166 | tensor = torch.tensor(np.array(image), dtype=torch.float32).moveaxis(-1, 0) / 255.0 167 | if tensor.shape[0] == 4: 168 | # Blend alpha channel 169 | tensor, mask = tensor.split([3, 1], dim=0) 170 | if not self.skip_blend_alpha: 171 | tensor = tensor * mask + int(self.alpha_is_white) * (1 - mask) 172 | 173 | return Camera( 174 | w2c=w2c, 175 | fovx=fovx, fovy=fovy, 176 | cx_p=cx_p, cy_p=cy_p, 177 | image=tensor, 178 | sparse_pt=sparse_pt, 179 | image_name=image_name) 180 | -------------------------------------------------------------------------------- /src/dataloader/reader_colmap_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 
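# Added note: the reader below expects a COLMAP reconstruction under
# <source_path>/sparse/0 or <source_path>/colmap/sparse/0, undistorted images under
# <source_path>/<image_dir_name>, and an optional nerf_normalization.json describing a
# suggested scene bound. Only PINHOLE and SIMPLE_PINHOLE camera models are supported.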
8 | 9 | import os 10 | import json 11 | import natsort 12 | import pycolmap 13 | import numpy as np 14 | from PIL import Image 15 | from pathlib import Path 16 | 17 | from src.utils.colmap_utils import parse_colmap_pts 18 | from src.utils.camera_utils import focal2fov 19 | 20 | 21 | def read_colmap_dataset(source_path, image_dir_name, use_test, test_every, camera_creator): 22 | 23 | source_path = Path(source_path) 24 | 25 | # Parse colmap meta data 26 | sparse_path = source_path / "sparse" / "0" 27 | if not sparse_path.exists(): 28 | sparse_path = source_path / "colmap" / "sparse" / "0" 29 | if not sparse_path.exists(): 30 | raise Exception("Can not find COLMAP reconstruction.") 31 | 32 | sfm = pycolmap.Reconstruction(sparse_path) 33 | point_cloud = parse_colmap_pts(sfm) 34 | correspondent = point_cloud.corr 35 | 36 | # Sort key by filename 37 | keys = natsort.natsorted( 38 | sfm.images.keys(), 39 | key = lambda k : sfm.images[k].name) 40 | 41 | # Load all images and cameras 42 | cam_lst = [] 43 | for key in keys: 44 | 45 | frame = sfm.images[key] 46 | 47 | # Load image 48 | image_path = source_path / image_dir_name / frame.name 49 | if not image_path.exists(): 50 | image_path = image_path.with_suffix('.png') 51 | if not image_path.exists(): 52 | image_path = image_path.with_suffix('.jpg') 53 | if not image_path.exists(): 54 | image_path = image_path.with_suffix('.JPG') 55 | if not image_path.exists(): 56 | raise Exception(f"File not found: {str(image_path)}") 57 | image = Image.open(image_path) 58 | 59 | # Load camera intrinsic 60 | if frame.camera.model.name == "SIMPLE_PINHOLE": 61 | focal_x, cx, cy = frame.camera.params 62 | fovx = focal2fov(focal_x, frame.camera.width) 63 | fovy = focal2fov(focal_x, frame.camera.height) 64 | cx_p = cx / frame.camera.width 65 | cy_p = cy / frame.camera.height 66 | elif frame.camera.model.name == "PINHOLE": 67 | focal_x, focal_y, cx, cy = frame.camera.params 68 | fovx = focal2fov(focal_x, frame.camera.width) 69 | fovy = focal2fov(focal_y, frame.camera.height) 70 | cx_p = cx / frame.camera.width 71 | cy_p = cy / frame.camera.height 72 | else: 73 | assert False, "Colmap camera model not handled: only undistorted datasets (PINHOLE or SIMPLE_PINHOLE cameras) supported!" 
74 | 75 | # Load camera extrinsic 76 | w2c = np.eye(4, dtype=np.float32) 77 | w2c[:3] = frame.cam_from_world.matrix() 78 | 79 | # Load sparse point 80 | sparse_pt = point_cloud.points[correspondent[frame.name]] 81 | 82 | cam_lst.append(camera_creator( 83 | image=image, 84 | w2c=w2c, 85 | fovx=fovx, 86 | fovy=fovy, 87 | cx_p=cx_p, 88 | cy_p=cy_p, 89 | sparse_pt=sparse_pt, 90 | image_name=image_path.name, 91 | )) 92 | 93 | # Split train/test 94 | if use_test: 95 | train_cam_lst = [ 96 | cam for i, cam in enumerate(cam_lst) 97 | if i % test_every != 0] 98 | test_cam_lst = [ 99 | cam for i, cam in enumerate(cam_lst) 100 | if i % test_every == 0] 101 | else: 102 | train_cam_lst = cam_lst 103 | test_cam_lst = [] 104 | 105 | # Parse main scene bound if there is 106 | nerf_normalization_path = os.path.join(source_path, "nerf_normalization.json") 107 | if os.path.isfile(nerf_normalization_path): 108 | with open(nerf_normalization_path) as f: 109 | nerf_normalization = json.load(f) 110 | suggested_center = np.array(nerf_normalization["center"], dtype=np.float32) 111 | suggested_radius = np.array(nerf_normalization["radius"], dtype=np.float32) 112 | suggested_bounding = np.stack([ 113 | suggested_center - suggested_radius, 114 | suggested_center + suggested_radius, 115 | ]) 116 | else: 117 | suggested_bounding = None 118 | 119 | # Pack dataset 120 | dataset = { 121 | 'train_cam_lst': train_cam_lst, 122 | 'test_cam_lst': test_cam_lst, 123 | 'suggested_bounding': suggested_bounding, 124 | 'point_cloud': point_cloud, 125 | } 126 | return dataset 127 | -------------------------------------------------------------------------------- /src/dataloader/reader_nerf_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 
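# Added note: the reader below expects transforms_train.json (or transforms.json) and an
# optional transforms_test.json in the NeRF/Blender convention; if the metadata contains a
# "colmap" entry, the referenced reconstruction provides per-view sparse points. Without a
# nerf_normalization.json, a synthetic-scene bound of [-1.5, 1.5]^3 is assumed.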
8 | 9 | import os 10 | import json 11 | import pycolmap 12 | import numpy as np 13 | from PIL import Image 14 | from pathlib import Path 15 | 16 | from src.utils.colmap_utils import parse_colmap_pts 17 | from src.utils.camera_utils import fov2focal, focal2fov 18 | 19 | 20 | def read_nerf_dataset(source_path, test_every, use_test, camera_creator): 21 | 22 | source_path = Path(source_path) 23 | 24 | # Load training cameras 25 | if (source_path / "transforms_train.json").exists(): 26 | train_cam_lst, point_cloud = read_cameras_from_json( 27 | source_path=source_path, 28 | meta_fname="transforms_train.json", 29 | camera_creator=camera_creator) 30 | else: 31 | train_cam_lst, point_cloud = read_cameras_from_json( 32 | source_path=source_path, 33 | meta_fname="transforms.json", 34 | camera_creator=camera_creator) 35 | 36 | # Load testing cameras 37 | if (source_path / "transforms_test.json").exists(): 38 | test_cam_lst, _ = read_cameras_from_json( 39 | source_path=source_path, 40 | meta_fname="transforms_test.json", 41 | camera_creator=camera_creator) 42 | elif use_test: 43 | test_cam_lst = [ 44 | cam for i, cam in enumerate(train_cam_lst) 45 | if i % test_every == 0] 46 | train_cam_lst = [ 47 | cam for i, cam in enumerate(train_cam_lst) 48 | if i % test_every != 0] 49 | else: 50 | test_cam_lst = [] 51 | 52 | # Parse main scene bound if there is 53 | nerf_normalization_path = os.path.join(source_path, "nerf_normalization.json") 54 | if os.path.isfile(nerf_normalization_path): 55 | with open(nerf_normalization_path) as f: 56 | nerf_normalization = json.load(f) 57 | suggested_center = np.array(nerf_normalization["center"], dtype=np.float32) 58 | suggested_radius = np.array(nerf_normalization["radius"], dtype=np.float32) 59 | suggested_bounding = np.stack([ 60 | suggested_center - suggested_radius, 61 | suggested_center + suggested_radius, 62 | ]) 63 | else: 64 | # Assume synthetic blender scene bound 65 | suggested_bounding = np.array([ 66 | [-1.5, -1.5, -1.5], 67 | [1.5, 1.5, 1.5], 68 | ], dtype=np.float32) 69 | 70 | # Pack dataset 71 | dataset = { 72 | 'train_cam_lst': train_cam_lst, 73 | 'test_cam_lst': test_cam_lst, 74 | 'suggested_bounding': suggested_bounding, 75 | 'point_cloud': point_cloud, 76 | } 77 | return dataset 78 | 79 | 80 | def read_cameras_from_json(source_path, meta_fname, camera_creator): 81 | 82 | with open(source_path / meta_fname) as f: 83 | meta = json.load(f) 84 | 85 | # Load COLMAP points if there is 86 | if "colmap" in meta: 87 | sfm = pycolmap.Reconstruction(source_path / meta["colmap"]["path"]) 88 | if "transform" in meta["colmap"]: 89 | transform = np.array(meta["colmap"]["transform"]) 90 | else: 91 | transform = None 92 | point_cloud = parse_colmap_pts(sfm, transform) 93 | correspondent = point_cloud.corr 94 | else: 95 | point_cloud = None 96 | correspondent = None 97 | 98 | # Load global setup 99 | global_fovx = meta.get("camera_angle_x", 0) 100 | global_fovy = meta.get("camera_angle_y", 0) 101 | global_cx_p = parse_principle_point(meta, is_cx=True) 102 | global_cy_p = parse_principle_point(meta, is_cx=False) 103 | 104 | # Load all images and cameras 105 | cam_lst = [] 106 | for frame in meta["frames"]: 107 | 108 | # Guess the rgb image path and load image 109 | path_candidates = [ 110 | source_path / frame["file_path"], 111 | source_path / (frame["file_path"] + '.png'), 112 | source_path / (frame["file_path"] + '.jpg'), 113 | source_path / (frame["file_path"] + '.JPG'), 114 | ] 115 | for image_path in path_candidates: 116 | if image_path.exists(): 117 | break 118 | 
119 | if frame.get('heldout', False): 120 | image = Image.new('RGB', (frame['w'], frame['h'])) 121 | elif image_path.exists(): 122 | image = Image.open(image_path) 123 | else: 124 | raise Exception(f"File not found: {str(image_path)}") 125 | 126 | # Load camera intrinsic 127 | fovx = frame.get('camera_angle_x', global_fovx) 128 | cx_p = frame.get('cx_p', global_cx_p) 129 | cy_p = frame.get('cy_p', global_cy_p) 130 | 131 | if 'camera_angle_y' in frame: 132 | fovy = frame['camera_angle_y'] 133 | elif global_fovy > 0: 134 | fovy = global_fovy 135 | else: 136 | fovy = focal2fov(fov2focal(fovx, image.size[0]), image.size[1]) 137 | 138 | # Load camera pose 139 | c2w = np.array(frame["transform_matrix"]) 140 | c2w[:3, 1:3] *= -1 # from opengl y-up-z-back to colmap y-down-z-forward 141 | w2c = np.linalg.inv(c2w).astype(np.float32) 142 | 143 | # Load sparse point 144 | if point_cloud is not None: 145 | sparse_pt = point_cloud.points[correspondent[image_path.name]] 146 | else: 147 | sparse_pt = None 148 | 149 | cam_lst.append(camera_creator( 150 | image=image, 151 | w2c=w2c, 152 | fovx=fovx, 153 | fovy=fovy, 154 | cx_p=cx_p, 155 | cy_p=cy_p, 156 | sparse_pt=sparse_pt, 157 | image_name=image_path.name, 158 | )) 159 | 160 | return cam_lst, point_cloud 161 | 162 | 163 | def parse_principle_point(info, is_cx): 164 | key = "cx" if is_cx else "cy" 165 | key_res = "w" if is_cx else "h" 166 | if f"{key}_p" in info: 167 | return info[f"{key}_p"] 168 | if key in info and key_res in info: 169 | return info[key] / info[key_res] 170 | return None 171 | -------------------------------------------------------------------------------- /src/sparse_voxel_gears/io.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | import os 10 | import re 11 | import torch 12 | 13 | from src.utils import octree_utils 14 | 15 | class SVInOut: 16 | 17 | def save(self, path, quantize=False): 18 | ''' 19 | Save the necessary attributes and parameters for reproducing rendering. 20 | ''' 21 | os.makedirs(os.path.dirname(path), exist_ok=True) 22 | state_dict = { 23 | 'active_sh_degree': self.active_sh_degree, 24 | 'scene_center': self.scene_center.data.contiguous(), 25 | 'inside_extent': self.inside_extent.data.contiguous(), 26 | 'scene_extent': self.scene_extent.data.contiguous(), 27 | 'octpath': self.octpath.data.contiguous(), 28 | 'octlevel': self.octlevel.data.contiguous(), 29 | '_geo_grid_pts': self._geo_grid_pts.data.contiguous(), 30 | '_sh0': self._sh0.data.contiguous(), 31 | '_shs': self._shs.data.contiguous(), 32 | } 33 | 34 | if quantize: 35 | quantize_state_dict(state_dict) 36 | state_dict['quantized'] = True 37 | else: 38 | state_dict['quantized'] = False 39 | 40 | for k, v in state_dict.items(): 41 | if torch.is_tensor(v): 42 | state_dict[k] = v.cpu() 43 | torch.save(state_dict, path) 44 | self.latest_save_path = path 45 | 46 | def load(self, path): 47 | ''' 48 | Load the saved models. 
49 | ''' 50 | self.loaded_path = path 51 | state_dict = torch.load(path, map_location="cpu", weights_only=False) 52 | 53 | if state_dict.get('quantized', False): 54 | dequantize_state_dict(state_dict) 55 | 56 | self.active_sh_degree = state_dict['active_sh_degree'] 57 | 58 | self.scene_center = state_dict['scene_center'].cuda() 59 | self.inside_extent = state_dict['inside_extent'].cuda() 60 | self.scene_extent = state_dict['scene_extent'].cuda() 61 | 62 | self.octpath = state_dict['octpath'].cuda() 63 | self.octlevel = state_dict['octlevel'].cuda().to(torch.int8) 64 | 65 | self._geo_grid_pts = state_dict['_geo_grid_pts'].cuda().requires_grad_() 66 | self._sh0 = state_dict['_sh0'].cuda().requires_grad_() 67 | self._shs = state_dict['_shs'].cuda().requires_grad_() 68 | 69 | # Subdivision priority trackor 70 | self._subdiv_p = torch.ones( 71 | [self.num_voxels, 1], 72 | dtype=torch.float32, device="cuda").requires_grad_() 73 | 74 | def save_iteration(self, model_path, iteration, quantize=False): 75 | path = os.path.join(model_path, "checkpoints", f"iter{iteration:06d}_model.pt") 76 | self.save(path, quantize=quantize) 77 | self.latest_save_iter = iteration 78 | 79 | def load_iteration(self, model_path, iteration=-1): 80 | if iteration == -1: 81 | # Find the maximum iteration if it is -1. 82 | fnames = os.listdir(os.path.join(model_path, "checkpoints")) 83 | loaded_iter = max(int(re.sub("[^0-9]", "", fname)) for fname in fnames) 84 | else: 85 | loaded_iter = iteration 86 | 87 | path = os.path.join(model_path, "checkpoints", f"iter{loaded_iter:06d}_model.pt") 88 | self.load(path) 89 | 90 | self.loaded_iter = iteration 91 | 92 | return loaded_iter 93 | 94 | 95 | # Quantization utilities to reduce size when saving model. 96 | # It can reduce ~70% model size with minor PSNR drop. 
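# Added note on the scheme below (rough intuition, not a guarantee): each float32 value is
# replaced by a uint8 index into a 256-entry codebook that is seeded from the value
# quantiles and then refined by a few mean-update iterations, so the quantized tensors
# shrink by roughly 4x while the shared codebooks add only negligible overhead.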
97 | def quantize_state_dict(state_dict): 98 | state_dict['_geo_grid_pts'] = quantization(state_dict['_geo_grid_pts']) 99 | state_dict['_sh0'] = [quantization(v) for v in state_dict['_sh0'].split(1, dim=1)] 100 | state_dict['_shs'] = [quantization(v) for v in state_dict['_shs'].split(1, dim=1)] 101 | 102 | def dequantize_state_dict(state_dict): 103 | state_dict['_geo_grid_pts'] = dequantization(state_dict['_geo_grid_pts']) 104 | state_dict['_sh0'] = torch.cat( 105 | [dequantization(v) for v in state_dict['_sh0']], dim=1) 106 | state_dict['_shs'] = torch.cat( 107 | [dequantization(v) for v in state_dict['_shs']], dim=1) 108 | 109 | def quantization(src_tensor, max_iter=10): 110 | src_shape = src_tensor.shape 111 | src_vals = src_tensor.flatten().contiguous() 112 | order = src_vals.argsort() 113 | quantile_ind = (torch.linspace(0,1,257) * (len(order) - 1)).long().clamp_(0, len(order)-1) 114 | codebook = src_vals[order[quantile_ind]].contiguous() 115 | codebook[0] = -torch.inf 116 | ind = torch.searchsorted(codebook, src_vals) 117 | 118 | codebook = codebook[1:] 119 | ind = (ind - 1).clamp_(0, 255) 120 | 121 | diff_l = (src_vals - codebook[ind-1]).abs() 122 | diff_m = (src_vals - codebook[ind]).abs() 123 | ind = ind - 1 + (diff_m < diff_l) 124 | ind.clamp_(0, 255) 125 | 126 | for _ in range(max_iter): 127 | codebook = torch.zeros_like(codebook).index_reduce_( 128 | dim=0, 129 | index=ind, 130 | source=src_vals, 131 | reduce='mean', 132 | include_self=False) 133 | diff_l = (src_vals - codebook[ind-1]).abs() 134 | diff_r = (src_vals - codebook[(ind+1).clamp_max_(255)]).abs() 135 | diff_m = (src_vals - codebook[ind]).abs() 136 | upd_mask = torch.minimum(diff_l, diff_r) < diff_m 137 | if upd_mask.sum() == 0: 138 | break 139 | shift = (diff_r < diff_l) * 2 - 1 140 | ind[upd_mask] += shift[upd_mask] 141 | ind.clamp_(0, 255) 142 | 143 | codebook = torch.zeros_like(codebook).index_reduce_( 144 | dim=0, 145 | index=ind, 146 | source=src_vals, 147 | reduce='mean', 148 | include_self=False) 149 | 150 | return dict( 151 | index=ind.reshape(src_shape).to(torch.uint8), 152 | codebook=codebook, 153 | ) 154 | 155 | def dequantization(quant_dict): 156 | return quant_dict['codebook'][quant_dict['index'].long()] 157 | -------------------------------------------------------------------------------- /src/sparse_voxel_gears/pooling.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 
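# Added note: both pooling helpers below coarsen the sparse grid by masking the low bits
# of each voxel's octpath (3 bits per Octree level) and de-duplicating the result; the
# returned invmap maps every original voxel to its pooled voxel so that per-voxel
# quantities can still be gathered after pooling.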
8 | 9 | import torch 10 | import svraster_cuda 11 | 12 | from src.utils import octree_utils 13 | 14 | 15 | class SVPooling: 16 | 17 | def pooling_to_level(self, max_level, octpath=None, octlevel=None): 18 | octpath = self.octpath if octpath is None else octpath 19 | octlevel = self.octlevel if octlevel is None else octlevel 20 | 21 | num_bit_to_mask = 3 * max(0, svraster_cuda.meta.MAX_NUM_LEVELS - max_level) 22 | octpath = (octpath >> num_bit_to_mask) << num_bit_to_mask 23 | octlevel = octlevel.clamp_max(max_level) 24 | octpack, invmap = torch.stack([octpath, octlevel]).unique(sorted=True, dim=1, return_inverse=True) 25 | octpath, octlevel = octpack 26 | octlevel = octlevel.to(torch.int8) 27 | 28 | vox_center, vox_size = octree_utils.octpath_decoding( 29 | octpath, octlevel, self.scene_center, self.scene_extent) 30 | 31 | return dict( 32 | invmap=invmap, 33 | octpath=octpath, 34 | octlevel=octlevel, 35 | vox_center=vox_center, 36 | vox_size=vox_size, 37 | ) 38 | 39 | def pooling_to_rate(self, cameras, max_rate, octpath=None, octlevel=None): 40 | octpath = self.octpath.clone() if octpath is None else octpath 41 | octlevel = self.octlevel.clone() if octlevel is None else octlevel 42 | invmap = torch.arange(len(octpath), device="cuda") 43 | 44 | for _ in range(svraster_cuda.meta.MAX_NUM_LEVELS): 45 | vox_center, vox_size = octree_utils.octpath_decoding(octpath, octlevel, self.scene_center, self.scene_extent) 46 | samp_rate = svraster_cuda.renderer.mark_max_samp_rate(cameras, octpath, vox_center, vox_size) 47 | pool_mask = (samp_rate < max_rate) & (octlevel.squeeze(1) > 1) 48 | if pool_mask.sum() == 0: 49 | break 50 | octlevel[pool_mask] = octlevel[pool_mask] - 1 51 | num_bit_to_mask = 3 * (svraster_cuda.meta.MAX_NUM_LEVELS - octlevel[pool_mask]) 52 | octpath[pool_mask] = octpath[pool_mask] >> num_bit_to_mask << num_bit_to_mask 53 | 54 | octpack, cur_invmap = torch.stack([octpath, octlevel]).unique(sorted=True, dim=1, return_inverse=True) 55 | octpath, octlevel = octpack 56 | octlevel = octlevel.to(torch.int8) 57 | invmap = cur_invmap[invmap] 58 | 59 | vox_center, vox_size = octree_utils.octpath_decoding( 60 | octpath, octlevel, self.scene_center, self.scene_extent) 61 | 62 | return dict( 63 | invmap=invmap, 64 | octpath=octpath, 65 | octlevel=octlevel, 66 | vox_center=vox_center, 67 | vox_size=vox_size, 68 | ) 69 | -------------------------------------------------------------------------------- /src/sparse_voxel_gears/properties.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 
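# Added note: the derived voxel attributes below (vox_center, vox_size, grid_pts_key,
# vox_key, vox_size_inv, grid_pts_xyz) are computed lazily and cached; the signature
# property (voxel count plus the identities of octpath/octlevel) is used to detect when
# the sparse layout has changed and the cache must be rebuilt.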
8 | 9 | import torch 10 | 11 | from src.utils import octree_utils 12 | from src.utils.fuser_utils import rgb_fusion 13 | from src.utils.activation_utils import rgb2shzero 14 | 15 | import svraster_cuda 16 | 17 | 18 | class SVProperties: 19 | 20 | @property 21 | def num_voxels(self): 22 | return len(self.octpath) 23 | 24 | @property 25 | def num_grid_pts(self): 26 | return len(self.grid_pts_key) 27 | 28 | @property 29 | def scene_min(self): 30 | return self.scene_center - 0.5 * self.scene_extent 31 | 32 | @property 33 | def scene_max(self): 34 | return self.scene_center + 0.5 * self.scene_extent 35 | 36 | @property 37 | def inside_min(self): 38 | return self.scene_center - 0.5 * self.inside_extent 39 | 40 | @property 41 | def inside_max(self): 42 | return self.scene_center + 0.5 * self.inside_extent 43 | 44 | @property 45 | def inside_mask(self): 46 | isin = ((self.inside_min < self.vox_center) & (self.vox_center < self.inside_max)).all(1) 47 | return isin 48 | 49 | @property 50 | def sh0(self): 51 | return self._sh0 52 | 53 | @property 54 | def shs(self): 55 | return self._shs 56 | 57 | @property 58 | def subdivision_priority(self): 59 | return self._subdiv_p.grad 60 | 61 | def reset_subdivision_priority(self): 62 | self._subdiv_p.grad = None 63 | 64 | @property 65 | def signature(self): 66 | # Signature to check if the voxel grid layout is updated 67 | return (self.num_voxels, id(self.octpath), id(self.octlevel)) 68 | 69 | def _check_derived_voxel_attr(self): 70 | # Lazy computation of inverse voxel sizes 71 | signature = self.signature 72 | need_recompute = not hasattr(self, '_check_derived_voxel_attr_signature') or \ 73 | self._check_derived_voxel_attr_signature != signature 74 | if need_recompute: 75 | self._vox_center, self._vox_size = octree_utils.octpath_decoding( 76 | self.octpath, self.octlevel, self.scene_center, self.scene_extent) 77 | self._grid_pts_key, self._vox_key = octree_utils.build_grid_pts_link(self.octpath, self.octlevel) 78 | self._check_derived_voxel_attr_signature = signature 79 | 80 | @property 81 | def vox_center(self): 82 | self._check_derived_voxel_attr() 83 | return self._vox_center 84 | 85 | @property 86 | def vox_size(self): 87 | self._check_derived_voxel_attr() 88 | return self._vox_size 89 | 90 | @property 91 | def grid_pts_key(self): 92 | self._check_derived_voxel_attr() 93 | return self._grid_pts_key 94 | 95 | @property 96 | def vox_key(self): 97 | self._check_derived_voxel_attr() 98 | return self._vox_key 99 | 100 | @property 101 | def vox_size_inv(self): 102 | # Lazy computation of inverse voxel sizes 103 | signature = self.signature 104 | need_recompute = not hasattr(self, '_vox_size_inv_signature') or \ 105 | self._vox_size_inv_signature != signature 106 | if need_recompute: 107 | self._vox_size_inv = 1 / self.vox_size 108 | self._vox_size_inv_signature = signature 109 | return self._vox_size_inv 110 | 111 | @property 112 | def grid_pts_xyz(self): 113 | # Lazy computation of grid points xyz 114 | signature = self.signature 115 | need_recompute = not hasattr(self, '_grid_pts_xyz_signature') or \ 116 | self._grid_pts_xyz_signature != signature 117 | if need_recompute: 118 | self._grid_pts_xyz = octree_utils.compute_gridpoints_xyz( 119 | self.grid_pts_key, self.scene_center, self.scene_extent) 120 | self._grid_pts_xyz_signature = signature 121 | return self._grid_pts_xyz 122 | 123 | @torch.no_grad() 124 | def reset_sh_from_cameras(self, cameras): 125 | self._sh0.data.copy_(rgb2shzero(rgb_fusion(self, cameras))) 126 | self._shs.data.zero_() 127 | 128 | 
def apply_tv_on_density_field(self, lambda_tv_density): 129 | if self._geo_grid_pts.grad is None: 130 | self._geo_grid_pts.grad = torch.zeros_like(self._geo_grid_pts.data) 131 | svraster_cuda.grid_loss_bw.total_variation( 132 | grid_pts=self._geo_grid_pts, 133 | vox_key=self.vox_key, 134 | weight=lambda_tv_density, 135 | vox_size_inv=self.vox_size_inv, 136 | no_tv_s=True, 137 | tv_sparse=False, 138 | grid_pts_grad=self._geo_grid_pts.grad) 139 | -------------------------------------------------------------------------------- /src/sparse_voxel_gears/renderer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | import torch 10 | import svraster_cuda 11 | 12 | from src.utils.image_utils import resize_rendering 13 | 14 | class SVRenderer: 15 | 16 | def freeze_vox_geo(self): 17 | ''' 18 | Freeze the grid-point parameters and pre-gather them into each voxel. 19 | ''' 20 | with torch.no_grad(): 21 | self.frozen_vox_geo = svraster_cuda.renderer.GatherGeoParams.apply( 22 | self.vox_key, 23 | torch.arange(self.num_voxels, device="cuda"), 24 | self._geo_grid_pts 25 | ) 26 | self._geo_grid_pts.requires_grad = False 27 | 28 | def unfreeze_vox_geo(self): 29 | ''' 30 | Unfreeze the grid-point parameters. 31 | ''' 32 | del self.frozen_vox_geo 33 | self._geo_grid_pts.requires_grad = True 34 | 35 | def vox_fn(self, idx, cam_pos, color_mode=None, viewdir=None): 36 | ''' 37 | Per-frame voxel property processing. Two important operations: 38 | 1. Gather grid-point parameters into each voxel. 39 | 2. Compute the view-dependent color of each voxel. 40 | 41 | Input: 42 | @idx Indices of the active voxels for the current frame. 43 | @cam_pos Camera position. 44 | Output: 45 | @vox_params A dictionary of the pre-processed voxel properties. 46 | ''' 47 | 48 | # Gather the density values at the eight corners of each voxel. 49 | # They define a trilinear density field.
50 | # The final tensor are in shape [#vox, 8] 51 | if hasattr(self, 'frozen_vox_geo'): 52 | geos = self.frozen_vox_geo 53 | else: 54 | geos = svraster_cuda.renderer.GatherGeoParams.apply( 55 | self.vox_key, 56 | idx, 57 | self._geo_grid_pts 58 | ) 59 | 60 | # Compute voxel colors 61 | if color_mode is None or color_mode == "sh": 62 | active_sh_degree = self.active_sh_degree 63 | color_mode = "sh" 64 | elif color_mode.startswith("sh"): 65 | active_sh_degree = int(color_mode[2]) 66 | color_mode = "sh" 67 | 68 | if color_mode == "sh": 69 | rgbs = svraster_cuda.renderer.SH_eval.apply( 70 | active_sh_degree, 71 | idx, 72 | self.vox_center, 73 | cam_pos, 74 | viewdir, # Ignore above two when viewdir is not None 75 | self.sh0, 76 | self.shs, 77 | ) 78 | elif color_mode == "rand": 79 | rgbs = torch.rand([self.num_voxels, 3], dtype=torch.float32, device="cuda") 80 | elif color_mode == "dontcare": 81 | rgbs = torch.empty([self.num_voxels, 3], dtype=torch.float32, device="cuda") 82 | else: 83 | raise NotImplementedError 84 | 85 | # Pack everything 86 | vox_params = { 87 | 'geos': geos, 88 | 'rgbs': rgbs, 89 | 'subdiv_p': self._subdiv_p, # Dummy param to record subdivision priority 90 | } 91 | if vox_params['subdiv_p'] is None: 92 | vox_params['subdiv_p'] = torch.ones([self.num_voxels, 1], device="cuda") 93 | 94 | return vox_params 95 | 96 | def render( 97 | self, 98 | camera, 99 | color_mode=None, 100 | track_max_w=False, 101 | ss=None, 102 | output_depth=False, 103 | output_normal=False, 104 | output_T=False, 105 | rand_bg=False, 106 | use_auto_exposure=False, 107 | **other_opt): 108 | 109 | ################################### 110 | # Pre-processing 111 | ################################### 112 | if ss is None: 113 | ss = self.ss 114 | w_src, h_src = camera.image_width, camera.image_height 115 | w, h = round(w_src * ss), round(h_src * ss) 116 | w_ss, h_ss = w / w_src, h / h_src 117 | if ss != 1.0 and 'gt_color' in other_opt: 118 | other_opt['gt_color'] = resize_rendering(other_opt['gt_color'], size=(h, w)) 119 | 120 | n_samp_per_vox = other_opt.pop('n_samp_per_vox', self.n_samp_per_vox) 121 | 122 | ################################### 123 | # Call low-level rasterization API 124 | ################################### 125 | raster_settings = svraster_cuda.renderer.RasterSettings( 126 | color_mode=color_mode, 127 | n_samp_per_vox=n_samp_per_vox, 128 | image_width=w, 129 | image_height=h, 130 | tanfovx=camera.tanfovx, 131 | tanfovy=camera.tanfovy, 132 | cx=camera.cx * w_ss, 133 | cy=camera.cy * h_ss, 134 | w2c_matrix=camera.w2c, 135 | c2w_matrix=camera.c2w, 136 | bg_color=float(self.white_background), 137 | near=camera.near, 138 | need_depth=output_depth, 139 | need_normal=output_normal, 140 | track_max_w=track_max_w, 141 | **other_opt) 142 | color, depth, normal, T, max_w = svraster_cuda.renderer.rasterize_voxels( 143 | raster_settings, 144 | self.octpath, 145 | self.vox_center, 146 | self.vox_size, 147 | self.vox_fn) 148 | 149 | ################################### 150 | # Post-processing and pack output 151 | ################################### 152 | if rand_bg: 153 | color = color + T * torch.rand_like(color, requires_grad=False) 154 | elif not self.white_background and not self.black_background: 155 | color = color + T * color.mean((1,2), keepdim=True) 156 | 157 | if use_auto_exposure: 158 | color = camera.auto_exposure_apply(color) 159 | 160 | render_pkg = { 161 | 'color': color, 162 | 'depth': depth if output_depth else None, 163 | 'normal': normal if output_normal else None, 164 | 'T': T if 
output_T else None, 165 | 'max_w': max_w, 166 | } 167 | 168 | for k in ['color', 'depth', 'normal', 'T']: 169 | render_pkg[f'raw_{k}'] = render_pkg[k] 170 | 171 | # Post process super-sampling 172 | if render_pkg[k] is not None and render_pkg[k].shape[-2:] != (h_src, w_src): 173 | render_pkg[k] = resize_rendering(render_pkg[k], size=(h_src, w_src)) 174 | 175 | # Clip intensity 176 | render_pkg['color'] = render_pkg['color'].clamp(0, 1) 177 | 178 | return render_pkg 179 | -------------------------------------------------------------------------------- /src/sparse_voxel_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | from src.sparse_voxel_gears.constructor import SVConstructor 10 | from src.sparse_voxel_gears.properties import SVProperties 11 | from src.sparse_voxel_gears.renderer import SVRenderer 12 | from src.sparse_voxel_gears.adaptive import SVAdaptive 13 | from src.sparse_voxel_gears.io import SVInOut 14 | from src.sparse_voxel_gears.pooling import SVPooling 15 | 16 | 17 | class SparseVoxelModel(SVConstructor, SVProperties, SVRenderer, SVAdaptive, SVInOut, SVPooling): 18 | 19 | def __init__(self, 20 | n_samp_per_vox=1, # Number of sampled points per visited voxel 21 | sh_degree=3, # Use 3 * (k+1)^2 params per voxel for view-dependent colors 22 | ss=1.5, # Super-sampling rate for anti-aliasing 23 | white_background=False, # Assume a white background 24 | black_background=False, # Assume a black background 25 | ): 26 | ''' 27 | Set up the model metadata. At this point, no voxel is allocated. 28 | Use the following methods to allocate voxels and parameters. 29 | 30 | 1. `model_load` defined in `src/sparse_voxel_gears/io.py`. 31 | Load a saved model from a given path. 32 | 33 | 2. `model_init` defined in `src/sparse_voxel_gears/constructor.py`. 34 | Heuristically initialize the sparse grid layout and parameters from the training data. 35 | ''' 36 | super().__init__() 37 | 38 | self.n_samp_per_vox = n_samp_per_vox 39 | self.max_sh_degree = sh_degree 40 | self.ss = ss 41 | self.white_background = white_background 42 | self.black_background = black_background 43 | 44 | # List the variable names 45 | self.per_voxel_attr_lst = [ 46 | 'octpath', 'octlevel', 47 | '_subdiv_p', 48 | ] 49 | self.per_voxel_param_lst = [ 50 | '_sh0', '_shs', 51 | ] 52 | self.grid_pts_param_lst = [ 53 | '_geo_grid_pts', 54 | ] 55 | 56 | # To be initialized by model_init 57 | self.scene_center = None 58 | self.scene_extent = None 59 | self.inside_extent = None 60 | self.octpath = None 61 | self.octlevel = None 62 | self.active_sh_degree = sh_degree 63 | 64 | self._geo_grid_pts = None 65 | self._sh0 = None 66 | self._shs = None 67 | self._subdiv_p = None 68 | -------------------------------------------------------------------------------- /src/utils/activation_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | import torch 10 | from svraster_cuda.meta import STEP_SZ_SCALE 11 | 12 | def softplus(x): 13 | return torch.nn.functional.softplus(x) 14 | 15 | def exp_linear_10(x): 16 | return torch.where(x > 1, x, torch.exp(x - 1)) 17 | 18 | def exp_linear_11(x): 19 | return torch.where(x > 1.1, x, torch.exp(0.909090909091 * x - 0.904689820196)) 20 | 21 | def exp_linear_20(x): 22 | return torch.where(x > 2.0, x, torch.exp(0.5 * x - 0.30685281944)) 23 | 24 | def softplus_inverse(y): 25 | return y + torch.log(-torch.expm1(-y)) 26 | 27 | def exp_linear_10_inverse(y): 28 | return torch.where(y > 1, y, torch.log(y) + 1) 29 | 30 | def exp_linear_11_inverse(y): 31 | return torch.where(y > 1.1, y, (torch.log(y) + 0.904689820196) / 0.909090909091) 32 | 33 | def exp_linear_20_inverse(y): 34 | return torch.where(y > 2.0, y, (torch.log(y) + 0.30685281944) / 0.5) 35 | 36 | def smooth_clamp_max(x, max_val): 37 | return max_val - torch.nn.functional.softplus(max_val - x) 38 | 39 | def density2alpha(density, interval): 40 | return 1 - torch.exp(-STEP_SZ_SCALE * interval * density) 41 | 42 | def alpha2density(alpha, interval): 43 | return torch.log(1 - alpha) / (-STEP_SZ_SCALE * interval) 44 | 45 | def rgb2shzero(x): 46 | return (x - 0.5) / 0.28209479177387814 47 | 48 | def shzero2rgb(x): 49 | return x * 0.28209479177387814 + 0.5 50 | -------------------------------------------------------------------------------- /src/utils/bounding_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
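A small self-check of the activation helpers in src/utils/activation_utils.py above (illustrative; importing the module requires the built `svraster_cuda` extension because of the `STEP_SZ_SCALE` import):

    import torch
    from src.utils.activation_utils import (
        exp_linear_10, exp_linear_10_inverse, density2alpha, alpha2density)

    x = torch.linspace(-3.0, 3.0, steps=13)
    # exp_linear_10_inverse undoes exp_linear_10 on both sides of the joint at 1.0
    print(torch.allclose(exp_linear_10_inverse(exp_linear_10(x)), x, atol=1e-6))  # True

    alpha = torch.tensor([0.1, 0.5, 0.9])
    interval = 0.01
    # alpha <-> density conversion is a consistent round trip for a fixed interval
    print(torch.allclose(density2alpha(alpha2density(alpha, interval), interval), alpha))  # True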
8 | 9 | import numpy as np 10 | 11 | 12 | def decide_main_bounding(bound_mode="default", 13 | forward_dist_scale=1.0, # For "forward" mode 14 | pcd_density_rate=0.1, # For "pcd" mode 15 | bound_scale=1.0, # Scaling of the bounding 16 | tr_cams=None, # Cameras 17 | pcd=None, # Point cloud 18 | suggested_bounding=None): 19 | if bound_mode == "default" and suggested_bounding is not None: 20 | print("Use suggested bounding") 21 | center = suggested_bounding.mean(0) 22 | radius = (suggested_bounding[1] - suggested_bounding[0]) * 0.5 23 | elif bound_mode in ["camera_max", "camera_median"]: 24 | center, radius = main_scene_bound_camera_heuristic( 25 | cams=tr_cams, bound_mode=bound_mode) 26 | elif bound_mode == "forward": 27 | center, radius = main_scene_bound_forward_heuristic( 28 | cams=tr_cams, forward_dist_scale=forward_dist_scale) 29 | elif bound_mode == "pcd": 30 | center, radius = main_scene_bound_pcd_heuristic( 31 | pcd=pcd, pcd_density_rate=pcd_density_rate) 32 | elif bound_mode == "default": 33 | cam_lookats = np.stack([cam.lookat.tolist() for cam in tr_cams]) 34 | lookat_dots = (cam_lookats[:,None] * cam_lookats).sum(-1) 35 | is_forward_facing = lookat_dots.min() > 0 36 | 37 | if is_forward_facing: 38 | center, radius = main_scene_bound_forward_heuristic( 39 | cams=tr_cams, forward_dist_scale=forward_dist_scale) 40 | else: 41 | center, radius = main_scene_bound_camera_heuristic( 42 | cams=tr_cams, bound_mode="camera_median") 43 | else: 44 | raise NotImplementedError 45 | 46 | radius = radius * bound_scale 47 | 48 | bounding = np.array([ 49 | center - radius, 50 | center + radius, 51 | ], dtype=np.float32) 52 | return bounding 53 | 54 | 55 | def main_scene_bound_camera_heuristic(cams, bound_mode): 56 | print("Heuristic bounding:", bound_mode) 57 | cam_positions = np.stack([cam.position.tolist() for cam in cams]) 58 | center = cam_positions.mean(0) 59 | dists = np.linalg.norm(cam_positions - center, axis=1) 60 | if bound_mode == "camera_max": 61 | radius = np.max(dists) 62 | elif bound_mode == "camera_median": 63 | radius = np.median(dists) 64 | else: 65 | raise NotImplementedError 66 | return center, radius 67 | 68 | 69 | def main_scene_bound_forward_heuristic(cams, forward_dist_scale): 70 | print("Heuristic bounding: forward") 71 | positions = np.stack([cam.position.tolist() for cam in cams]) 72 | cam_center = positions.mean(0) 73 | cam_lookat = np.stack([cam.lookat.tolist() for cam in cams]).mean(0) 74 | cam_lookat /= np.linalg.norm(cam_lookat) 75 | cam_extent = 2 * np.linalg.norm(positions - cam_center, axis=1).max() 76 | 77 | center = cam_center + forward_dist_scale * cam_extent * cam_lookat 78 | radius = 0.8 * forward_dist_scale * cam_extent 79 | 80 | return center, radius 81 | 82 | 83 | def main_scene_bound_pcd_heuristic(pcd, pcd_density_rate): 84 | print("Heuristic bounding: pcd") 85 | center = np.median(pcd.points, axis=0) 86 | dist = np.abs(pcd.points - center).max(axis=1) 87 | dist = np.sort(dist) 88 | density = (1 + np.arange(len(dist))) * (dist > 0) / ((2 * dist) ** 3 + 1e-6) 89 | 90 | # Should cover at least 5% of the point 91 | begin_idx = round(len(density) * 0.05) 92 | 93 | # Find the radius with maximum point density 94 | max_idx = begin_idx + density[begin_idx:].argmax() 95 | 96 | # Find the smallest radius with point density equal to pcd_density_rate of maximum 97 | target_density = pcd_density_rate * density[max_idx] 98 | target_idx = max_idx + np.where(density[max_idx:] < target_density)[0][0] 99 | 100 | radius = dist[target_idx] 101 | 102 | return center, radius 
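A hedged sketch of driving `decide_main_bounding` above with a handful of camera positions. The tiny `FakeCam` class is a made-up stand-in for the project's camera objects; it only provides the `position` and `lookat` attributes that the heuristics read:

    import numpy as np
    from src.utils.bounding_utils import decide_main_bounding

    class FakeCam:  # hypothetical minimal camera stub
        def __init__(self, position, lookat):
            self.position = np.asarray(position, dtype=np.float32)
            self.lookat = np.asarray(lookat, dtype=np.float32)

    # Eight inward-facing cameras on a unit circle around the origin
    cams = [FakeCam([np.cos(t), 0.0, np.sin(t)], [-np.cos(t), 0.0, -np.sin(t)])
            for t in np.linspace(0, 2 * np.pi, 8, endpoint=False)]

    bounding = decide_main_bounding(bound_mode="camera_median", tr_cams=cams, bound_scale=2.0)
    print(bounding.shape)  # (2, 3): [scene_min, scene_max]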
103 | -------------------------------------------------------------------------------- /src/utils/camera_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | import numpy as np 10 | from scipy.interpolate import make_interp_spline 11 | 12 | 13 | def fov2focal(fov, pixels): 14 | return pixels / (2 * np.tan(0.5 * fov)) 15 | 16 | def focal2fov(focal, pixels): 17 | return 2 * np.arctan(pixels / (2 * focal)) 18 | 19 | 20 | def interpolate_poses(poses, n_frame, periodic=True): 21 | 22 | assert len(poses) > 1 23 | 24 | poses = list(poses) 25 | bc_type = None 26 | 27 | if periodic: 28 | poses.append(poses[0]) 29 | bc_type = "periodic" 30 | 31 | pos_lst = np.stack([pose[:3, 3] for pose in poses]) 32 | lookat_lst = np.stack([pose[:3, 2] for pose in poses]) 33 | right_lst = np.stack([pose[:3, 0] for pose in poses]) 34 | 35 | ts = np.linspace(0, 1, len(poses)) 36 | pos_interp_f = make_interp_spline(ts, pos_lst, bc_type=bc_type) 37 | lookat_interp_f = make_interp_spline(ts, lookat_lst, bc_type=bc_type) 38 | right_interp_f = make_interp_spline(ts, right_lst, bc_type=bc_type) 39 | 40 | samps = np.linspace(0, 1, n_frame+1)[:n_frame] 41 | pos_video = pos_interp_f(samps) 42 | lookat_video = lookat_interp_f(samps) 43 | right_video = right_interp_f(samps) 44 | interp_poses = [] 45 | for i in range(n_frame): 46 | pos = pos_video[i] 47 | lookat = lookat_video[i] / np.linalg.norm(lookat_video[i]) 48 | right_ = right_video[i] / np.linalg.norm(right_video[i]) 49 | down = np.cross(lookat, right_) 50 | right = np.cross(down, lookat) 51 | c2w = np.eye(4, dtype=np.float32) 52 | c2w[:3, 0] = right 53 | c2w[:3, 1] = down 54 | c2w[:3, 2] = lookat 55 | c2w[:3, 3] = pos 56 | interp_poses.append(c2w) 57 | 58 | return interp_poses 59 | 60 | 61 | def gen_circular_poses(radius, 62 | n_frame, 63 | starting=1.5 * np.pi, # Starting from -z 64 | ): 65 | poses = [] 66 | for rad in np.linspace(starting, starting + 2 * np.pi, n_frame): 67 | pos = radius * np.array([np.cos(rad), 0, np.sin(rad)]) 68 | lookat = -pos / np.linalg.norm(pos) 69 | down = np.array([0, 1, 0]) 70 | right = np.cross(down, lookat) 71 | right = right / np.linalg.norm(right) 72 | down = np.cross(lookat, right) 73 | c2w = np.eye(4, dtype=np.float32) 74 | c2w[:3, 0] = right 75 | c2w[:3, 1] = down 76 | c2w[:3, 2] = lookat 77 | c2w[:3, 3] = pos 78 | poses.append(c2w) 79 | return poses 80 | -------------------------------------------------------------------------------- /src/utils/colmap_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 
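An illustrative fly-through sketch built from the pose helpers in src/utils/camera_utils.py above (only NumPy and SciPy are assumed; the radius, frame counts, and field of view are arbitrary):

    import numpy as np
    from src.utils.camera_utils import gen_circular_poses, interpolate_poses, fov2focal

    key_poses = gen_circular_poses(radius=3.0, n_frame=4)         # four c2w keyframes on a circle
    video_poses = interpolate_poses(key_poses, n_frame=120, periodic=True)
    print(len(video_poses), video_poses[0].shape)                  # 120 (4, 4)

    focal = fov2focal(np.deg2rad(60.0), pixels=800)                # about 692.8 px for a 60 degree FoV
    print(round(focal, 1))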
8 | 9 | import pycolmap 10 | import numpy as np 11 | 12 | from typing import NamedTuple 13 | 14 | 15 | class PointCloud(NamedTuple): 16 | points: np.array 17 | colors: np.array 18 | errors: np.array 19 | corr: dict 20 | 21 | 22 | def parse_colmap_pts(sfm: pycolmap.Reconstruction, transform: np.array =None): 23 | """ 24 | Parse COLMAP points and correspondents. 25 | 26 | Input: 27 | @sfm Reconstruction from COLMAP. 28 | @transform 3x3 matrix to transform xyz. 29 | Output: 30 | @xyz Nx3 point positions. 31 | @rgb Nx3 point colors. 32 | @err N errors. 33 | @corr Dictionary from file name to point indices. 34 | """ 35 | 36 | xyz = [] 37 | rgb = [] 38 | err = [] 39 | points_id = [] 40 | for k, v in sfm.points3D.items(): 41 | points_id.append(k) 42 | xyz.append(v.xyz) 43 | rgb.append(v.color) 44 | err.append(v.error) 45 | if transform is not None: 46 | xyz[-1] = transform @ xyz[-1] 47 | 48 | xyz = np.array(xyz) 49 | rgb = np.array(rgb) 50 | err = np.array(err) 51 | points_id = np.array(points_id) 52 | 53 | points_idmap = np.full([points_id.max()+2], -1, dtype=np.int64) 54 | points_idmap[points_id] = np.arange(len(xyz)) 55 | 56 | corr = {} 57 | for image in sfm.images.values(): 58 | idx = np.array([p.point3D_id for p in image.points2D if p.has_point3D()]) 59 | corr[image.name] = points_idmap[idx] 60 | assert corr[image.name].min() >= 0 and corr[image.name].max() < len(xyz) 61 | 62 | return PointCloud(points=xyz, colors=rgb, errors=err, corr=corr) 63 | -------------------------------------------------------------------------------- /src/utils/fuser_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | ''' 10 | Reference: KinectFusion algorithm. 
11 | ''' 12 | 13 | import numpy as np 14 | 15 | import torch 16 | 17 | 18 | class Fuser: 19 | def __init__(self, 20 | xyz, 21 | bandwidth, 22 | use_trunc=True, 23 | fuse_tsdf=True, 24 | feat_dim=0, 25 | alpha_thres=0.5, 26 | crop_border=0.0, 27 | normal_weight=False, 28 | depth_weight=False, 29 | border_weight=False, 30 | max_norm_dist=10., 31 | use_half=False): 32 | assert len(xyz.shape) == 2 33 | assert xyz.shape[1] == 3 34 | self.xyz = xyz 35 | self.bandwidth = bandwidth 36 | self.use_trunc = use_trunc 37 | self.fuse_tsdf = fuse_tsdf 38 | self.feat_dim = feat_dim 39 | self.alpha_thres = alpha_thres 40 | self.crop_border = crop_border 41 | self.normal_weight = normal_weight 42 | self.depth_weight = depth_weight 43 | self.border_weight = border_weight 44 | self.max_norm_dist = max_norm_dist 45 | 46 | self.dtype = torch.float16 if use_half else torch.float32 47 | self.weight = torch.zeros([len(xyz), 1], dtype=self.dtype, device="cuda") 48 | self.feat = torch.zeros([len(xyz), feat_dim], dtype=self.dtype, device="cuda") 49 | if self.fuse_tsdf: 50 | self.sd_val = torch.zeros([len(xyz), 1], dtype=self.dtype, device="cuda") 51 | else: 52 | self.sd_val = None 53 | 54 | def integrate(self, cam, depth, feat=None, alpha=None): 55 | # Project grid points to image 56 | xyz_uv = cam.project(self.xyz) 57 | 58 | # Filter points projected outside 59 | filter_idx = torch.where((xyz_uv.abs() <= 1-self.crop_border).all(-1))[0] 60 | valid_idx = filter_idx 61 | valid_xyz = self.xyz[filter_idx] 62 | valid_uv = xyz_uv[filter_idx] 63 | 64 | # Compute projective sdf 65 | valid_frame_depth = torch.nn.functional.grid_sample( 66 | depth.view(1,1,*depth.shape[-2:]), 67 | valid_uv.view(1,1,-1,2), 68 | mode='bilinear', 69 | align_corners=False).flatten() 70 | valid_xyz_depth = (valid_xyz - cam.position) @ cam.lookat 71 | valid_sdf = valid_frame_depth - valid_xyz_depth 72 | 73 | if torch.is_tensor(self.bandwidth): 74 | bandwidth = self.bandwidth[valid_idx] 75 | else: 76 | bandwidth = self.bandwidth 77 | 78 | valid_sdf *= (1 / bandwidth) 79 | 80 | if self.use_trunc: 81 | # Filter occluded 82 | filter_idx = torch.where(valid_sdf >= -1)[0] 83 | valid_idx = valid_idx[filter_idx] 84 | valid_uv = valid_uv[filter_idx] 85 | valid_frame_depth = valid_frame_depth[filter_idx] 86 | valid_sdf = valid_sdf[filter_idx] 87 | valid_sdf = valid_sdf.clamp_(-1, 1) 88 | 89 | # Init weighting 90 | w = torch.ones_like(valid_frame_depth) 91 | else: 92 | norm_dist = valid_sdf.abs() 93 | w = torch.exp(-norm_dist.clamp_max(self.max_norm_dist)) 94 | 95 | # Alpha filtering 96 | if alpha is not None: 97 | valid_alpha = torch.nn.functional.grid_sample( 98 | alpha.view(1,1,*alpha.shape[-2:]), 99 | valid_uv.view(1,1,-1,2), 100 | mode='bilinear', 101 | align_corners=False).flatten() 102 | w *= valid_alpha 103 | 104 | filter_idx = torch.where(valid_alpha >= self.alpha_thres)[0] 105 | valid_idx = valid_idx[filter_idx] 106 | valid_uv = valid_uv[filter_idx] 107 | valid_frame_depth = valid_frame_depth[filter_idx] 108 | valid_sdf = valid_sdf[filter_idx] 109 | w = w[filter_idx] 110 | 111 | # Compute geometric weighting 112 | if self.depth_weight: 113 | w *= 1 / valid_frame_depth.clamp_min(0.1) 114 | 115 | if self.normal_weight: 116 | normal = cam.depth2normal(depth) 117 | rd = torch.nn.functional.normalize(cam.depth2pts(depth) - cam.position.view(3,1,1), dim=0) 118 | cos_theta = (normal * rd).sum(0).clamp_min(0) 119 | valid_cos_theta = torch.nn.functional.grid_sample( 120 | cos_theta.view(1,1,*cos_theta.shape[-2:]), 121 | valid_uv.view(1,1,-1,2), 122 | 
mode='bilinear', 123 | align_corners=False).flatten() 124 | w *= valid_cos_theta 125 | 126 | if self.border_weight: 127 | # The image center gets weight 1.0; the corners get 0.1 128 | w *= 1 / (1 + 9/np.sqrt(2) * valid_uv.square().sum(1).sqrt()) 129 | 130 | # Reshape integration weight 131 | w = w.unsqueeze(-1).to(self.dtype) 132 | 133 | # Integrate weight 134 | self.weight[valid_idx] += w 135 | 136 | # Integrate tsdf 137 | if self.fuse_tsdf: 138 | valid_sdf = valid_sdf.unsqueeze(-1).to(self.dtype) 139 | self.sd_val[valid_idx] += w * valid_sdf 140 | 141 | # Sample feature 142 | if self.feat_dim > 0: 143 | valid_feat = torch.nn.functional.grid_sample( 144 | feat.view(1,self.feat_dim,*feat.shape[-2:]).to(self.dtype), 145 | valid_uv.view(1,1,-1,2).to(self.dtype), 146 | mode='bilinear', 147 | align_corners=False)[0,:,0].T 148 | self.feat[valid_idx] += w * valid_feat 149 | 150 | @property 151 | def feature(self): 152 | return self.feat / self.weight 153 | 154 | @property 155 | def tsdf(self): 156 | return self.sd_val / self.weight 157 | 158 | 159 | @torch.no_grad() 160 | def rgb_fusion(voxel_model, cameras): 161 | 162 | from .octree_utils import level_2_vox_size 163 | 164 | # Define volume integrator 165 | finest_vox_size = level_2_vox_size(voxel_model.scene_extent, voxel_model.octlevel.max()).item() 166 | feat_volume = Fuser( 167 | xyz=voxel_model.vox_center, 168 | bandwidth=10 * finest_vox_size, 169 | use_trunc=False, 170 | fuse_tsdf=False, 171 | feat_dim=3, 172 | crop_border=0., 173 | normal_weight=False, 174 | depth_weight=False, 175 | border_weight=False, 176 | use_half=True) 177 | 178 | # Run RGB fusion over all cameras 179 | for cam in cameras: 180 | render_pkg = voxel_model.render(cam, color_mode="dontcare", output_depth=True) 181 | depth = render_pkg['depth'][2] 182 | feat_volume.integrate(cam=cam, feat=cam.image.cuda(), depth=depth) 183 | 184 | return feat_volume.feature.nan_to_num_(0.5).float() 185 | -------------------------------------------------------------------------------- /src/utils/image_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | 10 | import os 11 | import cv2 12 | import torch 13 | import numpy as np 14 | 15 | 16 | def im_tensor2np(x): 17 | if x.shape[0] == 1: 18 | x = x.squeeze(0) 19 | if len(x.shape) == 3: 20 | x = x.moveaxis(0, -1) 21 | return x.clamp(0, 1).mul(255).cpu().numpy().astype(np.uint8) 22 | 23 | def im_pil2tensor(x): 24 | return torch.from_numpy(np.array(x).astype(np.float32)).moveaxis(-1, 0) / 255 25 | 26 | def viz_tensordepth_histeq(x, alpha_mass=None): 27 | ''' 28 | Use histogram equalization for better depth visualization. 29 | By doing so, each color scale will contain a similar amount of pixels. 30 | The depth order is maintained but the scale does not reflect any actual distance.
31 | ''' 32 | if alpha_mass is not None: 33 | m = (alpha_mass>0.01) & (x>0) 34 | else: 35 | m = (x>0) 36 | 37 | x = x.cpu().numpy() 38 | m = m.cpu().numpy() 39 | n_valid = m.sum() 40 | if alpha_mass is not None: 41 | mass = alpha_mass.cpu().numpy()[m] 42 | else: 43 | mass = np.ones([n_valid]) 44 | order = np.argsort(x[m]) 45 | cdf = np.cumsum(mass[order]) / mass.sum() 46 | hist = np.empty([n_valid]) 47 | hist[order] = 1 + 254 * (cdf ** 2) 48 | x[~m] = 0 49 | x[m] = np.clip(hist, 1, 255) 50 | viz = cv2.applyColorMap(x.astype(np.uint8), cv2.COLORMAP_VIRIDIS) 51 | viz = cv2.cvtColor(viz, cv2.COLOR_BGR2RGB) 52 | viz[~m] = 0 53 | return viz 54 | 55 | def viz_tensordepth_log(x, alpha_mass=None): 56 | if alpha_mass is not None: 57 | m = (alpha_mass>0.01) & (x>0) 58 | else: 59 | m = (x>0) 60 | 61 | x = x.cpu().numpy() 62 | m = m.cpu().numpy() 63 | dmin, dmax = np.quantile(x[m], q=[0.03, 0.97]) 64 | x = np.log(np.clip(1 + x - dmin, 1, 1e9)) 65 | x = x / np.log(1 + dmax - dmin) 66 | x = np.clip(x, 0, 1) * 255 67 | viz = cv2.applyColorMap(x.astype(np.uint8), cv2.COLORMAP_VIRIDIS) 68 | viz = cv2.cvtColor(viz, cv2.COLOR_BGR2RGB) 69 | viz[~m] = 0 70 | return viz 71 | 72 | 73 | def viz_tensordepth(x, alpha_mass=None, mode='log'): 74 | if mode == 'histeq': 75 | return viz_tensordepth_histeq(x, alpha_mass) 76 | elif mode == 'log': 77 | return viz_tensordepth_log(x) 78 | raise NotImplementedError 79 | 80 | def resize_rendering(render, size, mode='bilinear', align_corners=False): 81 | return torch.nn.functional.interpolate( 82 | render[None], size=size, mode=mode, align_corners=align_corners, antialias=True)[0] 83 | -------------------------------------------------------------------------------- /src/utils/mono_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 
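A short illustrative use of the depth visualization helpers in src/utils/image_utils.py above (assumes the torch / NumPy / OpenCV stack that module already imports; the sizes and depth range are arbitrary):

    import torch
    from src.utils.image_utils import viz_tensordepth, im_tensor2np

    depth = 1.0 + 4.0 * torch.rand(240, 320)                 # hypothetical depth map in meters
    alpha = torch.ones_like(depth)                            # fully opaque rendering
    viz_hist = viz_tensordepth(depth, alpha, mode='histeq')   # (240, 320, 3) uint8, RGB
    viz_log = viz_tensordepth(depth, mode='log')              # log-scaled variant
    print(viz_hist.shape, viz_hist.dtype)

    img = torch.rand(3, 240, 320)                             # CHW float image in [0, 1]
    print(im_tensor2np(img).shape)                            # (240, 320, 3)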
8 | 9 | import os 10 | import sys 11 | import cv2 12 | import tqdm 13 | import torch 14 | import numpy as np 15 | import imageio.v2 as iio 16 | from PIL import Image 17 | 18 | 19 | def depth_path(depth_root, cam): 20 | return os.path.join(depth_root, f"{cam.image_name}.png") 21 | 22 | def codebook_path(depth_root, cam): 23 | return os.path.join(depth_root, f"{cam.image_name}.npy") 24 | 25 | def gather_todo_list(depth_root, cameras, force_rerun=False): 26 | # Gather list of camera to estimate depth 27 | todo_indices = [] 28 | for i, cam in enumerate(cameras): 29 | if not os.path.exists(depth_path(depth_root, cam)) or force_rerun: 30 | todo_indices.append(i) 31 | return todo_indices 32 | 33 | def load_depth_to_camera(depth_root, cameras, depth_name): 34 | for cam in tqdm.tqdm(cameras): 35 | depth_np = iio.imread(depth_path(depth_root, cam)) 36 | codebook = np.load(codebook_path(depth_root, cam)) 37 | setattr(cam, depth_name, torch.tensor(codebook[depth_np])) 38 | 39 | def save_quantize_depth(depth_root, cam, depth): 40 | # Quantize depth map to 16 bit 41 | codebook = depth.quantile(torch.linspace(0, 1, 65536).cuda(), interpolation='nearest') 42 | depth_idx = torch.searchsorted(codebook, depth, side='right').clamp_max_(65535) 43 | depth_idx[(depth - codebook[depth_idx-1]).abs() < (depth - codebook[depth_idx]).abs()] -= 1 44 | assert depth_idx.max() <= 65535 45 | assert depth_idx.min() >= 0 46 | 47 | # Save result 48 | depth_np = depth_idx.cpu().numpy().astype(np.uint16) 49 | iio.imwrite(depth_path(depth_root, cam), depth_np) 50 | np.save(codebook_path(depth_root, cam), codebook.cpu().numpy().astype(np.float32)) 51 | 52 | def resize_maxres_divisible(im, len, divisible): 53 | max_res = max(im.shape[-2:]) 54 | target_size = ( 55 | divisible * round(len * im.shape[-2] / max_res / divisible), 56 | divisible * round(len * im.shape[-1] / max_res / divisible)) 57 | im = torch.nn.functional.interpolate(im, size=target_size, mode='bilinear', antialias=True) 58 | return im 59 | 60 | 61 | @torch.no_grad() 62 | def prepare_depthanythingv2(cameras, source_path, force_rerun=False): 63 | 64 | depth_root = os.path.join(source_path, "mono_priors", "depthanythingv2") 65 | os.makedirs(depth_root, exist_ok=True) 66 | 67 | todo_indices = gather_todo_list(depth_root, cameras, force_rerun=force_rerun) 68 | 69 | if len(todo_indices): 70 | print(f"Infer depth for {len(todo_indices)} images. Saved to {depth_root}.") 71 | 72 | # Load model 73 | from transformers import AutoImageProcessor, AutoModelForDepthEstimation 74 | image_processor = AutoImageProcessor.from_pretrained("depth-anything/Depth-Anything-V2-Large-hf") 75 | model = AutoModelForDepthEstimation.from_pretrained("depth-anything/Depth-Anything-V2-Large-hf").cuda() 76 | 77 | for i in tqdm.tqdm(todo_indices): 78 | cam = cameras[i] 79 | 80 | # Inference depth 81 | inputs = image_processor(images=cam.image, return_tensors="pt", do_rescale=False) 82 | inputs['pixel_values'] = inputs['pixel_values'].cuda() 83 | outputs = model(**inputs) 84 | depth = outputs['predicted_depth'].squeeze() 85 | 86 | # Save result 87 | save_quantize_depth(depth_root, cam, depth) 88 | 89 | # Load the estimated depth 90 | print("Load the estimated depths to cameras.") 91 | load_depth_to_camera(depth_root, cameras, 'depthanythingv2') 92 | 93 | 94 | @torch.no_grad() 95 | def _prepare_mast3r_metric_depth(cameras, depth_root, mast3r_repo_path): 96 | print(f"Infer depth for {len(cameras)} images. 
Saved to {depth_root}.") 97 | 98 | assert os.path.exists(mast3r_repo_path), "mast3r repo path: `{mast3r_repo_path}` not found" 99 | sys.path.insert(0, mast3r_repo_path) 100 | 101 | # Load model 102 | from mast3r.model import AsymmetricMASt3R 103 | from dust3r.inference import inference 104 | from dust3r.cloud_opt.pair_viewer import PairViewer 105 | 106 | model = AsymmetricMASt3R.from_pretrained('naver/MASt3R_ViTLarge_BaseDecoder_512_catmlpdpt_metric').cuda() 107 | 108 | depth_prev = None 109 | 110 | for i in tqdm.trange(len(cameras) - 1): 111 | cam1 = cameras[i] 112 | cam2 = cameras[i+1] 113 | 114 | # Inference depth 115 | image_1 = resize_maxres_divisible(cam1.image[None].cuda(), len=512, divisible=16) 116 | image_2 = resize_maxres_divisible(cam2.image[None].cuda(), len=512, divisible=16) 117 | true_shape = np.array([image_1.shape[-2:]], dtype=np.int32) 118 | 119 | image_dicts = [ 120 | dict(img=image_1 * 2 - 1, true_shape=true_shape, idx=0, instance='0'), 121 | dict(img=image_2 * 2 - 1, true_shape=true_shape, idx=1, instance='1') 122 | ] 123 | symmetric_pair = [(image_dicts[0], image_dicts[1]), (image_dicts[1], image_dicts[0])] 124 | output = inference(symmetric_pair, model, 'cuda', batch_size=1, verbose=False) 125 | 126 | view1, pred1 = output['view1'], output['pred1'] 127 | view2, pred2 = output['view2'], output['pred2'] 128 | pair = PairViewer(view1, view2, pred1, pred2, verbose=False).cuda() 129 | depth1, depth2 = pair.get_depthmaps() 130 | pose1, pose2 = pair.get_im_poses() 131 | 132 | rescale = (cam1.position - cam2.position).norm() / (pose1[:3, 3] - pose2[:3, 3]).norm() 133 | depth1 = (depth1 * rescale).detach().squeeze() 134 | depth2 = (depth2 * rescale).detach().squeeze() 135 | 136 | # Average with previous result 137 | if depth_prev is not None: 138 | depth1 = (depth_prev + depth1) * 0.5 139 | 140 | depth_prev = depth2 141 | 142 | # Save result 143 | save_quantize_depth(depth_root, cam1, depth1) 144 | 145 | # Save last frame 146 | save_quantize_depth(depth_root, cam2, depth2) 147 | 148 | 149 | @torch.no_grad() 150 | def prepare_mast3r_metric_depth(cameras, source_path, mast3r_repo_path): 151 | 152 | depth_root = os.path.join(source_path, "mono_priors", "mast3r_metric_depth") 153 | os.makedirs(depth_root, exist_ok=True) 154 | 155 | n_todo = len(gather_todo_list(depth_root, cameras, force_rerun=False)) 156 | if n_todo > 0: 157 | _prepare_mast3r_metric_depth(cameras, depth_root, mast3r_repo_path) 158 | 159 | # Load the estimated depth 160 | print("Load the estimated depths to cameras.") 161 | load_depth_to_camera(depth_root, cameras, 'mast3r_metric_depth') 162 | -------------------------------------------------------------------------------- /src/utils/system_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | import random 10 | import numpy as np 11 | import torch 12 | 13 | def seed_everything(seed): 14 | random.seed(seed) 15 | np.random.seed(seed) 16 | torch.manual_seed(seed) 17 | --------------------------------------------------------------------------------
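A minimal reproducibility sketch for the seeding helper in src/utils/system_utils.py above (the seed value is arbitrary):

    import torch
    from src.utils.system_utils import seed_everything

    seed_everything(0)          # seeds Python's random, NumPy, and PyTorch RNGs
    print(torch.rand(2))        # identical across runs that use the same seed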