├── .gitignore ├── LICENSE ├── LICENSE-Plenoxels ├── README.md ├── assets ├── 2D_score.png ├── 3D_score.png └── teaser.png ├── cache └── .gitkeep ├── configs ├── co3d_v1.gin ├── co3d_v2.gin ├── rebuttal_quant │ ├── co3d_16bit.gin │ ├── co3d_2bit.gin │ ├── co3d_2bit_clip.gin │ ├── co3d_2bit_density.gin │ ├── co3d_4bit.gin │ ├── co3d_4bit_clip.gin │ ├── co3d_4bit_density.gin │ ├── co3d_8bit_clip.gin │ └── co3d_8bit_density.gin ├── rebuttal_reso │ ├── co3d_128.gin │ ├── co3d_256.gin │ ├── co3d_384.gin │ └── co3d_64.gin ├── rebuttal_sigma_weight │ ├── co3d_sigma_10.gin │ ├── co3d_sigma_100.gin │ ├── co3d_sigma_5.gin │ ├── co3d_weight_1.28.gin │ ├── co3d_weight_2.56.gin │ └── co3d_weight_none.gin ├── scannet.gin ├── scannet_depth.gin ├── scannet_tsdf.gin ├── scannet_tsdf_reso1024_pad3.gin ├── scannet_tsdf_reso256_pad1.gin ├── scannet_tsdf_reso256_pad2.gin ├── scannet_tsdf_reso512_pad1.gin └── scannet_tsdf_reso512_pad2.gin ├── data └── .gitkeep ├── dataloader ├── co3d_lists │ └── .gitkeep ├── data_util │ ├── co3d.py │ ├── common.py │ └── scannet.py ├── interface.py ├── litdata.py ├── random_pose.py ├── sampler.py └── spherical_poses.py ├── lib └── plenoxel │ ├── CMakeLists.txt │ ├── include │ ├── cubemap_util.cuh │ ├── cuda_util.cuh │ ├── data_spec.hpp │ ├── data_spec_packed.cuh │ ├── random_util.cuh │ ├── render_util.cuh │ └── util.hpp │ ├── loss_kernel.cu │ ├── misc_kernel.cu │ ├── optim_kernel.cu │ ├── render_lerp_kernel_cuvol.cu │ ├── render_lerp_kernel_nvol.cu │ ├── render_svox1_kernel.cu │ ├── svox2.cpp │ ├── svox2_kernel.cu │ └── version.py ├── model ├── interface.py └── plenoxel_torch │ ├── __global__.py │ ├── autograd.py │ ├── dataclass.py │ ├── model.py │ ├── sparse_grid.py │ └── utils.py ├── run.py ├── sbatch.sh ├── script ├── collage.sh ├── v1.sh └── v2.sh ├── setup.py └── utils ├── SensorData.py ├── download_perf.py ├── extract_scannet.py ├── extract_stats.py ├── logger.py ├── notebooks ├── co3d_lists.ipynb ├── co3d_v2.ipynb ├── collect_results.ipynb ├── find_missing_logs.ipynb ├── make_collage_list.ipynb ├── make_video.ipynb ├── masked_out.ipynb ├── v1_v2_score.ipynb └── visualize_voxel.ipynb ├── ray.py ├── select_option.py ├── store_util.py ├── tsdf.py └── tsdf_cuda.py /.gitignore: -------------------------------------------------------------------------------- 1 | *logs/* 2 | logs 3 | *wandb/* 4 | *vscode* 5 | *pycache* 6 | *swap-pane* 7 | *.idea* 8 | logs_collection 9 | *build* 10 | *.egg-info* 11 | *.eggs* 12 | *_debug* 13 | 14 | cache/* 15 | !cache/.gitkeep 16 | data/* 17 | !data/.gitkeep 18 | dataloader/co3d_lists/* 19 | !dataloader/co3d_lists/.gitkeep 20 | *.ipynb_checkpoints* 21 | 22 | playground/* 23 | render/* 24 | tsdf_results/ 25 | -------------------------------------------------------------------------------- /LICENSE-Plenoxels: -------------------------------------------------------------------------------- 1 | BSD 2-Clause License 2 | 3 | Copyright (c) 2021, the Plenoxels authors 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 
15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PeRFception - Perception using Radiance Fields 2 | 3 |
4 | ![teaser](assets/teaser.png) 5 |
6 | 7 | ### Quick Access 8 | 9 | [Project Page](https://postech-cvlab.github.io/PeRFception) | [Paper](https://arxiv.org/abs/2208.11537) | [Supplementary Materials](https://openreview.net/attachment?id=MzaPEKHv-0J&name=supplementary_material) 10 | ### Author Info 11 | 12 | - [Yoonwoo Jeong](https://yoonwooinfo.notion.site) [[Google Scholar](https://scholar.google.com/citations?user=HQ1PMggAAAAJ&hl=en)] 13 | - [Seungjoo Shin](https://seungjooshin.github.io/) [[Google Scholar](https://scholar.google.com/citations?user=io7PSDIAAAAJ&hl=en)] 14 | - [Junha Lee](https://junha-l.github.io/) [[Google Scholar](https://scholar.google.com/citations?user=RB7qMm4AAAAJ&hl=en)] 15 | - [Chris Choy](https://chrischoy.org) [[Google Scholar](https://scholar.google.com/citations?user=2u8G5ksAAAAJ&hl=en&oi=ao)] 16 | - [Animashree Anandkumar](http://tensorlab.cms.caltech.edu/users/anima/) [[Google Scholar](https://scholar.google.com/citations?user=bEcLezcAAAAJ&hl=en&oi=ao)] 17 | - [Minsu Cho](http://cvlab.postech.ac.kr/~mcho/) [[Google Scholar](https://scholar.google.com/citations?user=5TyoF5QAAAAJ&hl=en&oi=ao)] 18 | - [Jaesik Park](http://jaesik.info/) [[Google Scholar](https://scholar.google.com/citations?user=_3q6KBIAAAAJ&hl=en&oi=ao)] 19 | 20 | ### News 21 | - 22.10.27: Added HuggingFace repo for ScanNet. 22 | - 22.10.25: Moved PeRFception datasets from OneDrive to HuggingFace. 23 | - 22.08.04: The first version of PeRFception has been released. 24 | 25 |
26 | ![teaser](assets/teaser.png) 27 |
28 | 29 | ## Abstract 30 | 31 | The recent progress in implicit 3D representations, i.e., Neural Radiance Fields (NeRFs), has made accurate and photorealistic 3D reconstruction possible in a differentiable manner. This new representation can effectively convey the information of hundreds of high-resolution images in one compact format and allows photorealistic synthesis of novel views. In this work, using a variant of NeRF called Plenoxels, we create the first large-scale implicit-representation datasets for perception tasks, called PeRFception, which consists of two parts that incorporate both object-centric and scene-centric scans for classification and segmentation. It shows a significant memory compression rate (96.4%) over the original dataset, while containing both 2D and 3D information in a unified form. We construct classification and segmentation models that directly take this implicit format as input, and we also propose a novel augmentation technique to avoid overfitting to image backgrounds. The code and data will be publicly available. 32 | 33 | ## Downloading PeRFception-Datasets [[CO3D-link1](https://huggingface.co/datasets/YWjimmy/PeRFception-v1-1)] [[CO3D-link2](https://huggingface.co/datasets/YWjimmy/PeRFception-v1-2)] [[CO3D-link3](https://huggingface.co/datasets/YWjimmy/PeRFception-v1-3)] [[ScanNet](https://huggingface.co/datasets/YWjimmy/PeRFception-ScanNet)] 34 | 35 | ``` 36 | # Link1 - PeRFception-CO3D-v1 37 | git clone https://huggingface.co/datasets/YWjimmy/PeRFception-v1-1 38 | # Link2 - PeRFception-CO3D-v1 39 | git clone https://huggingface.co/datasets/YWjimmy/PeRFception-v1-2 40 | # Link3 - PeRFception-CO3D-v1 41 | git clone https://huggingface.co/datasets/YWjimmy/PeRFception-v1-3 42 | # Link1 - PeRFception-ScanNet 43 | git clone https://huggingface.co/datasets/YWjimmy/PeRFception-ScanNet 44 | ``` 45 | ### Downloading specific chunks 46 | ``` 47 | mkdir <local_dir> 48 | cd <local_dir> 49 | git init 50 | git remote add -f origin [link] 51 | git config core.sparseCheckout true 52 | echo "some/dir/" >> .git/info/sparse-checkout 53 | echo "another/sub/tree" >> .git/info/sparse-checkout 54 | git pull origin main 55 | 56 | # e.g., to download only the data of scene 288_30460_58530 57 | echo "30/plenoxel_co3d_288_30460_58530" >> .git/info/sparse-checkout 58 | ``` 59 | ### PeRFception-CO3D 60 | 61 | |Dataset| # Scenes | # Frames | 3D Shape | Features | 3D-BKGD | Memory | Memory (Rel) 62 | |:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:| 63 | |CO3D| 18.6K | 1.5M | pcd | C | X | 1.44TB | $$\pm0.00\%$$ 64 | |PeRFception-CO3D| 18.6K | $$\infty$$ | voxel | SH + D | O | 1.33TB | $$-6.94\%$$ 65 | 66 | ### PeRFception-ScanNet 67 | 68 | |Dataset| # Scenes | # Frames | 3D Shape | Features | 3D-BKGD | Memory | Memory (Rel) 69 | |:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:| 70 | |ScanNet| 1.5K | 2.5M | pcd | C | X | 966GB | $$\pm0.00\%$$ 71 | |PeRFception-ScanNet| 1.5K | $$\infty$$ | voxel | SH + D | O | 35GB | $$-96.4\%$$ 72 | 73 | 74 | ## Get Ready (Installation) 75 | 76 | Our code is verified on Ubuntu 20.04 with CUDA 11.1. 77 | 78 | ``` 79 | conda create -n perfception -c anaconda python=3.8 -y 80 | conda activate perfception 81 | conda install pytorch==1.8.0 torchvision==0.9.0 torchaudio==0.8.0 cudatoolkit=11.1 -c pytorch -c conda-forge -y 82 | pip3 install imageio tqdm requests configargparse scikit-image imageio-ffmpeg piqa wandb pytorch_lightning==1.5.5 opencv-python gin-config gdown plyfile 83 | pip3 install .
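# Optional sanity check: confirm that PyTorch detects the GPU before training
python3 -c "import torch; print(torch.cuda.is_available())"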
84 | ``` 85 | 86 | 87 | ## Demo 88 | We provide a short demo for rendering a scene from CO3D or ScanNet. After installing the requirements, you can run the demos with the commands below: 89 | ``` 90 | # CO3D demo 91 | python3 -m run --ginc configs/co3d_v1.gin 92 | # ScanNet demo 93 | python3 -m run --ginc configs/scannet.gin 94 | ``` 95 | 96 | ## Rendering CO3D and ScanNet 97 | We deliver the full code to reproduce the performance reported in the main paper. To run the code, you should first put the datasets in the proper location: 98 | ``` 99 | data 100 | | 101 | |--- co3d 102 | -- apple 103 | -- banana 104 | ... 105 | | 106 | |--- scannet 107 | -- scene0000_00 108 | -- scene0000_01 109 | ... 110 | ``` 111 | ScanNet-v2 can be downloaded [here](http://www.scan-net.org/) and CO3D-v1 can be downloaded [here](https://github.com/facebookresearch/co3d). Thanks to the sweep functionality of `wandb`, we could manage the tremendous number of training scripts. You can download the `sweep` file [here](https://1drv.ms/u/s!As9A9EbDsoWcj6toSOfdeWMaHhqF3Q?e=1INfNg). 112 | 113 | 114 | ## Downstream Tasks 115 | 116 | ### Code for downstream tasks: https://github.com/POSTECH-CVLab/NeRF-Downstream 117 | 118 | ### 2D object classification (PeRFception-CO3D) 119 | 120 | We benchmark several 2D classification models on rendered PeRFception-CO3D images. For faster reproduction, we also provide the images rendered from PeRFception-CO3D at this [link](https://1drv.ms/u/s!AgY2evoYo6FggthVfVngtHinq3czqQ?e=crnTlu). Before running the code, be sure to put the downloaded dataset in `data/perfception_2d`. You can easily reproduce the scores using the scripts in `scripts/downstream/2d_cls/[model].sh`; details of the training pipeline and models are elaborated in the main paper. A minimal, hypothetical training sketch is given after the score table below. 121 | 122 | The pretrained models can be reached with the links below: 123 |
124 | ![2D score](assets/2D_score.png) 125 |
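As a minimal illustration of this pipeline, the sketch below fine-tunes an off-the-shelf 2D classifier on the rendered images. It is not the repository's training code (use the shell scripts above for the reported scores); the class-per-folder layout under `data/perfception_2d/train`, the ResNet-18 backbone, and all hyperparameters are assumptions for illustration only:

```python
# Hypothetical fine-tuning sketch -- not the repo's training code.
# Assumes rendered images are arranged as data/perfception_2d/train/<class>/<img>.png
import torch
import torchvision
from torchvision import transforms

tf = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])
trainset = torchvision.datasets.ImageFolder("data/perfception_2d/train", transform=tf)
loader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True, num_workers=4)

# ResNet-18 backbone with a classification head sized to the CO3D classes.
model = torchvision.models.resnet18(pretrained=True)
model.fc = torch.nn.Linear(model.fc.in_features, len(trainset.classes))
model = model.cuda()

optimizer = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9)
criterion = torch.nn.CrossEntropyLoss()

model.train()
for epoch in range(10):  # illustrative schedule only
    for images, labels in loader:
        images, labels = images.cuda(), labels.cuda()
        loss = criterion(model(images), labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
```

For the numbers in the score table, rely on the provided scripts, which pin the exact models, schedules, and augmentations described in the paper.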
126 | 127 | 128 | ### 3D object classification (PeRFception-CO3D) 129 | 130 | We also benchmark several 3D classification models on PeRFception-CO3D. We provide the full code at the link [](). You can download the pretrained models from the links below; an illustrative sketch of assembling voxel inputs follows the score table. 131 | 132 |
133 | ![3D score](assets/3D_score.png) 134 |
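To make the voxel format concrete: each PeRFception scene stores a sparse voxel grid with a density and spherical-harmonics features (SH + D) per occupied voxel. Below is an illustrative sketch of packing such a grid into a sparse tensor for a 3D classifier. The file path and the `links`/`density_data`/`sh_data` keys follow the Plenoxels (svox2) grid convention and are assumptions here, not the verified on-disk schema of the released chunks:

```python
# Illustrative only -- the file layout and key names are assumptions.
import numpy as np
import torch
import MinkowskiEngine as ME

ckpt = np.load("30/plenoxel_co3d_288_30460_58530/data.npz")  # hypothetical path
links = ckpt["links"]           # (X, Y, Z) int grid, >= 0 where a voxel is occupied
density = ckpt["density_data"]  # (N, 1) density per occupied voxel
sh = ckpt["sh_data"]            # (N, 27) SH coefficients (9 per RGB channel)

# Coordinates of occupied voxels and their rows in the data arrays.
coords = np.stack(np.nonzero(links >= 0), axis=-1)          # (N, 3)
rows = links[links >= 0]                                    # (N,)
feats = np.concatenate([density[rows], sh[rows]], axis=-1)  # (N, 28)

# MinkowskiEngine expects a leading batch index on the coordinates.
bcoords = ME.utils.batched_coordinates([torch.from_numpy(coords)])
x = ME.SparseTensor(
    features=torch.from_numpy(feats).float(),
    coordinates=bcoords,
)
# `x` can now be fed to a MinkowskiNet-style sparse 3D classifier.
```

This mirrors the repo's own use of MinkowskiEngine (imported in `dataloader/data_util/common.py`); the full 3D classification code lives in the NeRF-Downstream repository.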
135 | 136 | 137 | ### 3D semantic segmentation (PeRFception-ScanNet) 138 | On PeRFception-ScanNet, we evaluate several 3D semantic segmentation models with depth-supervised labels. 139 | 140 | ## Plans for v2 141 | 142 | The official CO3D repository [[link](https://github.com/facebookresearch/co3d)] provides an improved version of CO3D, v2, which should yield better rendering quality and more accurate geometry in our model. We plan to extend this work to PeRFception-CO3D-v2, built from CO3D-v2. 143 | 144 | ## Citation 145 | ```bib 146 | @article{jeong2022perfception, 147 |   title = {PeRFception: Perception using Radiance Fields}, 148 |   author = {Jeong, Yoonwoo and Shin, Seungjoo and Lee, Junha and Choy, Chris and Anandkumar, Anima and Cho, Minsu and Park, Jaesik}, 149 |   journal = {arXiv preprint arXiv:2208.11537}, 150 |   year = {2022} 151 | } 152 | ``` 153 | 154 | ## Acknowledgement 155 | We thank the reviewers for their constructive comments and suggestions. 156 | -------------------------------------------------------------------------------- /assets/2D_score.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/POSTECH-CVLab/PeRFception/a3c34d4997ce6807a5b4e3debe086238cc951e61/assets/2D_score.png -------------------------------------------------------------------------------- /assets/3D_score.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/POSTECH-CVLab/PeRFception/a3c34d4997ce6807a5b4e3debe086238cc951e61/assets/3D_score.png -------------------------------------------------------------------------------- /assets/teaser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/POSTECH-CVLab/PeRFception/a3c34d4997ce6807a5b4e3debe086238cc951e61/assets/teaser.png -------------------------------------------------------------------------------- /cache/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/POSTECH-CVLab/PeRFception/a3c34d4997ce6807a5b4e3debe086238cc951e61/cache/.gitkeep -------------------------------------------------------------------------------- /configs/co3d_v1.gin: -------------------------------------------------------------------------------- 1 | LitData.batch_sampler = "all_images" 2 | LitData.epoch_size = 25600 3 | LitData.batch_size = 5000 4 | LitData.chunk = 10000 5 | LitData.use_pixel_centers = True 6 | 7 | LitPlenoxel.reso = [[128, 128, 128], [256, 256, 256]] 8 | LitPlenoxel.background_nlayers = 16 9 | LitPlenoxel.background_reso = 512 10 | LitPlenoxel.lr_sigma = 3.0e+1 11 | LitPlenoxel.lr_sh = 1.0e-2 12 | LitPlenoxel.lr_sigma_delay_steps = 0 13 | LitPlenoxel.lr_fg_begin_step = 1000 14 | LitPlenoxel.density_thresh = 20.0 15 | LitPlenoxel.thresh_type = "sigma" 16 | LitPlenoxel.near_clip = 0.35 17 | LitPlenoxel.lambda_tv = 5.0e-5 18 | LitPlenoxel.lambda_tv_sh = 5.0e-3 19 | LitPlenoxel.lambda_tv_background_color = 1.0e-3 20 | LitPlenoxel.lambda_tv_background_color = 1.0e-3 21 | LitPlenoxel.lambda_beta = 1.0e-5 22 | LitPlenoxel.lambda_sparsity = 1.0e-10 23 | LitPlenoxel.background_brightness = 0.5 24 | LitPlenoxel.tv_early_only = 0 25 | LitPlenoxel.tv_decay = 0.5 26 | LitPlenoxel.quantize = True 27 | 28 | ResampleCallBack.upsamp_every = 25600 29 | 30 | run.max_steps = 76800 31 | run.save_last_only = True 32 | run.proj_name = "co3d" 33 | run.dataset_name = "co3d" 34 | run.datadir = "data/co3d" 35 | run.logbase
= "logs" 36 | run.log_every_n_steps = 100 37 | run.model_name = "plenoxel" 38 | run.seed = 0 39 | run.progressbar_refresh_rate = 100 40 | run.postfix = "v1" -------------------------------------------------------------------------------- /configs/co3d_v2.gin: -------------------------------------------------------------------------------- 1 | LitData.batch_sampler = "all_images" 2 | LitData.epoch_size = 25600 3 | LitData.batch_size = 5000 4 | LitData.chunk = 10000 5 | LitData.use_pixel_centers = True 6 | 7 | LitPlenoxel.reso = [[128, 128, 128], [256, 256, 256]] 8 | LitPlenoxel.background_nlayers = 16 9 | LitPlenoxel.background_reso = 512 10 | LitPlenoxel.lr_sigma = 3.0e+1 11 | LitPlenoxel.lr_sh = 1.0e-2 12 | LitPlenoxel.lr_sigma_delay_steps = 0 13 | LitPlenoxel.lr_fg_begin_step = 1000 14 | LitPlenoxel.density_thresh = 20.0 15 | LitPlenoxel.thresh_type = "sigma" 16 | LitPlenoxel.near_clip = 0.35 17 | LitPlenoxel.lambda_tv = 5.0e-5 18 | LitPlenoxel.lambda_tv_sh = 5.0e-3 19 | LitPlenoxel.lambda_tv_background_color = 1.0e-3 20 | LitPlenoxel.lambda_tv_background_color = 1.0e-3 21 | LitPlenoxel.lambda_beta = 1.0e-5 22 | LitPlenoxel.lambda_sparsity = 1.0e-10 23 | LitPlenoxel.background_brightness = 0.5 24 | LitPlenoxel.tv_early_only = 0 25 | LitPlenoxel.tv_decay = 0.5 26 | LitPlenoxel.quantize = True 27 | 28 | ResampleCallBack.upsamp_every = 25600 29 | 30 | run.max_steps = 76800 31 | run.save_last_only = True 32 | run.proj_name = "co3d" 33 | run.dataset_name = "co3d" 34 | run.datadir = "data/co3d_v2" 35 | run.logbase = "logs" 36 | run.log_every_n_steps = 100 37 | run.model_name = "plenoxel" 38 | run.seed = 0 39 | run.progressbar_refresh_rate = 100 40 | 41 | load_co3d_data.v2_mode = True 42 | run.postfix = "v2" -------------------------------------------------------------------------------- /configs/rebuttal_quant/co3d_16bit.gin: -------------------------------------------------------------------------------- 1 | LitData.batch_sampler = "all_images" 2 | LitData.epoch_size = 25600 3 | LitData.batch_size = 5000 4 | LitData.chunk = 10000 5 | LitData.use_pixel_centers = True 6 | 7 | LitPlenoxel.reso = [[128, 128, 128], [256, 256, 256]] 8 | LitPlenoxel.background_nlayers = 16 9 | LitPlenoxel.background_reso = 512 10 | LitPlenoxel.lr_sigma = 3.0e+1 11 | LitPlenoxel.lr_sh = 1.0e-2 12 | LitPlenoxel.lr_sigma_delay_steps = 0 13 | LitPlenoxel.lr_fg_begin_step = 1000 14 | LitPlenoxel.density_thresh = 20.0 15 | LitPlenoxel.thresh_type = "sigma" 16 | LitPlenoxel.near_clip = 0.35 17 | LitPlenoxel.lambda_tv = 5.0e-5 18 | LitPlenoxel.lambda_tv_sh = 5.0e-3 19 | LitPlenoxel.lambda_tv_background_color = 1.0e-3 20 | LitPlenoxel.lambda_tv_background_color = 1.0e-3 21 | LitPlenoxel.lambda_beta = 1.0e-5 22 | LitPlenoxel.lambda_sparsity = 1.0e-10 23 | LitPlenoxel.background_brightness = 0.5 24 | LitPlenoxel.tv_early_only = 0 25 | LitPlenoxel.tv_decay = 0.5 26 | LitPlenoxel.quantize = True 27 | LitPlenoxel.quant_bit = 16 28 | 29 | ResampleCallBack.upsamp_every = 25600 30 | 31 | run.max_steps = 76800 32 | run.save_last_only = True 33 | run.proj_name = "co3d" 34 | run.dataset_name = "co3d" 35 | run.datadir = "data/co3d" 36 | run.logbase = "logs" 37 | run.log_every_n_steps = 100 38 | run.model_name = "plenoxel" 39 | run.seed = 0 40 | run.progressbar_refresh_rate = 100 41 | run.postfix = "16bit" -------------------------------------------------------------------------------- /configs/rebuttal_quant/co3d_2bit.gin: -------------------------------------------------------------------------------- 1 | LitData.batch_sampler = 
"all_images" 2 | LitData.epoch_size = 25600 3 | LitData.batch_size = 5000 4 | LitData.chunk = 10000 5 | LitData.use_pixel_centers = True 6 | 7 | LitPlenoxel.reso = [[128, 128, 128], [256, 256, 256]] 8 | LitPlenoxel.background_nlayers = 16 9 | LitPlenoxel.background_reso = 512 10 | LitPlenoxel.lr_sigma = 3.0e+1 11 | LitPlenoxel.lr_sh = 1.0e-2 12 | LitPlenoxel.lr_sigma_delay_steps = 0 13 | LitPlenoxel.lr_fg_begin_step = 1000 14 | LitPlenoxel.density_thresh = 20.0 15 | LitPlenoxel.thresh_type = "sigma" 16 | LitPlenoxel.near_clip = 0.35 17 | LitPlenoxel.lambda_tv = 5.0e-5 18 | LitPlenoxel.lambda_tv_sh = 5.0e-3 19 | LitPlenoxel.lambda_tv_background_color = 1.0e-3 20 | LitPlenoxel.lambda_tv_background_color = 1.0e-3 21 | LitPlenoxel.lambda_beta = 1.0e-5 22 | LitPlenoxel.lambda_sparsity = 1.0e-10 23 | LitPlenoxel.background_brightness = 0.5 24 | LitPlenoxel.tv_early_only = 0 25 | LitPlenoxel.tv_decay = 0.5 26 | LitPlenoxel.quantize = True 27 | LitPlenoxel.quant_bit = 2 28 | 29 | ResampleCallBack.upsamp_every = 25600 30 | 31 | run.max_steps = 76800 32 | run.save_last_only = True 33 | run.proj_name = "co3d" 34 | run.dataset_name = "co3d" 35 | run.datadir = "data/co3d" 36 | run.logbase = "logs" 37 | run.log_every_n_steps = 100 38 | run.model_name = "plenoxel" 39 | run.seed = 0 40 | run.progressbar_refresh_rate = 100 41 | run.postfix = "2bit" -------------------------------------------------------------------------------- /configs/rebuttal_quant/co3d_2bit_clip.gin: -------------------------------------------------------------------------------- 1 | LitData.batch_sampler = "all_images" 2 | LitData.epoch_size = 25600 3 | LitData.batch_size = 5000 4 | LitData.chunk = 10000 5 | LitData.use_pixel_centers = True 6 | 7 | LitPlenoxel.reso = [[128, 128, 128], [256, 256, 256]] 8 | LitPlenoxel.background_nlayers = 16 9 | LitPlenoxel.background_reso = 512 10 | LitPlenoxel.lr_sigma = 3.0e+1 11 | LitPlenoxel.lr_sh = 1.0e-2 12 | LitPlenoxel.lr_sigma_delay_steps = 0 13 | LitPlenoxel.lr_fg_begin_step = 1000 14 | LitPlenoxel.density_thresh = 20.0 15 | LitPlenoxel.thresh_type = "sigma" 16 | LitPlenoxel.near_clip = 0.35 17 | LitPlenoxel.lambda_tv = 5.0e-5 18 | LitPlenoxel.lambda_tv_sh = 5.0e-3 19 | LitPlenoxel.lambda_tv_background_color = 1.0e-3 20 | LitPlenoxel.lambda_tv_background_color = 1.0e-3 21 | LitPlenoxel.lambda_beta = 1.0e-5 22 | LitPlenoxel.lambda_sparsity = 1.0e-10 23 | LitPlenoxel.background_brightness = 0.5 24 | LitPlenoxel.tv_early_only = 0 25 | LitPlenoxel.tv_decay = 0.5 26 | LitPlenoxel.quantize = True 27 | LitPlenoxel.quant_bit = 2 28 | LitPlenoxel.clip_quant = True 29 | 30 | ResampleCallBack.upsamp_every = 25600 31 | 32 | run.max_steps = 76800 33 | run.save_last_only = True 34 | run.proj_name = "co3d" 35 | run.dataset_name = "co3d" 36 | run.datadir = "data/co3d" 37 | run.logbase = "logs" 38 | run.log_every_n_steps = 100 39 | run.model_name = "plenoxel" 40 | run.seed = 0 41 | run.progressbar_refresh_rate = 100 42 | run.postfix = "clip_2bit" -------------------------------------------------------------------------------- /configs/rebuttal_quant/co3d_2bit_density.gin: -------------------------------------------------------------------------------- 1 | LitData.batch_sampler = "all_images" 2 | LitData.epoch_size = 25600 3 | LitData.batch_size = 5000 4 | LitData.chunk = 10000 5 | LitData.use_pixel_centers = True 6 | 7 | LitPlenoxel.reso = [[128, 128, 128], [256, 256, 256]] 8 | LitPlenoxel.background_nlayers = 16 9 | LitPlenoxel.background_reso = 512 10 | LitPlenoxel.lr_sigma = 3.0e+1 11 | LitPlenoxel.lr_sh 
= 1.0e-2 12 | LitPlenoxel.lr_sigma_delay_steps = 0 13 | LitPlenoxel.lr_fg_begin_step = 1000 14 | LitPlenoxel.density_thresh = 20.0 15 | LitPlenoxel.thresh_type = "sigma" 16 | LitPlenoxel.near_clip = 0.35 17 | LitPlenoxel.lambda_tv = 5.0e-5 18 | LitPlenoxel.lambda_tv_sh = 5.0e-3 19 | LitPlenoxel.lambda_tv_background_color = 1.0e-3 20 | LitPlenoxel.lambda_tv_background_color = 1.0e-3 21 | LitPlenoxel.lambda_beta = 1.0e-5 22 | LitPlenoxel.lambda_sparsity = 1.0e-10 23 | LitPlenoxel.background_brightness = 0.5 24 | LitPlenoxel.tv_early_only = 0 25 | LitPlenoxel.tv_decay = 0.5 26 | LitPlenoxel.quantize = True 27 | LitPlenoxel.quantize_density = True 28 | LitPlenoxel.quant_bit = 2 29 | 30 | ResampleCallBack.upsamp_every = 25600 31 | 32 | run.max_steps = 76800 33 | run.save_last_only = True 34 | run.proj_name = "co3d" 35 | run.dataset_name = "co3d" 36 | run.datadir = "data/co3d" 37 | run.logbase = "logs" 38 | run.log_every_n_steps = 100 39 | run.model_name = "plenoxel" 40 | run.seed = 0 41 | run.progressbar_refresh_rate = 100 42 | run.postfix = "density_2bit" -------------------------------------------------------------------------------- /configs/rebuttal_quant/co3d_4bit.gin: -------------------------------------------------------------------------------- 1 | LitData.batch_sampler = "all_images" 2 | LitData.epoch_size = 25600 3 | LitData.batch_size = 5000 4 | LitData.chunk = 10000 5 | LitData.use_pixel_centers = True 6 | 7 | LitPlenoxel.reso = [[128, 128, 128], [256, 256, 256]] 8 | LitPlenoxel.background_nlayers = 16 9 | LitPlenoxel.background_reso = 512 10 | LitPlenoxel.lr_sigma = 3.0e+1 11 | LitPlenoxel.lr_sh = 1.0e-2 12 | LitPlenoxel.lr_sigma_delay_steps = 0 13 | LitPlenoxel.lr_fg_begin_step = 1000 14 | LitPlenoxel.density_thresh = 20.0 15 | LitPlenoxel.thresh_type = "sigma" 16 | LitPlenoxel.near_clip = 0.35 17 | LitPlenoxel.lambda_tv = 5.0e-5 18 | LitPlenoxel.lambda_tv_sh = 5.0e-3 19 | LitPlenoxel.lambda_tv_background_color = 1.0e-3 20 | LitPlenoxel.lambda_tv_background_color = 1.0e-3 21 | LitPlenoxel.lambda_beta = 1.0e-5 22 | LitPlenoxel.lambda_sparsity = 1.0e-10 23 | LitPlenoxel.background_brightness = 0.5 24 | LitPlenoxel.tv_early_only = 0 25 | LitPlenoxel.tv_decay = 0.5 26 | LitPlenoxel.quantize = True 27 | LitPlenoxel.quant_bit = 4 28 | 29 | ResampleCallBack.upsamp_every = 25600 30 | 31 | run.max_steps = 76800 32 | run.save_last_only = True 33 | run.proj_name = "co3d" 34 | run.dataset_name = "co3d" 35 | run.datadir = "data/co3d" 36 | run.logbase = "logs" 37 | run.log_every_n_steps = 100 38 | run.model_name = "plenoxel" 39 | run.seed = 0 40 | run.progressbar_refresh_rate = 100 41 | run.postfix = "4bit" -------------------------------------------------------------------------------- /configs/rebuttal_quant/co3d_4bit_clip.gin: -------------------------------------------------------------------------------- 1 | LitData.batch_sampler = "all_images" 2 | LitData.epoch_size = 25600 3 | LitData.batch_size = 5000 4 | LitData.chunk = 10000 5 | LitData.use_pixel_centers = True 6 | 7 | LitPlenoxel.reso = [[128, 128, 128], [256, 256, 256]] 8 | LitPlenoxel.background_nlayers = 16 9 | LitPlenoxel.background_reso = 512 10 | LitPlenoxel.lr_sigma = 3.0e+1 11 | LitPlenoxel.lr_sh = 1.0e-2 12 | LitPlenoxel.lr_sigma_delay_steps = 0 13 | LitPlenoxel.lr_fg_begin_step = 1000 14 | LitPlenoxel.density_thresh = 20.0 15 | LitPlenoxel.thresh_type = "sigma" 16 | LitPlenoxel.near_clip = 0.35 17 | LitPlenoxel.lambda_tv = 5.0e-5 18 | LitPlenoxel.lambda_tv_sh = 5.0e-3 19 | LitPlenoxel.lambda_tv_background_color = 1.0e-3 20 
| LitPlenoxel.lambda_tv_background_color = 1.0e-3 21 | LitPlenoxel.lambda_beta = 1.0e-5 22 | LitPlenoxel.lambda_sparsity = 1.0e-10 23 | LitPlenoxel.background_brightness = 0.5 24 | LitPlenoxel.tv_early_only = 0 25 | LitPlenoxel.tv_decay = 0.5 26 | LitPlenoxel.quantize = True 27 | LitPlenoxel.quant_bit = 4 28 | LitPlenoxel.clip_quant = True 29 | 30 | ResampleCallBack.upsamp_every = 25600 31 | 32 | run.max_steps = 76800 33 | run.save_last_only = True 34 | run.proj_name = "co3d" 35 | run.dataset_name = "co3d" 36 | run.datadir = "data/co3d" 37 | run.logbase = "logs" 38 | run.log_every_n_steps = 100 39 | run.model_name = "plenoxel" 40 | run.seed = 0 41 | run.progressbar_refresh_rate = 100 42 | run.postfix = "clip_4bit" -------------------------------------------------------------------------------- /configs/rebuttal_quant/co3d_4bit_density.gin: -------------------------------------------------------------------------------- 1 | LitData.batch_sampler = "all_images" 2 | LitData.epoch_size = 25600 3 | LitData.batch_size = 5000 4 | LitData.chunk = 10000 5 | LitData.use_pixel_centers = True 6 | 7 | LitPlenoxel.reso = [[128, 128, 128], [256, 256, 256]] 8 | LitPlenoxel.background_nlayers = 16 9 | LitPlenoxel.background_reso = 512 10 | LitPlenoxel.lr_sigma = 3.0e+1 11 | LitPlenoxel.lr_sh = 1.0e-2 12 | LitPlenoxel.lr_sigma_delay_steps = 0 13 | LitPlenoxel.lr_fg_begin_step = 1000 14 | LitPlenoxel.density_thresh = 20.0 15 | LitPlenoxel.thresh_type = "sigma" 16 | LitPlenoxel.near_clip = 0.35 17 | LitPlenoxel.lambda_tv = 5.0e-5 18 | LitPlenoxel.lambda_tv_sh = 5.0e-3 19 | LitPlenoxel.lambda_tv_background_color = 1.0e-3 20 | LitPlenoxel.lambda_tv_background_color = 1.0e-3 21 | LitPlenoxel.lambda_beta = 1.0e-5 22 | LitPlenoxel.lambda_sparsity = 1.0e-10 23 | LitPlenoxel.background_brightness = 0.5 24 | LitPlenoxel.tv_early_only = 0 25 | LitPlenoxel.tv_decay = 0.5 26 | LitPlenoxel.quantize = True 27 | LitPlenoxel.quantize_density = True 28 | LitPlenoxel.quant_bit = 4 29 | 30 | 31 | ResampleCallBack.upsamp_every = 25600 32 | 33 | run.max_steps = 76800 34 | run.save_last_only = True 35 | run.proj_name = "co3d" 36 | run.dataset_name = "co3d" 37 | run.datadir = "data/co3d" 38 | run.logbase = "logs" 39 | run.log_every_n_steps = 100 40 | run.model_name = "plenoxel" 41 | run.seed = 0 42 | run.progressbar_refresh_rate = 100 43 | run.postfix = "density_4bit" -------------------------------------------------------------------------------- /configs/rebuttal_quant/co3d_8bit_clip.gin: -------------------------------------------------------------------------------- 1 | LitData.batch_sampler = "all_images" 2 | LitData.epoch_size = 25600 3 | LitData.batch_size = 5000 4 | LitData.chunk = 10000 5 | LitData.use_pixel_centers = True 6 | 7 | LitPlenoxel.reso = [[128, 128, 128], [256, 256, 256]] 8 | LitPlenoxel.background_nlayers = 16 9 | LitPlenoxel.background_reso = 512 10 | LitPlenoxel.lr_sigma = 3.0e+1 11 | LitPlenoxel.lr_sh = 1.0e-2 12 | LitPlenoxel.lr_sigma_delay_steps = 0 13 | LitPlenoxel.lr_fg_begin_step = 1000 14 | LitPlenoxel.density_thresh = 20.0 15 | LitPlenoxel.thresh_type = "sigma" 16 | LitPlenoxel.near_clip = 0.35 17 | LitPlenoxel.lambda_tv = 5.0e-5 18 | LitPlenoxel.lambda_tv_sh = 5.0e-3 19 | LitPlenoxel.lambda_tv_background_color = 1.0e-3 20 | LitPlenoxel.lambda_tv_background_color = 1.0e-3 21 | LitPlenoxel.lambda_beta = 1.0e-5 22 | LitPlenoxel.lambda_sparsity = 1.0e-10 23 | LitPlenoxel.background_brightness = 0.5 24 | LitPlenoxel.tv_early_only = 0 25 | LitPlenoxel.tv_decay = 0.5 26 | LitPlenoxel.quantize = True 27 | 
LitPlenoxel.clip_quant = True 28 | 29 | ResampleCallBack.upsamp_every = 25600 30 | 31 | run.max_steps = 76800 32 | run.save_last_only = True 33 | run.proj_name = "co3d" 34 | run.dataset_name = "co3d" 35 | run.datadir = "data/co3d" 36 | run.logbase = "logs" 37 | run.log_every_n_steps = 100 38 | run.model_name = "plenoxel" 39 | run.seed = 0 40 | run.progressbar_refresh_rate = 100 41 | run.postfix = "clip_8bit" -------------------------------------------------------------------------------- /configs/rebuttal_quant/co3d_8bit_density.gin: -------------------------------------------------------------------------------- 1 | LitData.batch_sampler = "all_images" 2 | LitData.epoch_size = 25600 3 | LitData.batch_size = 5000 4 | LitData.chunk = 10000 5 | LitData.use_pixel_centers = True 6 | 7 | LitPlenoxel.reso = [[128, 128, 128], [256, 256, 256]] 8 | LitPlenoxel.background_nlayers = 16 9 | LitPlenoxel.background_reso = 512 10 | LitPlenoxel.lr_sigma = 3.0e+1 11 | LitPlenoxel.lr_sh = 1.0e-2 12 | LitPlenoxel.lr_sigma_delay_steps = 0 13 | LitPlenoxel.lr_fg_begin_step = 1000 14 | LitPlenoxel.density_thresh = 20.0 15 | LitPlenoxel.thresh_type = "sigma" 16 | LitPlenoxel.near_clip = 0.35 17 | LitPlenoxel.lambda_tv = 5.0e-5 18 | LitPlenoxel.lambda_tv_sh = 5.0e-3 19 | LitPlenoxel.lambda_tv_background_color = 1.0e-3 20 | LitPlenoxel.lambda_tv_background_color = 1.0e-3 21 | LitPlenoxel.lambda_beta = 1.0e-5 22 | LitPlenoxel.lambda_sparsity = 1.0e-10 23 | LitPlenoxel.background_brightness = 0.5 24 | LitPlenoxel.tv_early_only = 0 25 | LitPlenoxel.tv_decay = 0.5 26 | LitPlenoxel.quantize = True 27 | LitPlenoxel.quantize_density = True 28 | 29 | 30 | ResampleCallBack.upsamp_every = 25600 31 | 32 | run.max_steps = 76800 33 | run.save_last_only = True 34 | run.proj_name = "co3d" 35 | run.dataset_name = "co3d" 36 | run.datadir = "data/co3d" 37 | run.logbase = "logs" 38 | run.log_every_n_steps = 100 39 | run.model_name = "plenoxel" 40 | run.seed = 0 41 | run.progressbar_refresh_rate = 100 42 | run.postfix = "density_8bit" -------------------------------------------------------------------------------- /configs/rebuttal_reso/co3d_128.gin: -------------------------------------------------------------------------------- 1 | LitData.batch_sampler = "all_images" 2 | LitData.epoch_size = 25600 3 | LitData.batch_size = 5000 4 | LitData.chunk = 10000 5 | LitData.use_pixel_centers = True 6 | 7 | LitPlenoxel.reso = [[64, 64, 64], [128, 128, 128]] 8 | LitPlenoxel.background_nlayers = 16 9 | LitPlenoxel.background_reso = 512 10 | LitPlenoxel.lr_sigma = 3.0e+1 11 | LitPlenoxel.lr_sh = 1.0e-2 12 | LitPlenoxel.lr_sigma_delay_steps = 0 13 | LitPlenoxel.lr_fg_begin_step = 1000 14 | LitPlenoxel.density_thresh = 20.0 15 | LitPlenoxel.thresh_type = "sigma" 16 | LitPlenoxel.near_clip = 0.35 17 | LitPlenoxel.lambda_tv = 5.0e-5 18 | LitPlenoxel.lambda_tv_sh = 5.0e-3 19 | LitPlenoxel.lambda_tv_background_color = 1.0e-3 20 | LitPlenoxel.lambda_tv_background_color = 1.0e-3 21 | LitPlenoxel.lambda_beta = 1.0e-5 22 | LitPlenoxel.lambda_sparsity = 1.0e-10 23 | LitPlenoxel.background_brightness = 0.5 24 | LitPlenoxel.tv_early_only = 0 25 | LitPlenoxel.tv_decay = 0.5 26 | LitPlenoxel.quantize = True 27 | 28 | ResampleCallBack.upsamp_every = 25600 29 | 30 | run.max_steps = 76800 31 | run.save_last_only = True 32 | run.proj_name = "co3d" 33 | run.dataset_name = "co3d" 34 | run.datadir = "data/co3d" 35 | run.logbase = "logs" 36 | run.log_every_n_steps = 100 37 | run.model_name = "plenoxel" 38 | run.seed = 0 39 | run.progressbar_refresh_rate = 100 40 | 
run.postfix = "128" -------------------------------------------------------------------------------- /configs/rebuttal_reso/co3d_256.gin: -------------------------------------------------------------------------------- 1 | LitData.batch_sampler = "all_images" 2 | LitData.epoch_size = 25600 3 | LitData.batch_size = 5000 4 | LitData.chunk = 10000 5 | LitData.use_pixel_centers = True 6 | 7 | LitPlenoxel.reso = [[128, 128, 128], [256, 256, 256]] 8 | LitPlenoxel.background_nlayers = 16 9 | LitPlenoxel.background_reso = 512 10 | LitPlenoxel.lr_sigma = 3.0e+1 11 | LitPlenoxel.lr_sh = 1.0e-2 12 | LitPlenoxel.lr_sigma_delay_steps = 0 13 | LitPlenoxel.lr_fg_begin_step = 1000 14 | LitPlenoxel.density_thresh = 20.0 15 | LitPlenoxel.thresh_type = "sigma" 16 | LitPlenoxel.near_clip = 0.35 17 | LitPlenoxel.lambda_tv = 5.0e-5 18 | LitPlenoxel.lambda_tv_sh = 5.0e-3 19 | LitPlenoxel.lambda_tv_background_color = 1.0e-3 20 | LitPlenoxel.lambda_tv_background_color = 1.0e-3 21 | LitPlenoxel.lambda_beta = 1.0e-5 22 | LitPlenoxel.lambda_sparsity = 1.0e-10 23 | LitPlenoxel.background_brightness = 0.5 24 | LitPlenoxel.tv_early_only = 0 25 | LitPlenoxel.tv_decay = 0.5 26 | LitPlenoxel.quantize = True 27 | 28 | ResampleCallBack.upsamp_every = 25600 29 | 30 | run.max_steps = 76800 31 | run.save_last_only = True 32 | run.proj_name = "co3d" 33 | run.dataset_name = "co3d" 34 | run.datadir = "data/co3d" 35 | run.logbase = "logs" 36 | run.log_every_n_steps = 100 37 | run.model_name = "plenoxel" 38 | run.seed = 0 39 | run.progressbar_refresh_rate = 100 40 | run.postfix = "256" -------------------------------------------------------------------------------- /configs/rebuttal_reso/co3d_384.gin: -------------------------------------------------------------------------------- 1 | LitData.batch_sampler = "all_images" 2 | LitData.epoch_size = 25600 3 | LitData.batch_size = 5000 4 | LitData.chunk = 10000 5 | LitData.use_pixel_centers = True 6 | 7 | LitPlenoxel.reso = [[96, 96, 96], [192, 192, 192], [384, 384, 384]] 8 | LitPlenoxel.background_nlayers = 16 9 | LitPlenoxel.background_reso = 512 10 | LitPlenoxel.lr_sigma = 3.0e+1 11 | LitPlenoxel.lr_sh = 1.0e-2 12 | LitPlenoxel.lr_sigma_delay_steps = 0 13 | LitPlenoxel.lr_fg_begin_step = 1000 14 | LitPlenoxel.density_thresh = 20.0 15 | LitPlenoxel.thresh_type = "sigma" 16 | LitPlenoxel.near_clip = 0.35 17 | LitPlenoxel.lambda_tv = 5.0e-5 18 | LitPlenoxel.lambda_tv_sh = 5.0e-3 19 | LitPlenoxel.lambda_tv_background_color = 1.0e-3 20 | LitPlenoxel.lambda_tv_background_color = 1.0e-3 21 | LitPlenoxel.lambda_beta = 1.0e-5 22 | LitPlenoxel.lambda_sparsity = 1.0e-10 23 | LitPlenoxel.background_brightness = 0.5 24 | LitPlenoxel.tv_early_only = 0 25 | LitPlenoxel.tv_decay = 0.5 26 | LitPlenoxel.quantize = True 27 | 28 | ResampleCallBack.upsamp_every = 25600 29 | 30 | run.max_steps = 76800 31 | run.save_last_only = True 32 | run.proj_name = "co3d" 33 | run.dataset_name = "co3d" 34 | run.datadir = "data/co3d" 35 | run.logbase = "logs" 36 | run.log_every_n_steps = 100 37 | run.model_name = "plenoxel" 38 | run.seed = 0 39 | run.progressbar_refresh_rate = 100 40 | run.postfix = "384" -------------------------------------------------------------------------------- /configs/rebuttal_reso/co3d_64.gin: -------------------------------------------------------------------------------- 1 | LitData.batch_sampler = "all_images" 2 | LitData.epoch_size = 25600 3 | LitData.batch_size = 5000 4 | LitData.chunk = 10000 5 | LitData.use_pixel_centers = True 6 | 7 | LitPlenoxel.reso = [[32, 32, 32], [64, 64, 64]] 8 | 
LitPlenoxel.background_nlayers = 16 9 | LitPlenoxel.background_reso = 512 10 | LitPlenoxel.lr_sigma = 3.0e+1 11 | LitPlenoxel.lr_sh = 1.0e-2 12 | LitPlenoxel.lr_sigma_delay_steps = 0 13 | LitPlenoxel.lr_fg_begin_step = 1000 14 | LitPlenoxel.density_thresh = 20.0 15 | LitPlenoxel.thresh_type = "sigma" 16 | LitPlenoxel.near_clip = 0.35 17 | LitPlenoxel.lambda_tv = 5.0e-5 18 | LitPlenoxel.lambda_tv_sh = 5.0e-3 19 | LitPlenoxel.lambda_tv_background_color = 1.0e-3 20 | LitPlenoxel.lambda_tv_background_color = 1.0e-3 21 | LitPlenoxel.lambda_beta = 1.0e-5 22 | LitPlenoxel.lambda_sparsity = 1.0e-10 23 | LitPlenoxel.background_brightness = 0.5 24 | LitPlenoxel.tv_early_only = 0 25 | LitPlenoxel.tv_decay = 0.5 26 | LitPlenoxel.quantize = True 27 | 28 | ResampleCallBack.upsamp_every = 25600 29 | 30 | run.max_steps = 76800 31 | run.save_last_only = True 32 | run.proj_name = "co3d" 33 | run.dataset_name = "co3d" 34 | run.datadir = "data/co3d" 35 | run.logbase = "logs" 36 | run.log_every_n_steps = 100 37 | run.model_name = "plenoxel" 38 | run.seed = 0 39 | run.progressbar_refresh_rate = 100 40 | run.postfix = "64" -------------------------------------------------------------------------------- /configs/rebuttal_sigma_weight/co3d_sigma_10.gin: -------------------------------------------------------------------------------- 1 | LitData.batch_sampler = "all_images" 2 | LitData.epoch_size = 25600 3 | LitData.batch_size = 5000 4 | LitData.chunk = 10000 5 | LitData.use_pixel_centers = True 6 | 7 | LitPlenoxel.reso = [[128, 128, 128], [256, 256, 256]] 8 | LitPlenoxel.background_nlayers = 16 9 | LitPlenoxel.background_reso = 512 10 | LitPlenoxel.lr_sigma = 3.0e+1 11 | LitPlenoxel.lr_sh = 1.0e-2 12 | LitPlenoxel.lr_sigma_delay_steps = 0 13 | LitPlenoxel.lr_fg_begin_step = 1000 14 | LitPlenoxel.density_thresh = 10.0 15 | LitPlenoxel.thresh_type = "sigma" 16 | LitPlenoxel.near_clip = 0.35 17 | LitPlenoxel.lambda_tv = 5.0e-5 18 | LitPlenoxel.lambda_tv_sh = 5.0e-3 19 | LitPlenoxel.lambda_tv_background_color = 1.0e-3 20 | LitPlenoxel.lambda_tv_background_color = 1.0e-3 21 | LitPlenoxel.lambda_beta = 1.0e-5 22 | LitPlenoxel.lambda_sparsity = 1.0e-10 23 | LitPlenoxel.background_brightness = 0.5 24 | LitPlenoxel.tv_early_only = 0 25 | LitPlenoxel.tv_decay = 0.5 26 | LitPlenoxel.quantize = True 27 | 28 | ResampleCallBack.upsamp_every = 25600 29 | 30 | run.max_steps = 76800 31 | run.save_last_only = True 32 | run.proj_name = "co3d" 33 | run.dataset_name = "co3d" 34 | run.datadir = "data/co3d" 35 | run.logbase = "logs" 36 | run.log_every_n_steps = 100 37 | run.model_name = "plenoxel" 38 | run.seed = 0 39 | run.progressbar_refresh_rate = 100 40 | run.postfix = "sigma10" -------------------------------------------------------------------------------- /configs/rebuttal_sigma_weight/co3d_sigma_100.gin: -------------------------------------------------------------------------------- 1 | LitData.batch_sampler = "all_images" 2 | LitData.epoch_size = 25600 3 | LitData.batch_size = 5000 4 | LitData.chunk = 10000 5 | LitData.use_pixel_centers = True 6 | 7 | LitPlenoxel.reso = [[128, 128, 128], [256, 256, 256]] 8 | LitPlenoxel.background_nlayers = 16 9 | LitPlenoxel.background_reso = 512 10 | LitPlenoxel.lr_sigma = 3.0e+1 11 | LitPlenoxel.lr_sh = 1.0e-2 12 | LitPlenoxel.lr_sigma_delay_steps = 0 13 | LitPlenoxel.lr_fg_begin_step = 1000 14 | LitPlenoxel.density_thresh = 100.0 15 | LitPlenoxel.thresh_type = "sigma" 16 | LitPlenoxel.near_clip = 0.35 17 | LitPlenoxel.lambda_tv = 5.0e-5 18 | LitPlenoxel.lambda_tv_sh = 5.0e-3 19 | 
LitPlenoxel.lambda_tv_background_color = 1.0e-3 20 | LitPlenoxel.lambda_tv_background_color = 1.0e-3 21 | LitPlenoxel.lambda_beta = 1.0e-5 22 | LitPlenoxel.lambda_sparsity = 1.0e-10 23 | LitPlenoxel.background_brightness = 0.5 24 | LitPlenoxel.tv_early_only = 0 25 | LitPlenoxel.tv_decay = 0.5 26 | LitPlenoxel.quantize = True 27 | 28 | ResampleCallBack.upsamp_every = 25600 29 | 30 | run.max_steps = 76800 31 | run.save_last_only = True 32 | run.proj_name = "co3d" 33 | run.dataset_name = "co3d" 34 | run.datadir = "data/co3d" 35 | run.logbase = "logs" 36 | run.log_every_n_steps = 100 37 | run.model_name = "plenoxel" 38 | run.seed = 0 39 | run.progressbar_refresh_rate = 100 40 | run.postfix = "sigma100" -------------------------------------------------------------------------------- /configs/rebuttal_sigma_weight/co3d_sigma_5.gin: -------------------------------------------------------------------------------- 1 | LitData.batch_sampler = "all_images" 2 | LitData.epoch_size = 25600 3 | LitData.batch_size = 5000 4 | LitData.chunk = 10000 5 | LitData.use_pixel_centers = True 6 | 7 | LitPlenoxel.reso = [[128, 128, 128], [256, 256, 256]] 8 | LitPlenoxel.background_nlayers = 16 9 | LitPlenoxel.background_reso = 512 10 | LitPlenoxel.lr_sigma = 3.0e+1 11 | LitPlenoxel.lr_sh = 1.0e-2 12 | LitPlenoxel.lr_sigma_delay_steps = 0 13 | LitPlenoxel.lr_fg_begin_step = 1000 14 | LitPlenoxel.density_thresh = 5.0 15 | LitPlenoxel.thresh_type = "sigma" 16 | LitPlenoxel.near_clip = 0.35 17 | LitPlenoxel.lambda_tv = 5.0e-5 18 | LitPlenoxel.lambda_tv_sh = 5.0e-3 19 | LitPlenoxel.lambda_tv_background_color = 1.0e-3 20 | LitPlenoxel.lambda_tv_background_color = 1.0e-3 21 | LitPlenoxel.lambda_beta = 1.0e-5 22 | LitPlenoxel.lambda_sparsity = 1.0e-10 23 | LitPlenoxel.background_brightness = 0.5 24 | LitPlenoxel.tv_early_only = 0 25 | LitPlenoxel.tv_decay = 0.5 26 | LitPlenoxel.quantize = True 27 | 28 | ResampleCallBack.upsamp_every = 25600 29 | 30 | run.max_steps = 76800 31 | run.save_last_only = True 32 | run.proj_name = "co3d" 33 | run.dataset_name = "co3d" 34 | run.datadir = "data/co3d" 35 | run.logbase = "logs" 36 | run.log_every_n_steps = 100 37 | run.model_name = "plenoxel" 38 | run.seed = 0 39 | run.progressbar_refresh_rate = 100 40 | run.postfix = "sigma5" -------------------------------------------------------------------------------- /configs/rebuttal_sigma_weight/co3d_weight_1.28.gin: -------------------------------------------------------------------------------- 1 | LitData.batch_sampler = "all_images" 2 | LitData.epoch_size = 25600 3 | LitData.batch_size = 5000 4 | LitData.chunk = 10000 5 | LitData.use_pixel_centers = True 6 | 7 | LitPlenoxel.reso = [[128, 128, 128], [256, 256, 256]] 8 | LitPlenoxel.background_nlayers = 16 9 | LitPlenoxel.background_reso = 512 10 | LitPlenoxel.lr_sigma = 3.0e+1 11 | LitPlenoxel.lr_sh = 1.0e-2 12 | LitPlenoxel.lr_sigma_delay_steps = 0 13 | LitPlenoxel.lr_fg_begin_step = 1000 14 | LitPlenoxel.weight_thresh = 1.28 15 | LitPlenoxel.thresh_type = "weight" 16 | LitPlenoxel.near_clip = 0.35 17 | LitPlenoxel.lambda_tv = 5.0e-5 18 | LitPlenoxel.lambda_tv_sh = 5.0e-3 19 | LitPlenoxel.lambda_tv_background_color = 1.0e-3 20 | LitPlenoxel.lambda_tv_background_color = 1.0e-3 21 | LitPlenoxel.lambda_beta = 1.0e-5 22 | LitPlenoxel.lambda_sparsity = 1.0e-10 23 | LitPlenoxel.background_brightness = 0.5 24 | LitPlenoxel.tv_early_only = 0 25 | LitPlenoxel.tv_decay = 0.5 26 | LitPlenoxel.quantize = True 27 | 28 | ResampleCallBack.upsamp_every = 25600 29 | 30 | run.max_steps = 76800 31 | 
run.save_last_only = True 32 | run.proj_name = "co3d" 33 | run.dataset_name = "co3d" 34 | run.datadir = "data/co3d" 35 | run.logbase = "logs" 36 | run.log_every_n_steps = 100 37 | run.model_name = "plenoxel" 38 | run.seed = 0 39 | run.progressbar_refresh_rate = 100 40 | run.postfix = "weight1.28" -------------------------------------------------------------------------------- /configs/rebuttal_sigma_weight/co3d_weight_2.56.gin: -------------------------------------------------------------------------------- 1 | LitData.batch_sampler = "all_images" 2 | LitData.epoch_size = 25600 3 | LitData.batch_size = 5000 4 | LitData.chunk = 10000 5 | LitData.use_pixel_centers = True 6 | 7 | LitPlenoxel.reso = [[128, 128, 128], [256, 256, 256]] 8 | LitPlenoxel.background_nlayers = 16 9 | LitPlenoxel.background_reso = 512 10 | LitPlenoxel.lr_sigma = 3.0e+1 11 | LitPlenoxel.lr_sh = 1.0e-2 12 | LitPlenoxel.lr_sigma_delay_steps = 0 13 | LitPlenoxel.lr_fg_begin_step = 1000 14 | LitPlenoxel.weight_thresh = 2.56 15 | LitPlenoxel.thresh_type = "weight" 16 | LitPlenoxel.near_clip = 0.35 17 | LitPlenoxel.lambda_tv = 5.0e-5 18 | LitPlenoxel.lambda_tv_sh = 5.0e-3 19 | LitPlenoxel.lambda_tv_background_color = 1.0e-3 20 | LitPlenoxel.lambda_tv_background_color = 1.0e-3 21 | LitPlenoxel.lambda_beta = 1.0e-5 22 | LitPlenoxel.lambda_sparsity = 1.0e-10 23 | LitPlenoxel.background_brightness = 0.5 24 | LitPlenoxel.tv_early_only = 0 25 | LitPlenoxel.tv_decay = 0.5 26 | LitPlenoxel.quantize = True 27 | 28 | ResampleCallBack.upsamp_every = 25600 29 | 30 | run.max_steps = 76800 31 | run.save_last_only = True 32 | run.proj_name = "co3d" 33 | run.dataset_name = "co3d" 34 | run.datadir = "data/co3d" 35 | run.logbase = "logs" 36 | run.log_every_n_steps = 100 37 | run.model_name = "plenoxel" 38 | run.seed = 0 39 | run.progressbar_refresh_rate = 100 40 | run.postfix = "weight2.56" -------------------------------------------------------------------------------- /configs/rebuttal_sigma_weight/co3d_weight_none.gin: -------------------------------------------------------------------------------- 1 | LitData.batch_sampler = "all_images" 2 | LitData.epoch_size = 25600 3 | LitData.batch_size = 5000 4 | LitData.chunk = 10000 5 | LitData.use_pixel_centers = True 6 | 7 | LitPlenoxel.reso = [[128, 128, 128], [256, 256, 256]] 8 | LitPlenoxel.background_nlayers = 16 9 | LitPlenoxel.background_reso = 512 10 | LitPlenoxel.lr_sigma = 3.0e+1 11 | LitPlenoxel.lr_sh = 1.0e-2 12 | LitPlenoxel.lr_sigma_delay_steps = 0 13 | LitPlenoxel.lr_fg_begin_step = 1000 14 | LitPlenoxel.weight_thresh = -10000000 15 | LitPlenoxel.thresh_type = "weight" 16 | LitPlenoxel.near_clip = 0.35 17 | LitPlenoxel.lambda_tv = 5.0e-5 18 | LitPlenoxel.lambda_tv_sh = 5.0e-3 19 | LitPlenoxel.lambda_tv_background_color = 1.0e-3 20 | LitPlenoxel.lambda_tv_background_color = 1.0e-3 21 | LitPlenoxel.lambda_beta = 1.0e-5 22 | LitPlenoxel.lambda_sparsity = 1.0e-10 23 | LitPlenoxel.background_brightness = 0.5 24 | LitPlenoxel.tv_early_only = 0 25 | LitPlenoxel.tv_decay = 0.5 26 | 27 | ResampleCallBack.upsamp_every = 25600 28 | 29 | run.max_steps = 76800 30 | run.save_last_only = True 31 | run.proj_name = "co3d" 32 | run.dataset_name = "co3d" 33 | run.datadir = "data/co3d" 34 | run.logbase = "logs" 35 | run.log_every_n_steps = 100 36 | run.model_name = "plenoxel" 37 | run.seed = 0 38 | run.progressbar_refresh_rate = 100 39 | run.postfix = "none" -------------------------------------------------------------------------------- /configs/scannet.gin: 
-------------------------------------------------------------------------------- 1 | LitData.batch_sampler = "all_images" 2 | LitData.epoch_size = 25600 3 | LitData.batch_size = 5000 4 | LitData.chunk = 10000 5 | LitData.use_pixel_centers = True 6 | 7 | LitDataScannet.frame_skip = 1 8 | LitDataScannet.max_frame = 1500 9 | LitDataScannet.max_image_dim = 640 10 | LitDataScannet.cam_scale_factor = 1.0 11 | LitDataScannet.blur_thresh = 10 12 | LitDataScannet.use_depth = True 13 | LitDataScannet.use_scans = True 14 | 15 | LitPlenoxel.reso = [[256, 256, 256], [256, 256, 256]] 16 | LitPlenoxel.background_nlayers = 0 17 | LitPlenoxel.background_reso = 512 18 | LitPlenoxel.lr_sigma = 3.0e+1 19 | LitPlenoxel.lr_sh = 1.0e-2 20 | LitPlenoxel.lr_sigma_delay_steps = 0 21 | LitPlenoxel.lr_fg_begin_step = 1000 22 | LitPlenoxel.density_thresh = 5.0 23 | LitPlenoxel.thresh_type = "sigma" 24 | LitPlenoxel.near_clip = 0.35 25 | LitPlenoxel.lambda_tv = 5.0e-5 26 | LitPlenoxel.lambda_tv_sh = 5.0e-3 27 | LitPlenoxel.lambda_tv_background_color = 1.0e-3 28 | LitPlenoxel.lambda_tv_background_color = 1.0e-3 29 | LitPlenoxel.lambda_beta = 1.0e-5 30 | LitPlenoxel.lambda_sparsity = 1.0e-10 31 | LitPlenoxel.background_brightness = 0.5 32 | LitPlenoxel.tv_early_only = 0 33 | LitPlenoxel.tv_decay = 0.5 34 | LitPlenoxel.quantize = True 35 | LitPlenoxel.init_grid_with_pcd=True 36 | 37 | ResampleCallBack.upsamp_every = 25600 38 | 39 | run.max_steps = 51200 40 | run.save_last_only = True 41 | run.proj_name = "scannet" 42 | run.dataset_name = "scannet" 43 | run.datadir = "data/scannet" 44 | run.logbase = "logs" 45 | run.log_every_n_steps = 100 46 | run.model_name = "plenoxel" 47 | run.seed = 0 48 | run.progressbar_refresh_rate = 100 49 | run.check_val_every_n_epoch = 999 # no validation 50 | run.run_render = False -------------------------------------------------------------------------------- /configs/scannet_depth.gin: -------------------------------------------------------------------------------- 1 | LitData.batch_sampler = "all_images" 2 | LitData.epoch_size = 25600 3 | LitData.batch_size = 5000 4 | LitData.chunk = 10000 5 | LitData.use_pixel_centers = True 6 | 7 | LitDataScannet.frame_skip = 1 8 | LitDataScannet.max_frame = 1500 9 | LitDataScannet.max_image_dim = 640 10 | LitDataScannet.cam_scale_factor = 1.0 11 | LitDataScannet.blur_thresh = 10 12 | LitDataScannet.use_depth = True 13 | LitDataScannet.use_scans = True 14 | LitDataScannet.pcd_name = 'tsdf_pcd_0.015.pcd' 15 | 16 | LitPlenoxel.reso = [[256, 256, 256]] 17 | LitPlenoxel.background_nlayers = 0 18 | LitPlenoxel.background_reso = 512 19 | LitPlenoxel.lr_sigma = 3.0e+1 20 | LitPlenoxel.lr_sh = 1.0e-2 21 | LitPlenoxel.lr_sigma_delay_steps = 0 22 | LitPlenoxel.lr_fg_begin_step = 1000 23 | LitPlenoxel.density_thresh = 5.0 24 | LitPlenoxel.thresh_type = "sigma" 25 | LitPlenoxel.near_clip = 0.35 26 | LitPlenoxel.lambda_tv = 5.0e-5 27 | LitPlenoxel.lambda_tv_sh = 5.0e-3 28 | LitPlenoxel.lambda_tv_background_color = 1.0e-3 29 | LitPlenoxel.lambda_tv_background_color = 1.0e-3 30 | LitPlenoxel.lambda_beta = 1.0e-5 31 | LitPlenoxel.lambda_sparsity = 1.0e-10 32 | LitPlenoxel.background_brightness = 0.5 33 | LitPlenoxel.tv_early_only = 0 34 | LitPlenoxel.tv_decay = 0.5 35 | LitPlenoxel.quantize = True 36 | 37 | ResampleCallBack.upsamp_every = 25600 38 | 39 | run.max_steps = 51200 40 | run.save_last_only = True 41 | run.proj_name = "scannet" 42 | run.dataset_name = "scannet" 43 | run.datadir = "data/scannet_extracted" 44 | run.logbase = "logs/depth_reso256_pad1" 45 | 
run.log_every_n_steps = 100 46 | run.model_name = "plenoxel" 47 | run.seed = 0 48 | run.progressbar_refresh_rate = 100 49 | run.check_val_every_n_epoch = 999 # no validation 50 | run.run_render = False 51 | -------------------------------------------------------------------------------- /configs/scannet_tsdf.gin: -------------------------------------------------------------------------------- 1 | LitData.batch_sampler = "all_images" 2 | LitData.epoch_size = 25600 3 | LitData.batch_size = 5000 4 | LitData.chunk = 10000 5 | LitData.use_pixel_centers = True 6 | 7 | LitDataScannet.frame_skip = 1 8 | LitDataScannet.max_frame = 1500 9 | LitDataScannet.max_image_dim = 640 10 | LitDataScannet.cam_scale_factor = 1.0 11 | LitDataScannet.blur_thresh = 10 12 | LitDataScannet.use_depth = False 13 | LitDataScannet.use_scans = True 14 | LitDataScannet.pcd_name = 'tsdf_pcd_512.npy' 15 | 16 | LitPlenoxel.reso = [[256, 256, 256]] 17 | LitPlenoxel.background_nlayers = 0 18 | LitPlenoxel.background_reso = 512 19 | LitPlenoxel.lr_sigma = 3.0e+1 20 | LitPlenoxel.lr_sh = 1.0e-2 21 | LitPlenoxel.lr_sigma_delay_steps = 0 22 | LitPlenoxel.lr_fg_begin_step = 1000 23 | LitPlenoxel.density_thresh = 5.0 24 | LitPlenoxel.thresh_type = "sigma" 25 | LitPlenoxel.near_clip = 0.35 26 | LitPlenoxel.lambda_tv = 5.0e-5 27 | LitPlenoxel.lambda_tv_sh = 5.0e-3 28 | LitPlenoxel.lambda_tv_background_color = 1.0e-3 29 | LitPlenoxel.lambda_tv_background_color = 1.0e-3 30 | LitPlenoxel.lambda_beta = 1.0e-5 31 | LitPlenoxel.lambda_sparsity = 1.0e-10 32 | LitPlenoxel.background_brightness = 0.5 33 | LitPlenoxel.tv_early_only = 0 34 | LitPlenoxel.tv_decay = 0.5 35 | LitPlenoxel.quantize = True 36 | 37 | ResampleCallBack.upsamp_every = 25600 38 | 39 | run.max_steps = 51200 40 | run.save_last_only = True 41 | run.proj_name = "scannet" 42 | run.dataset_name = "scannet" 43 | run.datadir = "data/scannet_extracted" 44 | run.logbase = "logs" 45 | run.log_every_n_steps = 100 46 | run.model_name = "plenoxel" 47 | run.seed = 0 48 | run.progressbar_refresh_rate = 100 49 | run.check_val_every_n_epoch = 999 # no validation 50 | run.run_render = False -------------------------------------------------------------------------------- /configs/scannet_tsdf_reso1024_pad3.gin: -------------------------------------------------------------------------------- 1 | include "./configs/scannet_tsdf.gin" 2 | 3 | LitPlenoxel.reso = [[1024, 1024, 1024]] 4 | LitPlenoxel.upsample_stride = 3 5 | 6 | run.logbase = "logs/tsdf_reso1024_pad3" 7 | -------------------------------------------------------------------------------- /configs/scannet_tsdf_reso256_pad1.gin: -------------------------------------------------------------------------------- 1 | include "./configs/scannet_tsdf.gin" 2 | 3 | LitPlenoxel.reso = [[256, 256, 256]] 4 | LitPlenoxel.upsample_stride = 1 5 | 6 | run.logbase = "logs/tsdf_reso256_pad1" -------------------------------------------------------------------------------- /configs/scannet_tsdf_reso256_pad2.gin: -------------------------------------------------------------------------------- 1 | include "./configs/scannet_tsdf.gin" 2 | 3 | LitPlenoxel.reso = [[256, 256, 256]] 4 | LitPlenoxel.upsample_stride = 2 5 | 6 | run.logbase = "logs/tsdf_reso256_pad2" -------------------------------------------------------------------------------- /configs/scannet_tsdf_reso512_pad1.gin: -------------------------------------------------------------------------------- 1 | include "./configs/scannet_tsdf.gin" 2 | 3 | LitPlenoxel.reso = [[512, 512, 512]] 4 | 
LitPlenoxel.upsample_stride = 1 5 | 6 | run.logbase = "logs/tsdf_reso512_pad1" -------------------------------------------------------------------------------- /configs/scannet_tsdf_reso512_pad2.gin: -------------------------------------------------------------------------------- 1 | include "./configs/scannet_tsdf.gin" 2 | 3 | LitPlenoxel.reso = [[512, 512, 512]] 4 | LitPlenoxel.upsample_stride = 2 5 | 6 | run.logbase = "logs/tsdf_reso512_pad2" -------------------------------------------------------------------------------- /data/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/POSTECH-CVLab/PeRFception/a3c34d4997ce6807a5b4e3debe086238cc951e61/data/.gitkeep -------------------------------------------------------------------------------- /dataloader/co3d_lists/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/POSTECH-CVLab/PeRFception/a3c34d4997ce6807a5b4e3debe086238cc951e61/dataloader/co3d_lists/.gitkeep -------------------------------------------------------------------------------- /dataloader/data_util/co3d.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import gzip 3 | import json 4 | import os 5 | 6 | import cv2 7 | import numpy as np 8 | import scipy as sp 9 | import gin 10 | import torch 11 | 12 | from dataloader.random_pose import random_pose, pose_interp 13 | from dataloader.spherical_poses import spherical_poses 14 | 15 | 16 | def find_files(dir, exts): 17 | if os.path.isdir(dir): 18 | files_grabbed = [] 19 | for ext in exts: 20 | files_grabbed.extend(glob.glob(os.path.join(dir, ext))) 21 | if len(files_grabbed) > 0: 22 | files_grabbed = sorted(files_grabbed) 23 | return files_grabbed 24 | else: 25 | return [] 26 | 27 | 28 | def similarity_from_cameras(c2w, fix_rot=False): 29 | """ 30 | Get a similarity transform to normalize dataset 31 | from c2w (OpenCV convention) cameras 32 | :param c2w: (N, 4, 4) 33 | :return T (4, 4), scale (float) 34 | """ 35 | t = c2w[:, :3, 3] 36 | R = c2w[:, :3, :3] 37 | 38 | # (1) Rotate the world so that z+ is the up axis 39 | # we estimate the up axis by averaging the camera up axes 40 | ups = np.sum(R * np.array([0, -1.0, 0]), axis=-1) 41 | world_up = np.mean(ups, axis=0) 42 | world_up /= np.linalg.norm(world_up) 43 | 44 | up_camspace = np.array([0.0, -1.0, 0.0]) 45 | c = (up_camspace * world_up).sum() 46 | cross = np.cross(world_up, up_camspace) 47 | skew = np.array( 48 | [ 49 | [0.0, -cross[2], cross[1]], 50 | [cross[2], 0.0, -cross[0]], 51 | [-cross[1], cross[0], 0.0], 52 | ] 53 | ) 54 | if c > -1: 55 | R_align = np.eye(3) + skew + (skew @ skew) * 1 / (1 + c) 56 | else: 57 | # In the unlikely case the original data has y+ up axis, 58 | # rotate 180-deg about x axis 59 | R_align = np.array([[-1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]) 60 | 61 | if fix_rot: 62 | R_align = np.eye(3) 63 | R = np.eye(3) 64 | else: 65 | R = R_align @ R 66 | fwds = np.sum(R * np.array([0, 0.0, 1.0]), axis=-1) 67 | t = (R_align @ t[..., None])[..., 0] 68 | 69 | # (2) Recenter the scene using camera center rays 70 | # find the closest point to the origin for each camera's center ray 71 | nearest = t + (fwds * -t).sum(-1)[:, None] * fwds 72 | 73 | # median for more robustness 74 | translate = -np.median(nearest, axis=0) 75 | 76 | # translate = -np.mean(t, axis=0) # DEBUG 77 | 78 | transform = np.eye(4) 79 | transform[:3, 3] = translate 80 | transform[:3, :3] =
R_align 81 | 82 | # (3) Rescale the scene using camera distances 83 | scale = 1.0 / np.median(np.linalg.norm(t + translate, axis=-1)) 84 | return transform, scale 85 | 86 | 87 | @gin.configurable() 88 | def load_co3d_data( 89 | datadir: str, 90 | scene_name: str, 91 | max_image_dim: int, 92 | cam_scale_factor: float, 93 | render_scene_interp: bool = False, 94 | render_random_pose: bool = True, 95 | interp_fac: int = 5, 96 | v2_mode: bool = False 97 | ): 98 | 99 | with open("dataloader/co3d_lists/co3d_list.json") as fp: 100 | co3d_lists = json.load(fp) 101 | 102 | datadir = datadir.rstrip("/") 103 | cls_name = co3d_lists[scene_name] 104 | basedir = os.path.join(datadir, cls_name, scene_name) 105 | cam_trans = np.diag(np.array([-1, -1, 1, 1], dtype=np.float32)) 106 | 107 | scene_number = basedir.split("/")[-1] 108 | 109 | json_path = os.path.join(basedir, "..", "frame_annotations.jgz") 110 | with gzip.open(json_path, "r") as fp: 111 | all_frames_data = json.load(fp) 112 | 113 | frame_data, images, intrinsics, extrinsics, image_sizes = [], [], [], [], [] 114 | 115 | for temporal_data in all_frames_data: 116 | if temporal_data["sequence_name"] == scene_number: 117 | frame_data.append(temporal_data) 118 | 119 | used = [] 120 | for (i, frame) in enumerate(frame_data): 121 | img = cv2.imread(os.path.join(datadir, frame["image"]["path"])) 122 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) / 255.0 123 | 124 | H, W = frame["image"]["size"] 125 | max_hw = max(H, W) 126 | approx_scale = max_image_dim / max_hw 127 | 128 | if approx_scale < 1.0: 129 | H2 = int(approx_scale * H) 130 | W2 = int(approx_scale * W) 131 | img = cv2.resize(img, (W2, H2), interpolation=cv2.INTER_AREA) 132 | else: 133 | H2 = H 134 | W2 = W 135 | 136 | image_size = np.array([H2, W2]) 137 | fxy = np.array(frame["viewpoint"]["focal_length"]) 138 | cxy = np.array(frame["viewpoint"]["principal_point"]) 139 | R = np.array(frame["viewpoint"]["R"]) 140 | T = np.array(frame["viewpoint"]["T"]) 141 | 142 | if v2_mode: 143 | min_HW = min(W2, H2) 144 | image_size_half = np.array([W2 * 0.5, H2 * 0.5], dtype=np.float32) 145 | scale_arr = np.array([min_HW * 0.5, min_HW * 0.5], dtype=np.float32) 146 | fxy_x = fxy * scale_arr 147 | prp_x = np.array([W2 * 0.5, H2 * 0.5], dtype=np.float32) - cxy * scale_arr 148 | cxy = (image_size_half - prp_x) / image_size_half 149 | fxy = fxy_x / image_size_half 150 | 151 | scale_arr = np.array([W2 * 0.5, H2 * 0.5], dtype=np.float32) 152 | focal = fxy * scale_arr 153 | prp = -1.0 * (cxy - 1.0) * scale_arr 154 | 155 | pose = np.eye(4) 156 | pose[:3, :3] = R 157 | pose[:3, 3:] = -R @ T[..., None] 158 | pose = pose @ cam_trans 159 | intrinsic = np.array( 160 | [ 161 | [focal[0], 0.0, prp[0], 0.0], 162 | [0.0, focal[1], prp[1], 0.0], 163 | [0.0, 0.0, 1.0, 0.0], 164 | [0.0, 0.0, 0.0, 1.0], 165 | ] 166 | ) 167 | 168 | if any([np.all(pose == _pose) for _pose in extrinsics]): 169 | continue 170 | 171 | used.append(i) 172 | image_sizes.append(image_size) 173 | intrinsics.append(intrinsic) 174 | extrinsics.append(pose) 175 | images.append(img) 176 | 177 | intrinsics = np.stack(intrinsics) 178 | extrinsics = np.stack(extrinsics) 179 | image_sizes = np.stack(image_sizes) 180 | 181 | H_median, W_median = np.median( 182 | np.stack([image_size for image_size in image_sizes]), axis=0 183 | ) 184 | 185 | H_inlier = np.abs(image_sizes[:, 0] - H_median) / H_median < 0.1 186 | W_inlier = np.abs(image_sizes[:, 1] - W_median) / W_median < 0.1 187 | inlier = np.logical_and(H_inlier, W_inlier) 188 | dists = np.linalg.norm( 189 | 
extrinsics[:, :3, 3] - np.median(extrinsics[:, :3, 3], axis=0), axis=-1 190 | ) 191 | med = np.median(dists) 192 | good_mask = dists < (med * 5.0) 193 | inlier = np.logical_and(inlier, good_mask) 194 | 195 | if inlier.sum() != 0: 196 | intrinsics = intrinsics[inlier] 197 | extrinsics = extrinsics[inlier] 198 | image_sizes = image_sizes[inlier] 199 | images = [images[i] for i in range(len(inlier)) if inlier[i]] 200 | 201 | extrinsics = np.stack(extrinsics) 202 | T, sscale = similarity_from_cameras(extrinsics) 203 | extrinsics = T @ extrinsics 204 | 205 | extrinsics[:, :3, 3] *= sscale * cam_scale_factor 206 | 207 | num_frames = len(extrinsics) 208 | 209 | i_all = np.arange(num_frames) 210 | i_test = i_all[::10] 211 | i_val = i_test 212 | i_train = np.array([i for i in i_all if not i in i_test]) 213 | i_split = (i_train, i_val, i_test, i_all) 214 | 215 | if render_random_pose: 216 | render_poses = random_pose(extrinsics[i_all], 50) 217 | elif render_scene_interp: 218 | render_poses = pose_interp(extrinsics[i_all], interp_fac) 219 | # render_poses = spherical_poses(sscale * cam_scale_factor * np.eye(4)) 220 | 221 | near, far = 0., 1. 222 | ndc_coeffs = (-1., -1.) 223 | 224 | label_info = {} 225 | label_info["T"] = T 226 | label_info["sscale"] = sscale * cam_scale_factor 227 | label_info["class_label"] = basedir.rstrip("/").split("/")[-2] 228 | label_info["extrinsics"] = extrinsics 229 | label_info["intrinsics"] = intrinsics 230 | label_info["image_sizes"] = image_sizes 231 | 232 | return ( 233 | images, 234 | intrinsics, 235 | extrinsics, 236 | image_sizes, 237 | near, 238 | far, 239 | ndc_coeffs, 240 | i_split, 241 | render_poses, 242 | label_info 243 | ) 244 | -------------------------------------------------------------------------------- /dataloader/data_util/common.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | 4 | import cc3d 5 | import MinkowskiEngine as ME 6 | import numpy as np 7 | import torch 8 | 9 | 10 | def find_files(dir, exts): 11 | if os.path.isdir(dir): 12 | files_grabbed = [] 13 | for ext in exts: 14 | files_grabbed.extend(glob.glob(os.path.join(dir, ext))) 15 | if len(files_grabbed) > 0: 16 | files_grabbed = sorted(files_grabbed) 17 | return files_grabbed 18 | else: 19 | return [] 20 | 21 | 22 | def similarity_from_cameras(c2w): 23 | """ 24 | Get a similarity transform to normalize dataset 25 | from c2w (OpenCV convention) cameras 26 | :param c2w: (N, 4) 27 | :return T (4,4) , scale (float) 28 | """ 29 | t = c2w[:, :3, 3] 30 | R = c2w[:, :3, :3] 31 | 32 | # (1) Rotate the world so that z+ is the up axis 33 | # we estimate the up axis by averaging the camera up axes 34 | ups = np.sum(R * np.array([0, -1.0, 0]), axis=-1) 35 | world_up = np.mean(ups, axis=0) 36 | world_up /= np.linalg.norm(world_up) 37 | 38 | up_camspace = np.array([0.0, -1.0, 0.0]) 39 | c = (up_camspace * world_up).sum() 40 | cross = np.cross(world_up, up_camspace) 41 | skew = np.array( 42 | [ 43 | [0.0, -cross[2], cross[1]], 44 | [cross[2], 0.0, -cross[0]], 45 | [-cross[1], cross[0], 0.0], 46 | ] 47 | ) 48 | if c > -1: 49 | R_align = np.eye(3) + skew + (skew @ skew) * 1 / (1 + c) 50 | else: 51 | # In the unlikely case the original data has y+ up axis, 52 | # rotate 180-deg about x axis 53 | R_align = np.array([[-1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]) 54 | 55 | R = R_align @ R 56 | fwds = np.sum(R * np.array([0, 0.0, 1.0]), axis=-1) 57 | t = (R_align @ t[..., None])[..., 0] 58 | 59 | # (2) Recenter the scene using camera 
center rays 60 | # find the closest point to the origin for each camera's center ray 61 | nearest = t + (fwds * -t).sum(-1)[:, None] * fwds 62 | 63 | # median for more robustness 64 | translate = -np.median(nearest, axis=0) 65 | 66 | # translate = -np.mean(t, axis=0) # DEBUG 67 | 68 | transform = np.eye(4) 69 | transform[:3, 3] = translate 70 | transform[:3, :3] = R_align 71 | 72 | # (3) Rescale the scene using camera distances 73 | scale = 1.0 / np.median(np.linalg.norm(t + translate, axis=-1)) 74 | return transform, scale 75 | 76 | 77 | def connected_component_filter(xyz, voxel_size): 78 | svoxel, idx, idx_inverse = ME.utils.sparse_quantize( 79 | xyz / voxel_size, return_index=True, return_inverse=True 80 | ) 81 | svoxel -= svoxel.min(0, keepdim=True).values 82 | svoxel = svoxel.long() 83 | dvoxel = torch.zeros((svoxel + 1).max(0).values.tolist()) 84 | dvoxel[svoxel[:, 0], svoxel[:, 1], svoxel[:, 2]] = 1 85 | labels_out = cc3d.connected_components(dvoxel.numpy(), connectivity=26) 86 | labels_out = labels_out[svoxel[:, 0], svoxel[:, 1], svoxel[:, 2]] 87 | counts = np.bincount(labels_out) 88 | argmax = np.argmax(counts) 89 | labels_inverse = labels_out[idx_inverse] 90 | sel = labels_inverse == argmax 91 | print( 92 | f">>>> connected component filtering, from {xyz.shape[0]} to {sel.sum()} <<<<" 93 | ) 94 | return sel 95 | -------------------------------------------------------------------------------- /dataloader/litdata.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import gin 4 | import numpy as np 5 | 6 | from dataloader.data_util.co3d import load_co3d_data 7 | from dataloader.data_util.scannet import load_scannet_data, load_scannet_data_ext 8 | from dataloader.interface import LitData 9 | 10 | 11 | @gin.configurable() 12 | class LitDataCo3D(LitData): 13 | def __init__( 14 | self, 15 | datadir: str, 16 | scene_name: str, 17 | accelerator: bool, 18 | num_gpus: int, 19 | num_tpus: int, 20 | # Co3D specific arguments 21 | max_image_dim: int = 800, 22 | cam_scale_factor: float = 1.50, 23 | ): 24 | ( 25 | self.images, 26 | self.intrinsics, 27 | self.extrinsics, 28 | self.image_sizes, 29 | self.near, 30 | self.far, 31 | self.ndc_coeffs, 32 | (self.i_train, self.i_val, self.i_test, self.i_all), 33 | self.render_poses, 34 | self.label_info, 35 | ) = load_co3d_data( 36 | datadir=datadir, 37 | scene_name=scene_name, 38 | max_image_dim=max_image_dim, 39 | cam_scale_factor=cam_scale_factor, 40 | ) 41 | 42 | self.render_scale = 300 / max(self.image_sizes[0][0], self.image_sizes[0][1]) 43 | 44 | super(LitDataCo3D, self).__init__( 45 | datadir=datadir, 46 | accelerator=accelerator, 47 | num_gpus=num_gpus, 48 | num_tpus=num_tpus, 49 | ) 50 | 51 | 52 | @gin.configurable() 53 | class LitDataScannet(LitData): 54 | def __init__( 55 | self, 56 | datadir: str, 57 | scene_name: str, 58 | accelerator: bool, 59 | num_gpus: int, 60 | num_tpus: int, 61 | # scannet specific arguments 62 | frame_skip: int = 1, 63 | max_frame: int = 1500, 64 | max_image_dim: int = 800, 65 | cam_scale_factor: float = 1.50, 66 | use_depth: bool = True, 67 | use_scans: bool = True, 68 | blur_thresh: float = 10.0, 69 | pcd_name: str = "tsdf_pcd.pcd", 70 | ): 71 | super(LitDataScannet, self).__init__( 72 | datadir=datadir, 73 | accelerator=accelerator, 74 | num_gpus=num_gpus, 75 | num_tpus=num_tpus, 76 | ) 77 | 78 | ( 79 | images, 80 | extrinsics, 81 | render_poses, 82 | (h, w), 83 | intrinsics, 84 | i_split, 85 | depths, 86 | trans_info, 87 | ) = load_scannet_data_ext( 88 | 
os.path.join(datadir, scene_name), 89 | cam_scale_factor=cam_scale_factor, 90 | frame_skip=frame_skip, 91 | max_frame=max_frame, 92 | max_image_dim=max_image_dim, 93 | blur_thresh=blur_thresh, 94 | use_depth=use_depth, 95 | pcd_name=pcd_name, 96 | ) 97 | i_train, i_val, i_test = i_split 98 | 99 | print(f"loaded scannet, image with size: {h} * {w}") 100 | self.scene_name = scene_name 101 | self.images = images 102 | self.intrinsics = intrinsics.reshape(-1, 4, 4).repeat(len(images), axis=0) 103 | self.extrinsics = extrinsics 104 | self.image_sizes = np.array([h, w]).reshape(1, 2).repeat(len(images), axis=0) 105 | self.near = 0.0 106 | self.far = 1.0 107 | self.ndc_coeffs = (-1.0, -1.0) 108 | self.i_train, self.i_val, self.i_test = i_train, i_val, i_test 109 | self.i_all = np.arange(len(images)) 110 | self.render_poses = render_poses 111 | self.trans_info = trans_info 112 | self.use_sphere_bound = False 113 | -------------------------------------------------------------------------------- /dataloader/random_pose.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def random_pose(poses, num_frames=50): 5 | 6 | rot_diff = np.einsum("ilk, jlm -> ijkm", poses[:, :3, :3], poses[:, :3, :3]) 7 | rot_angle = ( 8 | np.arccos( 9 | np.clip( 10 | (rot_diff[:, :, 0, 0] + rot_diff[:, :, 1, 1] + rot_diff[:, :, 2, 2] - 1) 11 | / 2, 12 | -1.0, 13 | 1.0, 14 | ) 15 | ) 16 | / np.pi 17 | * 180 18 | ) 19 | ignore_self = np.logical_not(np.eye(len(rot_diff), dtype=bool)) 20 | 21 | trans_mask = ( 22 | np.linalg.norm(poses[:, None, :3, 3] - poses[None, :, :3, 3], axis=-1) < 0.5 23 | ) 24 | rot_idx = np.where( 25 | np.logical_and(trans_mask, np.logical_and(rot_angle < 40, ignore_self)) 26 | ) 27 | n_candidates = len(rot_idx[0]) 28 | ret = np.zeros((num_frames, 4, 4)) 29 | indices = np.random.choice(n_candidates, num_frames, replace=True) 30 | t = np.random.rand(num_frames) 31 | axis, angle = R_to_axis_angle(rot_diff[rot_idx[0][indices], rot_idx[1][indices]]) 32 | angle = angle * t 33 | pose_rot = R_axis_angle(angle, axis) 34 | 35 | trans_t = ( 36 | t[:, None] * poses[rot_idx[0][indices], :3, 3] 37 | + (1 - t)[:, None] * poses[rot_idx[1][indices], :3, 3] 38 | ) 39 | ret[:, :3, :3] = np.einsum( 40 | "ijk, ikl -> ijl", poses[rot_idx[0][indices], :3, :3], pose_rot 41 | ) 42 | ret[:, :3, 3] = trans_t 43 | ret[:, 3, 3] = 1.0 44 | 45 | return ret 46 | 47 | def pose_interp(poses, factor): 48 | 49 | pose_list = [] 50 | for i in range(len(poses)): 51 | pose_list.append(poses[i]) 52 | 53 | if i == len(poses) - 1: 54 | factor = 4 * factor 55 | 56 | next_idx = (i+1) % len(poses) 57 | axis, angle = R_to_axis_angle((poses[next_idx, :3, :3] @ poses[i, :3, :3].T)[None]) 58 | for j in range(factor-1): 59 | ret = np.eye(4) 60 | j_fact = (j + 1) / factor 61 | angle_j = angle * j_fact 62 | pose_rot = R_axis_angle(angle_j, axis) 63 | ret[:3, :3] = pose_rot @ poses[i, :3, :3] 64 | trans_t = ( 65 | (1 - j_fact) * poses[i, :3, 3] 66 | + (j_fact) * poses[next_idx, :3, 3] 67 | ) 68 | ret[:3, 3] = trans_t 69 | pose_list.append(ret) 70 | 71 | return np.stack(pose_list) 72 | 73 | 74 | 75 | def R_axis_angle(angle, axis): 76 | """Generate the rotation matrix from the axis-angle notation.
77 | Conversion equations 78 | ==================== 79 | From Wikipedia (http://en.wikipedia.org/wiki/Rotation_matrix), the conversion is given by:: 80 | c = cos(angle); s = sin(angle); C = 1-c 81 | xs = x*s; ys = y*s; zs = z*s 82 | xC = x*C; yC = y*C; zC = z*C 83 | xyC = x*yC; yzC = y*zC; zxC = z*xC 84 | [ x*xC+c xyC-zs zxC+ys ] 85 | [ xyC+zs y*yC+c yzC-xs ] 86 | [ zxC-ys yzC+xs z*zC+c ] 87 | @param matrix: The 3x3 rotation matrix to update. 88 | @type matrix: 3x3 numpy array 89 | @param axis: The 3D rotation axis. 90 | @type axis: numpy array, len 3 91 | @param angle: The rotation angle. 92 | @type angle: float 93 | """ 94 | len_angle = len(angle) 95 | matrix = np.zeros((len_angle, 3, 3)) 96 | 97 | # Trig factors. 98 | ca = np.cos(angle) 99 | sa = np.sin(angle) 100 | C = 1 - ca 101 | 102 | # Depack the axis. 103 | x, y, z = axis[:, 0], axis[:, 1], axis[:, 2] 104 | 105 | # Multiplications (to remove duplicate calculations). 106 | xs = x * sa 107 | ys = y * sa 108 | zs = z * sa 109 | xC = x * C 110 | yC = y * C 111 | zC = z * C 112 | xyC = x * yC 113 | yzC = y * zC 114 | zxC = z * xC 115 | 116 | # Update the rotation matrix. 117 | matrix[:, 0, 0] = x * xC + ca 118 | matrix[:, 0, 1] = xyC - zs 119 | matrix[:, 0, 2] = zxC + ys 120 | matrix[:, 1, 0] = xyC + zs 121 | matrix[:, 1, 1] = y * yC + ca 122 | matrix[:, 1, 2] = yzC - xs 123 | matrix[:, 2, 0] = zxC - ys 124 | matrix[:, 2, 1] = yzC + xs 125 | matrix[:, 2, 2] = z * zC + ca 126 | 127 | return matrix 128 | 129 | 130 | def R_to_axis_angle(matrix): 131 | """Convert the rotation matrix into the axis-angle notation. 132 | Conversion equations 133 | ==================== 134 | From Wikipedia (http://en.wikipedia.org/wiki/Rotation_matrix), the conversion is given by:: 135 | x = Qzy-Qyz 136 | y = Qxz-Qzx 137 | z = Qyx-Qxy 138 | r = hypot(x,hypot(y,z)) 139 | t = Qxx+Qyy+Qzz 140 | theta = atan2(r,t-1) 141 | @param matrix: The 3x3 rotation matrix to update. 142 | @type matrix: 3x3 numpy array 143 | @return: The 3D rotation axis and angle. 144 | @rtype: numpy 3D rank-1 array, float 145 | """ 146 | 147 | # Axes. 148 | len_matrix = len(matrix) 149 | axis = np.zeros((len_matrix, 3)) 150 | axis[:, 0] = matrix[:, 2, 1] - matrix[:, 1, 2] 151 | axis[:, 1] = matrix[:, 0, 2] - matrix[:, 2, 0] 152 | axis[:, 2] = matrix[:, 1, 0] - matrix[:, 0, 1] 153 | 154 | # Angle. 155 | r = np.hypot(axis[:, 0], np.hypot(axis[:, 1], axis[:, 2])) 156 | t = matrix[:, 0, 0] + matrix[:, 1, 1] + matrix[:, 2, 2] 157 | theta = np.arctan2(r, t - 1) 158 | 159 | # Normalise the axis. 160 | axis = axis / r[:, None] 161 | 162 | # Return the data. 
163 | return axis, theta 164 | -------------------------------------------------------------------------------- /dataloader/sampler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from torch.utils.data import Dataset 4 | from torch.utils.data.distributed import DistributedSampler 5 | from torch.utils.data.sampler import SequentialSampler 6 | import torch.distributed as dist 7 | 8 | class DDPSampler(SequentialSampler): 9 | 10 | def __init__(self, batch_size, num_replicas, rank, tpu): 11 | self.data_source=None 12 | self.batch_size = batch_size 13 | self.drop_last=False 14 | ngpus = torch.cuda.device_count() 15 | if ngpus == 1 and not tpu: 16 | rank, num_replicas = 0, 1 17 | else: 18 | if num_replicas is None: 19 | if not dist.is_available(): 20 | raise RuntimeError("Requires distributed package to be available") 21 | num_replicas = dist.get_world_size() 22 | if rank is None: 23 | if not dist.is_available(): 24 | raise RuntimeError("Requires distributed package to be available") 25 | rank = dist.get_rank() 26 | self.rank = rank 27 | self.num_replicas = num_replicas 28 | 29 | 30 | class DDPSequnetialSampler(DDPSampler): 31 | 32 | def __init__(self, batch_size, num_replicas, rank, N_total, tpu): 33 | self.N_total = N_total 34 | super(DDPSequnetialSampler, self).__init__(batch_size, num_replicas, rank, tpu) 35 | 36 | def __iter__(self): 37 | idx_list = np.arange(self.N_total) 38 | return iter(idx_list[self.rank::self.num_replicas]) 39 | 40 | def __len__(self): 41 | return int(np.ceil(self.N_total / self.num_replicas)) 42 | 43 | 44 | class SingleImageDDPSampler(DDPSampler): 45 | 46 | def __init__( 47 | self, 48 | batch_size, 49 | num_replicas, 50 | rank, 51 | N_img, 52 | N_pixels, 53 | epoch_size, 54 | tpu, 55 | precrop, 56 | precrop_steps, 57 | ): 58 | super(SingleImageDDPSampler, self).__init__(batch_size, num_replicas, rank, tpu) 59 | self.N_pixels = N_pixels 60 | self.N_img = N_img 61 | self.epoch_size = epoch_size 62 | self.precrop = precrop 63 | self.precrop_steps = precrop_steps 64 | 65 | def __iter__(self): 66 | image_choice = np.random.choice( 67 | np.arange(self.N_img), 68 | self.epoch_size, 69 | replace=True 70 | ) 71 | image_shape = self.N_pixels[image_choice] 72 | if not self.precrop: 73 | idx_choice = [ 74 | np.random.choice(np.arange(image_shape[i, 0] * image_shape[i, 1]), self.batch_size) 75 | for i in range(self.epoch_size) 76 | ] 77 | else: 78 | idx_choice = [] 79 | h_pick = [ 80 | np.random.choice( 81 | np.arange(image_shape[i, 0] // 2), self.batch_size 82 | ) + image_shape[i, 0] // 4 for i in range(self.precrop_steps) 83 | ] 84 | w_pick = [ 85 | np.random.choice( 86 | np.arange(image_shape[i, 1] // 2), self.batch_size 87 | ) + image_shape[i, 1] // 4 for i in range(self.precrop_steps) 88 | ] 89 | idx_choice = [h_pick[i] * image_shape[i, 1] + w_pick[i] for i in range(self.precrop_steps)] 90 | 91 | idx_choice += [ 92 | np.random.choice(np.arange(image_shape[i, 0] * image_shape[i, 1]), self.batch_size) 93 | for i in range(self.epoch_size - self.precrop_steps) 94 | ] 95 | self.precrop = False 96 | 97 | for ((h, w), image_idx, idx) in zip(image_shape, image_choice, idx_choice): 98 | idx_ret = image_idx * h * w + idx 99 | yield idx_ret[self.rank::self.num_replicas] 100 | 101 | def __len__(self): 102 | return self.epoch_size 103 | 104 | 105 | class MultipleImageDDPSampler(DDPSampler): 106 | def __init__(self, batch_size, num_replicas, rank, total_len, epoch_size, tpu): 107 | 
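        # Draws `epoch_size` batches of `batch_size` ray indices uniformly with
        # replacement over the flattened pixel set; each replica then keeps its
        # rank-strided slice of every batch (see __iter__ below).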
super(MultipleImageDDPSampler, self).__init__(batch_size, num_replicas, rank, tpu) 108 | self.total_len = total_len 109 | self.epoch_size = epoch_size 110 | 111 | def __iter__(self): 112 | full_index = np.arange(self.total_len) 113 | indices = [ 114 | np.random.choice(full_index, self.batch_size) \ 115 | for _ in range(self.epoch_size) 116 | ] 117 | for batch in indices: 118 | yield batch[self.rank::self.num_replicas] 119 | 120 | def __len__(self): 121 | return self.epoch_size 122 | 123 | 124 | class MultipleImageWOReplaceDDPSampler(MultipleImageDDPSampler): 125 | 126 | def __init__(self, batch_size, num_replicas, rank, total_len, epoch_size, tpu): 127 | super(MultipleImageWOReplaceDDPSampler, self).__init__( 128 | batch_size, num_replicas, rank, total_len, epoch_size, tpu 129 | ) 130 | 131 | def __iter__(self): 132 | indices = [ 133 | np.random.permutation(self.total_len) \ 134 | for _ in range(int( 135 | np.ceil(self.epoch_size * self.batch_size / self.total_len) 136 | )) 137 | ] 138 | indices = np.concatenate(indices)[:self.epoch_size * self.batch_size] 139 | indices = indices.reshape(self.epoch_size, self.batch_size) 140 | 141 | for batch in indices: 142 | yield batch[self.rank::self.num_replicas] 143 | 144 | def __len__(self): 145 | return self.epoch_size 146 | 147 | 148 | class RaySet(Dataset): 149 | 150 | def __init__(self, images=None, rays=None): 151 | self.images = images 152 | self.images_exist = self.images is not None 153 | assert rays is not None 154 | rays[:, 1] = rays[:, 1] / np.linalg.norm(rays[:, 1], axis=1)[:, np.newaxis] 155 | self.rays = rays 156 | 157 | self.N = len(rays) 158 | 159 | def __getitem__(self, index): 160 | ret = {"ray": torch.from_numpy(self.rays[index])} 161 | if self.images_exist: 162 | ret["target"] = torch.from_numpy(self.images[index]) 163 | return ret 164 | 165 | def __len__(self): 166 | return self.N 167 | -------------------------------------------------------------------------------- /dataloader/spherical_poses.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | trans_t = lambda t : np.array([ 5 | [1,0,0,0], 6 | [0,1,0,0], 7 | [0,0,1,t], 8 | [0,0,0,1]], dtype=np.float32) 9 | 10 | rot_phi = lambda phi : np.array([ 11 | [1,0,0,0], 12 | [0,np.cos(phi),-np.sin(phi),0], 13 | [0,np.sin(phi), np.cos(phi),0], 14 | [0,0,0,1]], dtype=np.float32) 15 | 16 | rot_theta = lambda th : np.array([ 17 | [np.cos(th),0,-np.sin(th),0], 18 | [0,1,0,0], 19 | [np.sin(th),0, np.cos(th),0], 20 | [0,0,0,1]], dtype=np.float32) 21 | 22 | 23 | def spherical_pose(theta, phi, radius, torch_output=False): 24 | c2w = trans_t(radius) 25 | c2w = rot_phi(phi/180.*np.pi) @ c2w 26 | c2w = rot_theta(theta/180.*np.pi) @ c2w 27 | c2w = np.array([[-1,0,0,0],[0,0,1,0],[0,1,0,0],[0,0,0,1]], dtype=np.float32) @ c2w 28 | # c2w = np.array([[1,0,0,0],[0,0,1,0],[0,1,0,0],[0,0,0,1]], dtype=np.float32) @ c2w 29 | 30 | return torch.from_numpy(c2w) if torch_output else c2w 31 | 32 | 33 | def spherical_poses(cam_trans): 34 | return np.stack( 35 | [ 36 | spherical_pose(angle, -30.0, 0.5) @ cam_trans 37 | for angle in np.linspace(-180,180,40+1)[:-1] 38 | ], 0 39 | ) -------------------------------------------------------------------------------- /lib/plenoxel/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright 2021 PlenOctree Authors. 
2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions are met: 5 | # 6 | # 1. Redistributions of source code must retain the above copyright notice, 7 | # this list of conditions and the following disclaimer. 8 | # 9 | # 2. Redistributions in binary form must reproduce the above copyright notice, 10 | # this list of conditions and the following disclaimer in the documentation 11 | # and/or other materials provided with the distribution. 12 | # 13 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 14 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 | # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 17 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 18 | # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 19 | # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 20 | # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 21 | # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 22 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 23 | # POSSIBILITY OF SUCH DAMAGE. 24 | 25 | # NOTE: This CMakeLists is for development purposes only 26 | # (To check CUDA compile errors) 27 | # It is NOT necessary to use this for installation. Just use pip install . 28 | cmake_minimum_required( VERSION 3.3 ) 29 | 30 | if(NOT CMAKE_BUILD_TYPE) 31 | set(CMAKE_BUILD_TYPE Release) 32 | endif() 33 | if (POLICY CMP0048) 34 | cmake_policy(SET CMP0048 NEW) 35 | endif (POLICY CMP0048) 36 | if (POLICY CMP0069) 37 | cmake_policy(SET CMP0069 NEW) 38 | endif (POLICY CMP0069) 39 | if (POLICY CMP0072) 40 | cmake_policy(SET CMP0072 NEW) 41 | endif (POLICY CMP0072) 42 | 43 | project( svox2 ) 44 | 45 | set(CMAKE_CXX_STANDARD 14) 46 | enable_language(CUDA) 47 | message(STATUS "CUDA enabled") 48 | set( CMAKE_CUDA_STANDARD 14 ) 49 | set( CMAKE_CUDA_STANDARD_REQUIRED ON) 50 | set( CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -g -Xcudafe \"--display_error_number --diag_suppress=3057 --diag_suppress=3058 --diag_suppress=3059 --diag_suppress=3060\" -lineinfo -arch=sm_75 ") 51 | # -Xptxas=\"-v\" 52 | 53 | set( INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/include" ) 54 | 55 | if( MSVC ) 56 | set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /MTd") 57 | set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /MT /GLT /Ox") 58 | set(CMAKE_CUDA_FLAGS_RELEASE "${CMAKE_CUDA_FLAGS_RELEASE} -Xcompiler=\"/MT\"" ) 59 | endif() 60 | 61 | file(GLOB SOURCES 62 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp 63 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cu) 64 | 65 | find_package(pybind11 REQUIRED) 66 | find_package(Torch REQUIRED) 67 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}") 68 | 69 | include_directories (${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) 70 | 71 | pybind11_add_module(svox2-test SHARED ${SOURCES}) 72 | target_link_libraries(svox2-test PRIVATE "${TORCH_LIBRARIES}") 73 | target_include_directories(svox2-test PRIVATE "${INCLUDE_DIR}") 74 | 75 | if (MSVC) 76 | file(GLOB TORCH_DLLS "${TORCH_INSTALL_PREFIX}/lib/*.dll") 77 | add_custom_command(TARGET svox2-test 78 | POST_BUILD 79 | COMMAND ${CMAKE_COMMAND} -E copy_if_different 80 | ${TORCH_DLLS} 81 | $) 82 | endif (MSVC) 83 | -------------------------------------------------------------------------------- 
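The CMake project above is only a development aid for surfacing CUDA compile errors; the extension itself is built and installed with `pip install .` via setup.py. A minimal smoke test (a hypothetical snippet, not a file in this repo) to confirm the compiled module exposes the ops the Python side dispatches to:

import lib.plenoxel as _C  # the compiled svox2 CUDA extension

# model/plenoxel_torch/__global__.py treats a missing `sample_grid` attribute
# as a failed build and falls back to _C = None, so the same check works here.
assert hasattr(_C, "sample_grid"), "plenoxel CUDA extension not built correctly"
print("loaded extension:", _C.__name__)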
/lib/plenoxel/include/cubemap_util.cuh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "cuda_util.cuh" 3 | #include 4 | #include 5 | 6 | #define _AXIS(x) (x>>1) 7 | #define _ORI(x) (x&1) 8 | #define _FACE(axis, ori) uint8_t((axis << 1) | ori) 9 | 10 | namespace { 11 | namespace device { 12 | 13 | struct CubemapCoord { 14 | uint8_t face; 15 | float uv[2]; 16 | }; 17 | 18 | struct CubemapLocation { 19 | uint8_t face; 20 | int16_t uv[2]; 21 | }; 22 | 23 | struct CubemapBilerpQuery { 24 | CubemapLocation ptr[2][2]; 25 | float duv[2]; 26 | }; 27 | 28 | __device__ __inline__ void 29 | invert_cubemap(int u, int v, float r, 30 | int reso, 31 | float* __restrict__ out) { 32 | const float u_norm = (u + 0.5f) / reso * 2 - 1; 33 | const float v_norm = (v + 0.5f) / reso * 2 - 1; 34 | // EAC 35 | const float tx = tanf((M_PI / 4) * u_norm); 36 | const float ty = tanf((M_PI / 4) * v_norm); 37 | const float common = r * rnorm3df(1.f, tx, ty); 38 | out[0] = tx * common; 39 | out[1] = ty * common; 40 | out[2] = common; 41 | } 42 | 43 | __device__ __inline__ void 44 | invert_cubemap_traditional(int u, int v, float r, 45 | int reso, 46 | float* __restrict__ out) { 47 | const float u_norm = (u + 0.5f) / reso * 2 - 1; 48 | const float v_norm = (v + 0.5f) / reso * 2 - 1; 49 | const float common = r * rnorm3df(1.f, u_norm, v_norm); 50 | out[0] = u_norm * common; 51 | out[1] = v_norm * common; 52 | out[2] = common; 53 | } 54 | 55 | __device__ __host__ __inline__ CubemapCoord 56 | dir_to_cubemap_coord(const float* __restrict__ xyz_o, 57 | int face_reso, 58 | bool eac = true) { 59 | float maxv; 60 | int ax; 61 | float xyz[3] = {xyz_o[0], xyz_o[1], xyz_o[2]}; 62 | if (fabsf(xyz[0]) >= fabsf(xyz[1]) && fabsf(xyz[0]) >= fabsf(xyz[2])) { 63 | ax = 0; maxv = xyz[0]; 64 | } else if (fabsf(xyz[1]) >= fabsf(xyz[2])) { 65 | ax = 1; maxv = xyz[1]; 66 | } else { 67 | ax = 2; maxv = xyz[2]; 68 | } 69 | const float recip = 1.f / fabsf(maxv); 70 | xyz[0] *= recip; 71 | xyz[1] *= recip; 72 | xyz[2] *= recip; 73 | 74 | if (eac) { 75 | #pragma unroll 3 76 | for (int i = 0; i < 3; ++i) { 77 | xyz[i] = atanf(xyz[i]) * (4 * M_1_PI); 78 | } 79 | } 80 | 81 | CubemapCoord idx; 82 | idx.uv[0] = ((xyz[(ax ^ 1) & 1] + 1) * face_reso - 1) * 0.5; 83 | idx.uv[1] = ((xyz[(ax ^ 2) & 2] + 1) * face_reso - 1) * 0.5; 84 | const int ori = xyz[ax] >= 0; 85 | idx.face = _FACE(ax, ori); 86 | 87 | return idx; 88 | } 89 | 90 | __device__ __host__ __inline__ CubemapBilerpQuery 91 | cubemap_build_query( 92 | const CubemapCoord& idx, 93 | int face_reso) { 94 | const int uv_idx[2] ={ (int)floorf(idx.uv[0]), (int)floorf(idx.uv[1]) }; 95 | 96 | bool m[2][2]; 97 | m[0][0] = uv_idx[0] < 0; 98 | m[0][1] = uv_idx[0] > face_reso - 2; 99 | m[1][0] = uv_idx[1] < 0; 100 | m[1][1] = uv_idx[1] > face_reso - 2; 101 | 102 | const int face = idx.face; 103 | const int ax = _AXIS(face); 104 | const int ori = _ORI(face); 105 | // if ax is one of {0, 1, 2}, this trick gets the 2 106 | // of {0, 1, 2} other than ax 107 | const int uvd[2] = {((ax ^ 1) & 1), ((ax ^ 2) & 2)}; 108 | int uv_ori[2]; 109 | 110 | CubemapBilerpQuery result; 111 | result.duv[0] = idx.uv[0] - uv_idx[0]; 112 | result.duv[1] = idx.uv[1] - uv_idx[1]; 113 | 114 | #pragma unroll 2 115 | for (uv_ori[0] = 0; uv_ori[0] < 2; ++uv_ori[0]) { 116 | #pragma unroll 2 117 | for (uv_ori[1] = 0; uv_ori[1] < 2; ++uv_ori[1]) { 118 | CubemapLocation& nidx = result.ptr[uv_ori[0]][uv_ori[1]]; 119 | nidx.face = face; 120 | nidx.uv[0] = uv_idx[0] + uv_ori[0]; 121 | 
nidx.uv[1] = uv_idx[1] + uv_ori[1]; 122 | 123 | const bool mu = m[0][uv_ori[0]]; 124 | const bool mv = m[1][uv_ori[1]]; 125 | 126 | int edge_idx = -1; 127 | if (mu) { 128 | // Crosses edge in u-axis 129 | if (mv) { 130 | // FIXME: deal with corners properly, right now 131 | // just clamps, resulting in a little artifact 132 | // at each cube corner 133 | nidx.uv[0] = min(max(nidx.uv[0], 0), face_reso - 1); 134 | nidx.uv[1] = min(max(nidx.uv[1], 0), face_reso - 1); 135 | } else { 136 | edge_idx = 0; 137 | } 138 | } else if (mv) { 139 | // Crosses edge in v-axis 140 | edge_idx = 1; 141 | } 142 | if (~edge_idx) { 143 | const int nax = uvd[edge_idx]; 144 | const int16_t other_coord = nidx.uv[1 - edge_idx]; 145 | 146 | // Determine directions in the new face 147 | const int nud = (nax ^ 1) & 1; 148 | // const int nvd = (nax ^ 2) & 2; 149 | 150 | if (nud == ax) { 151 | nidx.uv[0] = ori ? (face_reso - 1) : 0; 152 | nidx.uv[1] = other_coord; 153 | } else { 154 | nidx.uv[0] = other_coord; 155 | nidx.uv[1] = ori ? (face_reso - 1) : 0; 156 | } 157 | 158 | nidx.face = _FACE(nax, uv_ori[edge_idx]); 159 | } 160 | // Interior point: nothing needs to be done 161 | 162 | } 163 | } 164 | 165 | return result; 166 | } 167 | 168 | __device__ __host__ __inline__ float 169 | cubemap_sample( 170 | const float* __restrict__ cubemap, // (6, face_reso, face_reso, n_channels) 171 | const CubemapBilerpQuery& query, 172 | int face_reso, 173 | int n_channels, 174 | int chnl_id) { 175 | 176 | // NOTE: assuming address will fit in int32 177 | const int stride1 = face_reso * n_channels; 178 | const int stride0 = face_reso * stride1; 179 | const CubemapLocation& p00 = query.ptr[0][0]; 180 | const float v00 = cubemap[p00.face * stride0 + p00.uv[0] * stride1 + p00.uv[1] * n_channels + chnl_id]; 181 | const CubemapLocation& p01 = query.ptr[0][1]; 182 | const float v01 = cubemap[p01.face * stride0 + p01.uv[0] * stride1 + p01.uv[1] * n_channels + chnl_id]; 183 | const CubemapLocation& p10 = query.ptr[1][0]; 184 | const float v10 = cubemap[p10.face * stride0 + p10.uv[0] * stride1 + p10.uv[1] * n_channels + chnl_id]; 185 | const CubemapLocation& p11 = query.ptr[1][1]; 186 | const float v11 = cubemap[p11.face * stride0 + p11.uv[0] * stride1 + p11.uv[1] * n_channels + chnl_id]; 187 | 188 | const float val0 = lerp(v00, v01, query.duv[1]); 189 | const float val1 = lerp(v10, v11, query.duv[1]); 190 | 191 | return lerp(val0, val1, query.duv[0]); 192 | } 193 | 194 | __device__ __inline__ void 195 | cubemap_sample_backward( 196 | float* __restrict__ cubemap_grad, // (6, face_reso, face_reso, n_channels) 197 | const CubemapBilerpQuery& query, 198 | int face_reso, 199 | int n_channels, 200 | float grad_out, 201 | int chnl_id, 202 | bool* __restrict__ mask_out = nullptr) { 203 | 204 | // NOTE: assuming address will fit in int32 205 | const float bu = query.duv[0], bv = query.duv[1]; 206 | const float au = 1.f - bu, av = 1.f - bv; 207 | 208 | #define _ADD_CUBEVERT(i, j, val) { \ 209 | const CubemapLocation& p00 = query.ptr[i][j]; \ 210 | const int idx = (p00.face * face_reso + p00.uv[0]) * face_reso + p00.uv[1]; \ 211 | float* __restrict__ v00 = &cubemap_grad[idx * n_channels + chnl_id]; \ 212 | atomicAdd(v00, val); \ 213 | if (mask_out != nullptr) { \ 214 | mask_out[idx] = true; \ 215 | } \ 216 | } 217 | 218 | _ADD_CUBEVERT(0, 0, au * av * grad_out); 219 | _ADD_CUBEVERT(0, 1, au * bv * grad_out); 220 | _ADD_CUBEVERT(1, 0, bu * av * grad_out); 221 | _ADD_CUBEVERT(1, 1, bu * bv * grad_out); 222 | #undef _ADD_CUBEVERT 223 | 224 | } 225 | 226 | 
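// The two functions below evaluate the same bilinear query against a pair of
// cubemaps and lerp the two results by interp_wt; the backward pass splits the
// incoming gradient as (1 - interp_wt) / interp_wt, presumably so one sample
// can blend between two adjacent background layers.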
__device__ __host__ __inline__ float 227 | multi_cubemap_sample( 228 | const float* __restrict__ cubemap1, // (6, face_reso, face_reso, n_channels) 229 | const float* __restrict__ cubemap2, // (6, face_reso, face_reso, n_channels) 230 | const CubemapBilerpQuery& query, 231 | float interp_wt, 232 | int face_reso, 233 | int n_channels, 234 | int chnl_id) { 235 | const float val1 = cubemap_sample(cubemap1, 236 | query, 237 | face_reso, 238 | n_channels, 239 | chnl_id); 240 | const float val2 = cubemap_sample(cubemap2, 241 | query, 242 | face_reso, 243 | n_channels, 244 | chnl_id); 245 | return lerp(val1, val2, interp_wt); 246 | } 247 | 248 | __device__ __inline__ void 249 | multi_cubemap_sample_backward( 250 | float* __restrict__ cubemap_grad1, // (6, face_reso, face_reso, n_channels) 251 | float* __restrict__ cubemap_grad2, // (6, face_reso, face_reso, n_channels) 252 | const CubemapBilerpQuery& query, 253 | float interp_wt, 254 | int face_reso, 255 | int n_channels, 256 | float grad_out, 257 | int chnl_id, 258 | bool* __restrict__ mask_out1 = nullptr, 259 | bool* __restrict__ mask_out2 = nullptr) { 260 | if (cubemap_grad1 == nullptr) return; 261 | cubemap_sample_backward(cubemap_grad1, 262 | query, 263 | face_reso, 264 | n_channels, 265 | grad_out * (1.f - interp_wt), 266 | chnl_id, 267 | mask_out1); 268 | cubemap_sample_backward(cubemap_grad2, 269 | query, 270 | face_reso, 271 | n_channels, 272 | grad_out * interp_wt, 273 | chnl_id, 274 | mask_out1 == nullptr ? nullptr : mask_out2); 275 | } 276 | 277 | 278 | } // namespace device 279 | } // namespace 280 | -------------------------------------------------------------------------------- /lib/plenoxel/include/cuda_util.cuh: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Alex Yu 2 | #pragma once 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "util.hpp" 8 | 9 | 10 | #define DEVICE_GUARD(_ten) \ 11 | const at::cuda::OptionalCUDAGuard device_guard(device_of(_ten)); 12 | 13 | #define CUDA_GET_THREAD_ID(tid, Q) const int tid = blockIdx.x * blockDim.x + threadIdx.x; \ 14 | if (tid >= Q) return 15 | #define CUDA_GET_THREAD_ID_U64(tid, Q) const size_t tid = blockIdx.x * blockDim.x + threadIdx.x; \ 16 | if (tid >= Q) return 17 | #define CUDA_N_BLOCKS_NEEDED(Q, CUDA_N_THREADS) ((Q - 1) / CUDA_N_THREADS + 1) 18 | #define CUDA_CHECK_ERRORS \ 19 | cudaError_t err = cudaGetLastError(); \ 20 | if (err != cudaSuccess) \ 21 | printf("Error in svox2.%s : %s\n", __FUNCTION__, cudaGetErrorString(err)) 22 | 23 | #define CUDA_MAX_THREADS at::cuda::getCurrentDeviceProperties()->maxThreadsPerBlock 24 | 25 | #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600 26 | #else 27 | __device__ inline double atomicAdd(double* address, double val){ 28 | unsigned long long int* address_as_ull = (unsigned long long int*)address; 29 | unsigned long long int old = *address_as_ull, assumed; 30 | do { 31 | assumed = old; 32 | old = atomicCAS(address_as_ull, assumed, 33 | __double_as_longlong(val + __longlong_as_double(assumed))); 34 | } while (assumed != old); 35 | return __longlong_as_double(old); 36 | } 37 | #endif 38 | 39 | __device__ inline void atomicMax(float* result, float value){ 40 | unsigned* result_as_u = (unsigned*)result; 41 | unsigned old = *result_as_u, assumed; 42 | do { 43 | assumed = old; 44 | old = atomicCAS(result_as_u, assumed, 45 | __float_as_int(fmaxf(value, __int_as_float(assumed)))); 46 | } while (old != assumed); 47 | return; 48 | } 49 | 50 | __device__ inline void atomicMax(double* 
result, double value){ 51 | unsigned long long int* result_as_ull = (unsigned long long int*)result; 52 | unsigned long long int old = *result_as_ull, assumed; 53 | do { 54 | assumed = old; 55 | old = atomicCAS(result_as_ull, assumed, 56 | __double_as_longlong(fmaxf(value, __longlong_as_double(assumed)))); 57 | } while (old != assumed); 58 | return; 59 | } 60 | 61 | __device__ __inline__ void transform_coord(float* __restrict__ point, 62 | const float* __restrict__ scaling, 63 | const float* __restrict__ offset) { 64 | point[0] = fmaf(point[0], scaling[0], offset[0]); // a*b + c 65 | point[1] = fmaf(point[1], scaling[1], offset[1]); // a*b + c 66 | point[2] = fmaf(point[2], scaling[2], offset[2]); // a*b + c 67 | } 68 | 69 | // Linear interp 70 | // Subtract and fused multiply-add 71 | // (1-w) a + w b 72 | template <class T> 73 | __host__ __device__ __inline__ T lerp(T a, T b, T w) { 74 | return fmaf(w, b - a, a); 75 | } 76 | 77 | __device__ __inline__ static float _norm( 78 | const float* __restrict__ dir) { 79 | // return sqrtf(dir[0] * dir[0] + dir[1] * dir[1] + dir[2] * dir[2]); 80 | return norm3df(dir[0], dir[1], dir[2]); 81 | } 82 | 83 | __device__ __inline__ static float _rnorm( 84 | const float* __restrict__ dir) { 85 | // return 1.f / _norm(dir); 86 | return rnorm3df(dir[0], dir[1], dir[2]); 87 | } 88 | 89 | __host__ __device__ __inline__ static void xsuby3d( 90 | float* __restrict__ x, 91 | const float* __restrict__ y) { 92 | x[0] -= y[0]; 93 | x[1] -= y[1]; 94 | x[2] -= y[2]; 95 | } 96 | 97 | __host__ __device__ __inline__ static float _dot( 98 | const float* __restrict__ x, 99 | const float* __restrict__ y) { 100 | return x[0] * y[0] + x[1] * y[1] + x[2] * y[2]; 101 | } 102 | 103 | __host__ __device__ __inline__ static void _cross( 104 | const float* __restrict__ a, 105 | const float* __restrict__ b, 106 | float* __restrict__ out) { 107 | out[0] = a[1] * b[2] - a[2] * b[1]; 108 | out[1] = a[2] * b[0] - a[0] * b[2]; 109 | out[2] = a[0] * b[1] - a[1] * b[0]; 110 | } 111 | 112 | __device__ __inline__ static float _dist_ray_to_origin( 113 | const float* __restrict__ origin, 114 | const float* __restrict__ dir) { 115 | // dir must be unit vector 116 | float tmp[3]; 117 | _cross(origin, dir, tmp); 118 | return _norm(tmp); 119 | } 120 | 121 | #define int_div2_ceil(x) ((((x) - 1) >> 1) + 1) 122 | 123 | __host__ __inline__ cudaError_t cuda_assert( 124 | const cudaError_t code, const char* const file, 125 | const int line, const bool abort) { 126 | if (code != cudaSuccess) { 127 | fprintf(stderr, "cuda_assert: %s %s %s %d\n", cudaGetErrorName(code), cudaGetErrorString(code), 128 | file, line); 129 | 130 | if (abort) { 131 | cudaDeviceReset(); 132 | exit(code); 133 | } 134 | } 135 | 136 | return code; 137 | } 138 | 139 | #define cuda(...)
cuda_assert((cuda##__VA_ARGS__), __FILE__, __LINE__, true); 140 | 141 | -------------------------------------------------------------------------------- /lib/plenoxel/include/data_spec.hpp: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Alex Yu 2 | #pragma once 3 | #include "util.hpp" 4 | #include 5 | 6 | using torch::Tensor; 7 | 8 | enum BasisType { 9 | // For svox 1 compatibility 10 | // BASIS_TYPE_RGBA = 0 11 | BASIS_TYPE_SH = 1, 12 | // BASIS_TYPE_SG = 2 13 | // BASIS_TYPE_ASG = 3 14 | BASIS_TYPE_3D_TEXTURE = 4, 15 | BASIS_TYPE_MLP = 255, 16 | }; 17 | 18 | struct SparseGridSpec { 19 | Tensor density_data; 20 | Tensor sh_data; 21 | Tensor links; 22 | Tensor _offset; 23 | Tensor _scaling; 24 | 25 | Tensor background_links; 26 | Tensor background_data; 27 | 28 | int basis_dim; 29 | uint8_t basis_type; 30 | Tensor basis_data; 31 | 32 | inline void check() { 33 | CHECK_INPUT(density_data); 34 | CHECK_INPUT(sh_data); 35 | CHECK_INPUT(links); 36 | if (background_links.defined()) { 37 | CHECK_INPUT(background_links); 38 | CHECK_INPUT(background_data); 39 | TORCH_CHECK(background_links.ndimension() == 40 | 2); // (H, W) -> [N] \cup {-1} 41 | TORCH_CHECK(background_data.ndimension() == 3); // (N, D, C) -> R 42 | } 43 | if (basis_data.defined()) { 44 | CHECK_INPUT(basis_data); 45 | } 46 | CHECK_CPU_INPUT(_offset); 47 | CHECK_CPU_INPUT(_scaling); 48 | TORCH_CHECK(density_data.ndimension() == 2); 49 | TORCH_CHECK(sh_data.ndimension() == 2); 50 | TORCH_CHECK(links.ndimension() == 3); 51 | } 52 | }; 53 | 54 | struct GridOutputGrads { 55 | torch::Tensor grad_density_out; 56 | torch::Tensor grad_sh_out; 57 | torch::Tensor grad_basis_out; 58 | torch::Tensor grad_background_out; 59 | 60 | torch::Tensor mask_out; 61 | torch::Tensor mask_background_out; 62 | inline void check() { 63 | if (grad_density_out.defined()) { 64 | CHECK_INPUT(grad_density_out); 65 | } 66 | if (grad_sh_out.defined()) { 67 | CHECK_INPUT(grad_sh_out); 68 | } 69 | if (grad_basis_out.defined()) { 70 | CHECK_INPUT(grad_basis_out); 71 | } 72 | if (grad_background_out.defined()) { 73 | CHECK_INPUT(grad_background_out); 74 | } 75 | if (mask_out.defined() && mask_out.size(0) > 0) { 76 | CHECK_INPUT(mask_out); 77 | } 78 | if (mask_background_out.defined() && mask_background_out.size(0) > 0) { 79 | CHECK_INPUT(mask_background_out); 80 | } 81 | } 82 | }; 83 | 84 | struct CameraSpec { 85 | torch::Tensor c2w; 86 | float fx; 87 | float fy; 88 | float cx; 89 | float cy; 90 | int width; 91 | int height; 92 | 93 | float ndc_coeffx; 94 | float ndc_coeffy; 95 | 96 | inline void check() { 97 | CHECK_INPUT(c2w); 98 | TORCH_CHECK(c2w.is_floating_point()); 99 | TORCH_CHECK(c2w.ndimension() == 2); 100 | TORCH_CHECK(c2w.size(1) == 4); 101 | } 102 | }; 103 | 104 | struct RaysSpec { 105 | Tensor origins; 106 | Tensor dirs; 107 | inline void check() { 108 | CHECK_INPUT(origins); 109 | CHECK_INPUT(dirs); 110 | TORCH_CHECK(origins.is_floating_point()); 111 | TORCH_CHECK(dirs.is_floating_point()); 112 | } 113 | }; 114 | 115 | struct RenderOptions { 116 | float background_brightness; 117 | // float step_epsilon; 118 | float step_size; 119 | float sigma_thresh; 120 | float stop_thresh; 121 | 122 | float near_clip; 123 | bool use_spheric_clip; 124 | 125 | bool last_sample_opaque; 126 | float mask_transmit_threshold; 127 | }; 128 | -------------------------------------------------------------------------------- /lib/plenoxel/include/data_spec_packed.cuh: 
-------------------------------------------------------------------------------- 1 | // Copyright 2021 Alex Yu 2 | #pragma once 3 | #include 4 | #include "data_spec.hpp" 5 | #include "cuda_util.cuh" 6 | #include "random_util.cuh" 7 | 8 | namespace { 9 | namespace device { 10 | 11 | struct PackedSparseGridSpec { 12 | PackedSparseGridSpec(SparseGridSpec& spec) 13 | : 14 | density_data(spec.density_data.data_ptr<float>()), 15 | sh_data(spec.sh_data.data_ptr<float>()), 16 | links(spec.links.data_ptr<int32_t>()), 17 | basis_type(spec.basis_type), 18 | basis_data(spec.basis_data.defined() ? spec.basis_data.data_ptr<float>() : nullptr), 19 | background_links(spec.background_links.defined() ? 20 | spec.background_links.data_ptr<int32_t>() : 21 | nullptr), 22 | background_data(spec.background_data.defined() ? 23 | spec.background_data.data_ptr<float>() : 24 | nullptr), 25 | size{(int)spec.links.size(0), 26 | (int)spec.links.size(1), 27 | (int)spec.links.size(2)}, 28 | stride_x{(int)spec.links.stride(0)}, 29 | background_reso{ 30 | spec.background_links.defined() ? (int)spec.background_links.size(1) : 0, 31 | }, 32 | background_nlayers{ 33 | spec.background_data.defined() ? (int)spec.background_data.size(1) : 0 34 | }, 35 | basis_dim(spec.basis_dim), 36 | sh_data_dim((int)spec.sh_data.size(1)), 37 | basis_reso(spec.basis_data.defined() ? spec.basis_data.size(0) : 0), 38 | _offset{spec._offset.data_ptr<float>()[0], 39 | spec._offset.data_ptr<float>()[1], 40 | spec._offset.data_ptr<float>()[2]}, 41 | _scaling{spec._scaling.data_ptr<float>()[0], 42 | spec._scaling.data_ptr<float>()[1], 43 | spec._scaling.data_ptr<float>()[2]} { 44 | } 45 | 46 | float* __restrict__ density_data; 47 | float* __restrict__ sh_data; 48 | const int32_t* __restrict__ links; 49 | 50 | const uint8_t basis_type; 51 | float* __restrict__ basis_data; 52 | 53 | const int32_t* __restrict__ background_links; 54 | float* __restrict__ background_data; 55 | 56 | const int size[3], stride_x; 57 | const int background_reso, background_nlayers; 58 | 59 | const int basis_dim, sh_data_dim, basis_reso; 60 | const float _offset[3]; 61 | const float _scaling[3]; 62 | }; 63 | 64 | struct PackedGridOutputGrads { 65 | PackedGridOutputGrads(GridOutputGrads& grads) : 66 | grad_density_out(grads.grad_density_out.defined() ? grads.grad_density_out.data_ptr<float>() : nullptr), 67 | grad_sh_out(grads.grad_sh_out.defined() ? grads.grad_sh_out.data_ptr<float>() : nullptr), 68 | grad_basis_out(grads.grad_basis_out.defined() ? grads.grad_basis_out.data_ptr<float>() : nullptr), 69 | grad_background_out(grads.grad_background_out.defined() ? grads.grad_background_out.data_ptr<float>() : nullptr), 70 | mask_out((grads.mask_out.defined() && grads.mask_out.size(0) > 0) ? grads.mask_out.data_ptr<bool>() : nullptr), 71 | mask_background_out((grads.mask_background_out.defined() && grads.mask_background_out.size(0) > 0) ? grads.mask_background_out.data_ptr<bool>() : nullptr) 72 | {} 73 | float* __restrict__ grad_density_out; 74 | float* __restrict__ grad_sh_out; 75 | float* __restrict__ grad_basis_out; 76 | float* __restrict__ grad_background_out; 77 | 78 | bool* __restrict__ mask_out; 79 | bool* __restrict__ mask_background_out; 80 | }; 81 | 82 | struct PackedCameraSpec { 83 | PackedCameraSpec(CameraSpec& cam) : 84 | c2w(cam.c2w.packed_accessor32<float, 2, torch::RestrictPtrTraits>()), 85 | fx(cam.fx), fy(cam.fy), 86 | cx(cam.cx), cy(cam.cy), 87 | width(cam.width), height(cam.height), 88 | ndc_coeffx(cam.ndc_coeffx), ndc_coeffy(cam.ndc_coeffy) {} 89 | const torch::PackedTensorAccessor32<float, 2, torch::RestrictPtrTraits> 90 | c2w; 91 | float fx; 92 | float fy; 93 | float cx; 94 | float cy; 95 | int width; 96 | int height; 97 | 98 | float ndc_coeffx; 99 | float ndc_coeffy; 100 | }; 101 | 102 | struct PackedRaysSpec { 103 | const torch::PackedTensorAccessor32<float, 2, torch::RestrictPtrTraits> origins; 104 | const torch::PackedTensorAccessor32<float, 2, torch::RestrictPtrTraits> dirs; 105 | PackedRaysSpec(RaysSpec& spec) : 106 | origins(spec.origins.packed_accessor32<float, 2, torch::RestrictPtrTraits>()), 107 | dirs(spec.dirs.packed_accessor32<float, 2, torch::RestrictPtrTraits>()) 108 | { } 109 | }; 110 | 111 | struct SingleRaySpec { 112 | SingleRaySpec() = default; 113 | __device__ SingleRaySpec(const float* __restrict__ origin, const float* __restrict__ dir) 114 | : origin{origin[0], origin[1], origin[2]}, 115 | dir{dir[0], dir[1], dir[2]} {} 116 | __device__ void set(const float* __restrict__ origin, const float* __restrict__ dir) { 117 | #pragma unroll 3 118 | for (int i = 0; i < 3; ++i) { 119 | this->origin[i] = origin[i]; 120 | this->dir[i] = dir[i]; 121 | } 122 | } 123 | 124 | float origin[3]; 125 | float dir[3]; 126 | float tmin, tmax, world_step; 127 | 128 | float pos[3]; 129 | int32_t l[3]; 130 | RandomEngine32 rng; 131 | }; 132 | 133 | } // namespace device 134 | } // namespace 135 | -------------------------------------------------------------------------------- /lib/plenoxel/include/random_util.cuh: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Alex Yu 2 | #pragma once 3 | #include 4 | #include 5 | 6 | // A custom xorshift random generator 7 | // Maybe replace with some CUDA internal stuff?
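// Note: three 32-bit words of xorshift state; the all-zero state is a fixed
// point of operator(), so x, y, z must be seeded with nonzero values.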
8 | struct RandomEngine32 { 9 | uint32_t x, y, z; 10 | 11 | // Inclusive both 12 | __host__ __device__ 13 | uint32_t randint(uint32_t lo, uint32_t hi) { 14 | if (hi <= lo) return lo; 15 | uint32_t z = (*this)(); 16 | return z % (hi - lo + 1) + lo; 17 | } 18 | 19 | __host__ __device__ 20 | void rand2(float* out1, float* out2) { 21 | const uint32_t z = (*this)(); 22 | const uint32_t fmax = (1 << 16); 23 | const uint32_t z1 = z >> 16; 24 | const uint32_t z2 = z & (fmax - 1); 25 | const float ifmax = 1.f / fmax; 26 | 27 | *out1 = z1 * ifmax; 28 | *out2 = z2 * ifmax; 29 | } 30 | 31 | __host__ __device__ 32 | float rand() { 33 | uint32_t z = (*this)(); 34 | return float(z) / (1LL << 32); 35 | } 36 | 37 | 38 | __host__ __device__ 39 | void randn2(float* out1, float* out2) { 40 | rand2(out1, out2); 41 | // Box-Muller transform 42 | const float srlog = sqrtf(-2 * logf(*out1 + 1e-32f)); 43 | *out2 *= 2 * M_PI; 44 | *out1 = srlog * cosf(*out2); 45 | *out2 = srlog * sinf(*out2); 46 | } 47 | 48 | __host__ __device__ 49 | float randn() { 50 | float x, y; 51 | rand2(&x, &y); 52 | // Box-Muller transform 53 | return sqrtf(-2 * logf(x + 1e-32f)) * cosf(2 * M_PI * y); 54 | } 55 | 56 | __host__ __device__ 57 | uint32_t operator()() { 58 | uint32_t t; 59 | x ^= x << 16; 60 | x ^= x >> 5; 61 | x ^= x << 1; 62 | t = x; 63 | x = y; 64 | y = z; 65 | z = t ^ x ^ y; 66 | return z; 67 | } 68 | }; 69 | -------------------------------------------------------------------------------- /lib/plenoxel/include/util.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | // Changed from x.type().is_cuda() due to deprecation 3 | #define CHECK_CUDA(x) TORCH_CHECK(x.is_cuda(), #x " must be a CUDA tensor") 4 | #define CHECK_CPU(x) TORCH_CHECK(!x.is_cuda(), #x " must be a CPU tensor") 5 | #define CHECK_CONTIGUOUS(x) \ 6 | TORCH_CHECK(x.is_contiguous(), #x " must be contiguous") 7 | #define CHECK_INPUT(x) \ 8 | CHECK_CUDA(x); \ 9 | CHECK_CONTIGUOUS(x) 10 | #define CHECK_CPU_INPUT(x) \ 11 | CHECK_CPU(x); \ 12 | CHECK_CONTIGUOUS(x) 13 | 14 | #if defined(__CUDACC__) 15 | // #define _EXP(x) expf(x) // SLOW EXP 16 | #define _EXP(x) __expf(x) // FAST EXP 17 | #define _SIGMOID(x) (1 / (1 + _EXP(-(x)))) 18 | 19 | #else 20 | 21 | #define _EXP(x) expf(x) 22 | #define _SIGMOID(x) (1 / (1 + expf(-(x)))) 23 | #endif 24 | #define _SQR(x) ((x) * (x)) 25 | -------------------------------------------------------------------------------- /lib/plenoxel/svox2.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Alex Yu 2 | 3 | // This file contains only Python bindings 4 | #include "data_spec.hpp" 5 | #include 6 | #include 7 | #include 8 | 9 | using torch::Tensor; 10 | 11 | std::tuple<torch::Tensor, torch::Tensor> sample_grid(SparseGridSpec &, Tensor, 12 | bool); 13 | void sample_grid_backward(SparseGridSpec &, Tensor, Tensor, Tensor, Tensor, 14 | Tensor, bool); 15 | 16 | // ** NeRF rendering formula (trilerp) 17 | Tensor volume_render_cuvol(SparseGridSpec &, RaysSpec &, RenderOptions &); 18 | // Tensor volume_render_cuvol_image(SparseGridSpec &, CameraSpec &, 19 | // RenderOptions &); 20 | void volume_render_cuvol_backward(SparseGridSpec &, RaysSpec &, RenderOptions &, 21 | Tensor, Tensor, GridOutputGrads &); 22 | void volume_render_cuvol_fused( 23 | SparseGridSpec &, RaysSpec &, RenderOptions &, 24 | Tensor, float, float, bool, bool, Tensor, Tensor, GridOutputGrads & 25 | ); 26 | // Expected termination (depth) rendering 27 | torch::Tensor
volume_render_expected_term(SparseGridSpec &, RaysSpec &, 28 | RenderOptions &); 29 | // Depth rendering based on sigma-threshold as in Dex-NeRF 30 | torch::Tensor volume_render_sigma_thresh(SparseGridSpec &, RaysSpec &, 31 | RenderOptions &, float); 32 | 33 | // ** NV rendering formula (trilerp) 34 | Tensor volume_render_nvol(SparseGridSpec &, RaysSpec &, RenderOptions &); 35 | void volume_render_nvol_backward(SparseGridSpec &, RaysSpec &, RenderOptions &, 36 | Tensor, Tensor, GridOutputGrads &); 37 | void volume_render_nvol_fused(SparseGridSpec &, RaysSpec &, RenderOptions &, 38 | Tensor, float, float, Tensor, GridOutputGrads &); 39 | 40 | // ** NeRF rendering formula (nearest-neighbor, infinitely many steps) 41 | Tensor volume_render_svox1(SparseGridSpec &, RaysSpec &, RenderOptions &); 42 | void volume_render_svox1_backward(SparseGridSpec &, RaysSpec &, RenderOptions &, 43 | Tensor, Tensor, GridOutputGrads &); 44 | void volume_render_svox1_fused(SparseGridSpec &, RaysSpec &, RenderOptions &, 45 | Tensor, float, float, Tensor, GridOutputGrads &); 46 | 47 | // Tensor volume_render_cuvol_image(SparseGridSpec &, CameraSpec &, 48 | // RenderOptions &); 49 | // 50 | // void volume_render_cuvol_image_backward(SparseGridSpec &, CameraSpec &, 51 | // RenderOptions &, Tensor, Tensor, 52 | // GridOutputGrads &); 53 | 54 | // Misc 55 | Tensor dilate(Tensor); 56 | void accel_dist_prop(Tensor); 57 | void grid_weight_render(Tensor, CameraSpec &, float, float, bool, Tensor, 58 | Tensor, Tensor); 59 | // void sample_cubemap(Tensor, Tensor, bool, Tensor); 60 | 61 | // Loss 62 | Tensor tv(Tensor, Tensor, int, int, bool, float, bool, float, float); 63 | void tv_grad(Tensor, Tensor, int, int, float, bool, float, bool, float, float, 64 | Tensor); 65 | void tv_grad_sparse(Tensor, Tensor, Tensor, Tensor, int, int, float, bool, 66 | float, bool, bool, float, float, Tensor); 67 | void msi_tv_grad_sparse(Tensor, Tensor, Tensor, Tensor, float, float, Tensor); 68 | void lumisphere_tv_grad_sparse(SparseGridSpec &, Tensor, Tensor, Tensor, float, 69 | float, float, float, GridOutputGrads &); 70 | 71 | // Optim 72 | void rmsprop_step(Tensor, Tensor, Tensor, Tensor, float, float, float, float, 73 | float); 74 | void sgd_step(Tensor, Tensor, Tensor, float, float); 75 | 76 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 77 | #define _REG_FUNC(funname) m.def(#funname, &funname) 78 | _REG_FUNC(sample_grid); 79 | _REG_FUNC(sample_grid_backward); 80 | _REG_FUNC(volume_render_cuvol); 81 | // _REG_FUNC(volume_render_cuvol_image); 82 | _REG_FUNC(volume_render_cuvol_backward); 83 | _REG_FUNC(volume_render_cuvol_fused); 84 | _REG_FUNC(volume_render_expected_term); 85 | _REG_FUNC(volume_render_sigma_thresh); 86 | 87 | _REG_FUNC(volume_render_nvol); 88 | _REG_FUNC(volume_render_nvol_backward); 89 | _REG_FUNC(volume_render_nvol_fused); 90 | 91 | _REG_FUNC(volume_render_svox1); 92 | _REG_FUNC(volume_render_svox1_backward); 93 | _REG_FUNC(volume_render_svox1_fused); 94 | 95 | // _REG_FUNC(volume_render_cuvol_image); 96 | // _REG_FUNC(volume_render_cuvol_image_backward); 97 | 98 | // Loss 99 | _REG_FUNC(tv); 100 | _REG_FUNC(tv_grad); 101 | _REG_FUNC(tv_grad_sparse); 102 | _REG_FUNC(msi_tv_grad_sparse); 103 | _REG_FUNC(lumisphere_tv_grad_sparse); 104 | 105 | // Misc 106 | _REG_FUNC(dilate); 107 | _REG_FUNC(accel_dist_prop); 108 | _REG_FUNC(grid_weight_render); 109 | // _REG_FUNC(sample_cubemap); 110 | 111 | // Optimizer 112 | _REG_FUNC(rmsprop_step); 113 | _REG_FUNC(sgd_step); 114 | #undef _REG_FUNC 115 | 116 | py::class_<SparseGridSpec>(m, "SparseGridSpec") 117 | .def(py::init<>()) 118 | .def_readwrite("density_data", &SparseGridSpec::density_data) 119 | .def_readwrite("sh_data", &SparseGridSpec::sh_data) 120 | .def_readwrite("links", &SparseGridSpec::links) 121 | .def_readwrite("_offset", &SparseGridSpec::_offset) 122 | .def_readwrite("_scaling", &SparseGridSpec::_scaling) 123 | .def_readwrite("basis_dim", &SparseGridSpec::basis_dim) 124 | .def_readwrite("basis_type", &SparseGridSpec::basis_type) 125 | .def_readwrite("basis_data", &SparseGridSpec::basis_data) 126 | .def_readwrite("background_links", &SparseGridSpec::background_links) 127 | .def_readwrite("background_data", &SparseGridSpec::background_data); 128 | 129 | py::class_<CameraSpec>(m, "CameraSpec") 130 | .def(py::init<>()) 131 | .def_readwrite("c2w", &CameraSpec::c2w) 132 | .def_readwrite("fx", &CameraSpec::fx) 133 | .def_readwrite("fy", &CameraSpec::fy) 134 | .def_readwrite("cx", &CameraSpec::cx) 135 | .def_readwrite("cy", &CameraSpec::cy) 136 | .def_readwrite("width", &CameraSpec::width) 137 | .def_readwrite("height", &CameraSpec::height) 138 | .def_readwrite("ndc_coeffx", &CameraSpec::ndc_coeffx) 139 | .def_readwrite("ndc_coeffy", &CameraSpec::ndc_coeffy); 140 | 141 | py::class_<RaysSpec>(m, "RaysSpec") 142 | .def(py::init<>()) 143 | .def_readwrite("origins", &RaysSpec::origins) 144 | .def_readwrite("dirs", &RaysSpec::dirs); 145 | 146 | py::class_<RenderOptions>(m, "RenderOptions") 147 | .def(py::init<>()) 148 | .def_readwrite("background_brightness", 149 | &RenderOptions::background_brightness) 150 | .def_readwrite("step_size", &RenderOptions::step_size) 151 | .def_readwrite("sigma_thresh", &RenderOptions::sigma_thresh) 152 | .def_readwrite("stop_thresh", &RenderOptions::stop_thresh) 153 | .def_readwrite("near_clip", &RenderOptions::near_clip) 154 | .def_readwrite("use_spheric_clip", &RenderOptions::use_spheric_clip) 155 | .def_readwrite("last_sample_opaque", &RenderOptions::last_sample_opaque) 156 | .def_readwrite("mask_transmit_threshold", &RenderOptions::mask_transmit_threshold); 157 | 158 | py::class_<GridOutputGrads>(m, "GridOutputGrads") 159 | .def(py::init<>()) 160 | .def_readwrite("grad_density_out", &GridOutputGrads::grad_density_out) 161 | .def_readwrite("grad_sh_out", &GridOutputGrads::grad_sh_out) 162 | .def_readwrite("grad_basis_out", &GridOutputGrads::grad_basis_out) 163 | .def_readwrite("grad_background_out", 164 | &GridOutputGrads::grad_background_out) 165 | .def_readwrite("mask_out", &GridOutputGrads::mask_out) 166 | .def_readwrite("mask_background_out", 167 | &GridOutputGrads::mask_background_out); 168 | } 169 | -------------------------------------------------------------------------------- /lib/plenoxel/version.py: -------------------------------------------------------------------------------- 1 | __version__ = '0.0.1.dev0+sphtexcub.lincolor.fast' 2 | -------------------------------------------------------------------------------- /model/interface.py: -------------------------------------------------------------------------------- 1 | import pytorch_lightning as pl 2 | 3 | import numpy as np 4 | import torch 5 | import json 6 | 7 | from piqa.ssim import SSIM 8 | from piqa.lpips import LPIPS 9 | 10 | reshape_2d = lambda x: x.reshape((x.shape[0], -1)) 11 | clip_0_1 = lambda x: torch.clip(x, 0, 1).detach() 12 | 13 | class LitModel(pl.LightningModule): 14 | 15 | # Utils to reorganize output values from evaluation steps, 16 | # i.e., validation and test step.
17 | def alter_gather_cat(self, outputs, key, image_sizes): 18 | each = torch.cat([output[key] for output in outputs]) 19 | all = self.all_gather(each).detach() 20 | if all.dim() == 3: 21 | all = all.permute((1, 0, 2)).flatten(0, 1) 22 | ret, curr = [], 0 23 | for (h, w) in image_sizes: 24 | ret.append(all[curr:curr+h*w].reshape(h, w, -1)) 25 | curr += h * w 26 | return ret 27 | 28 | @torch.no_grad() 29 | def psnr_each(self, preds, gts): 30 | psnr_list = [] 31 | for (pred, gt) in zip(preds, gts): 32 | pred = torch.clip(pred, 0, 1) 33 | gt = torch.clip(gt, 0, 1) 34 | mse = torch.mean((pred - gt) ** 2) 35 | psnr = -10.0 * torch.log(mse) / np.log(10) 36 | psnr_list.append(psnr) 37 | return torch.stack(psnr_list) 38 | 39 | @torch.no_grad() 40 | def ssim_each(self, preds, gts): 41 | ssim_model = SSIM().to(device=self.device) 42 | ssim_list = [] 43 | for (pred, gt) in zip(preds, gts): 44 | pred = torch.clip( 45 | pred.permute((2, 0, 1)).unsqueeze(0).float(), 46 | 0, 1 47 | ) 48 | gt = torch.clip( 49 | gt.permute((2, 0, 1)).unsqueeze(0).float(), 50 | 0, 1 51 | ) 52 | ssim = ssim_model(pred, gt) 53 | ssim_list.append(ssim) 54 | del ssim_model 55 | return torch.stack(ssim_list) 56 | 57 | @torch.no_grad() 58 | def lpips_each(self, preds, gts): 59 | lpips_model = LPIPS(network="vgg").to(device=self.device) 60 | lpips_list = [] 61 | for (pred, gt) in zip(preds, gts): 62 | pred = torch.clip( 63 | pred.permute((2, 0, 1)).unsqueeze(0).float(), 64 | 0, 1 65 | ) 66 | gt = torch.clip( 67 | gt.permute((2, 0, 1)).unsqueeze(0).float(), 68 | 0, 1 69 | ) 70 | lpips = lpips_model(pred, gt) 71 | lpips_list.append(lpips) 72 | del lpips_model 73 | return torch.stack(lpips_list) 74 | 75 | @torch.no_grad() 76 | def psnr(self, preds, gts, i_train, i_val, i_test): 77 | ret = {} 78 | ret["name"] = "PSNR" 79 | psnr_list = self.psnr_each(preds, gts) 80 | ret["mean"] = psnr_list.mean().item() 81 | if self.trainer.datamodule.eval_test_only: 82 | ret["test"] = psnr_list.mean().item() 83 | else: 84 | ret["train"] = psnr_list[i_train].mean().item() 85 | ret["val"] = psnr_list[i_val].mean().item() 86 | ret["test"] = psnr_list[i_test].mean().item() 87 | 88 | return ret 89 | 90 | @torch.no_grad() 91 | def ssim(self, preds, gts, i_train, i_val, i_test): 92 | ret = {} 93 | ret["name"] = "SSIM" 94 | ssim_list = self.ssim_each(preds, gts) 95 | ret["mean"] = ssim_list.mean().item() 96 | if self.trainer.datamodule.eval_test_only: 97 | ret["test"] = ssim_list.mean().item() 98 | else: 99 | ret["train"] = ssim_list[i_train].mean().item() 100 | ret["val"] = ssim_list[i_val].mean().item() 101 | ret["test"] = ssim_list[i_test].mean().item() 102 | 103 | return ret 104 | 105 | @torch.no_grad() 106 | def lpips(self, preds, gts, i_train, i_val, i_test): 107 | ret = {} 108 | ret["name"] = "LPIPS" 109 | lpips_list = self.lpips_each(preds, gts) 110 | ret["mean"] = lpips_list.mean().item() 111 | if self.trainer.datamodule.eval_test_only: 112 | ret["test"] = lpips_list.mean().item() 113 | else: 114 | ret["train"] = lpips_list[i_train].mean().item() 115 | ret["val"] = lpips_list[i_val].mean().item() 116 | ret["test"] = lpips_list[i_test].mean().item() 117 | 118 | return ret 119 | 120 | def write_stats(self, fpath, *stats): 121 | 122 | d = {} 123 | for stat in stats: 124 | d[stat["name"]] = {k : float(w) for (k, w) in stat.items() if k != "name" and k != "scene_wise"} 125 | 126 | with open(fpath, 'w') as fp: 127 | json.dump(d, fp, indent=4, sort_keys=True) 128 | -------------------------------------------------------------------------------- 
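For reference, the PSNR computed by psnr_each above is the standard -10 * log10(MSE) in decibels on [0, 1]-clipped images. A minimal self-contained sketch of the same computation (the random 256x256 tensors are placeholder inputs, not repository data):

import torch

def psnr(pred: torch.Tensor, gt: torch.Tensor) -> torch.Tensor:
    # Clip both images into [0, 1] first, exactly as LitModel.psnr_each does.
    pred, gt = torch.clip(pred, 0, 1), torch.clip(gt, 0, 1)
    mse = torch.mean((pred - gt) ** 2)
    return -10.0 * torch.log10(mse)

pred = torch.rand(256, 256, 3)  # placeholder rendered image
gt = torch.rand(256, 256, 3)    # placeholder ground truth
print(psnr(pred, gt))  # roughly 7.8 dB, since E[(X - Y)^2] = 1/6 for uniform noise

--------------------------------------------------------------------------------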
/model/plenoxel_torch/__global__.py: -------------------------------------------------------------------------------- 1 | BASIS_TYPE_SH = 1 2 | BASIS_TYPE_3D_TEXTURE = 4 3 | BASIS_TYPE_MLP = 255 4 | 5 | 6 | def _get_c_extension(): 7 | from warnings import warn 8 | 9 | try: 10 | import lib.plenoxel as _C 11 | 12 | if not hasattr(_C, "sample_grid"): 13 | _C = None 14 | except: 15 | _C = None 16 | 17 | if _C is None: 18 | warn( 19 | "CUDA extension svox2.csrc could not be loaded! " 20 | + "Operations will be slow.\n" 21 | + "Please do not import svox in the svox2 source directory." 22 | ) 23 | return _C 24 | -------------------------------------------------------------------------------- /model/plenoxel_torch/autograd.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.autograd as autograd 3 | import model.plenoxel_torch.utils as utils 4 | 5 | from typing import Tuple 6 | 7 | from model.plenoxel_torch.__global__ import ( 8 | BASIS_TYPE_SH, 9 | _get_c_extension, 10 | BASIS_TYPE_3D_TEXTURE, 11 | BASIS_TYPE_MLP, 12 | ) 13 | 14 | _C = _get_c_extension() 15 | 16 | # BEGIN Differentiable CUDA functions with custom gradient 17 | class _SampleGridAutogradFunction(autograd.Function): 18 | @staticmethod 19 | def forward( 20 | ctx, 21 | data_density: torch.Tensor, 22 | data_sh: torch.Tensor, 23 | grid, 24 | points: torch.Tensor, 25 | want_colors: bool, 26 | ): 27 | assert not points.requires_grad, "Point gradient not supported" 28 | out_density, out_sh = _C.sample_grid(grid, points, want_colors) 29 | ctx.save_for_backward(points) 30 | ctx.grid = grid 31 | ctx.want_colors = want_colors 32 | return out_density, out_sh 33 | 34 | @staticmethod 35 | def backward(ctx, grad_out_density, grad_out_sh): 36 | (points,) = ctx.saved_tensors 37 | grad_density_grid = torch.zeros_like(ctx.grid.density_data.data) 38 | grad_sh_grid = torch.zeros_like(ctx.grid.sh_data.data) 39 | _C.sample_grid_backward( 40 | ctx.grid, 41 | points, 42 | grad_out_density.contiguous(), 43 | grad_out_sh.contiguous(), 44 | grad_density_grid, 45 | grad_sh_grid, 46 | ctx.want_colors, 47 | ) 48 | if not ctx.needs_input_grad[0]: 49 | grad_density_grid = None 50 | if not ctx.needs_input_grad[1]: 51 | grad_sh_grid = None 52 | 53 | return grad_density_grid, grad_sh_grid, None, None, None 54 | 55 | 56 | class _VolumeRenderFunction(autograd.Function): 57 | @staticmethod 58 | def forward( 59 | ctx, 60 | data_density: torch.Tensor, 61 | data_sh: torch.Tensor, 62 | data_basis: torch.Tensor, 63 | data_background: torch.Tensor, 64 | grid, 65 | rays, 66 | opt, 67 | backend: str, 68 | ): 69 | cu_fn = _C.__dict__[f"volume_render_{backend}"] 70 | color, _ = cu_fn( 71 | grid, rays, opt 72 | ) 73 | ctx.save_for_backward(color) 74 | ctx.grid = grid 75 | ctx.rays = rays 76 | ctx.opt = opt 77 | ctx.backend = backend 78 | ctx.basis_data = data_basis 79 | return color 80 | 81 | @staticmethod 82 | def backward(ctx, grad_out): 83 | (color_cache,) = ctx.saved_tensors 84 | cu_fn = _C.__dict__[f"volume_render_{ctx.backend}_backward"] 85 | grad_density_grid = torch.zeros_like(ctx.grid.density_data.data) 86 | grad_sh_grid = torch.zeros_like(ctx.grid.sh_data.data) 87 | if ctx.grid.basis_type == BASIS_TYPE_MLP: 88 | grad_basis = torch.zeros_like(ctx.basis_data) 89 | elif ctx.grid.basis_type == BASIS_TYPE_3D_TEXTURE: 90 | grad_basis = torch.zeros_like(ctx.grid.basis_data.data) 91 | if ctx.grid.background_data is not None: 92 | grad_background = torch.zeros_like(ctx.grid.background_data.data) 93 | grad_holder = 
_C.GridOutputGrads() 94 | grad_holder.grad_density_out = grad_density_grid 95 | grad_holder.grad_sh_out = grad_sh_grid 96 | if ctx.needs_input_grad[2]: 97 | grad_holder.grad_basis_out = grad_basis 98 | if ctx.grid.background_data is not None and ctx.needs_input_grad[3]: 99 | grad_holder.grad_background_out = grad_background 100 | cu_fn( 101 | ctx.grid, ctx.rays, ctx.opt, grad_out.contiguous(), color_cache, grad_holder 102 | ) 103 | ctx.grid = ctx.rays = ctx.opt = None 104 | if not ctx.needs_input_grad[0]: 105 | grad_density_grid = None 106 | if not ctx.needs_input_grad[1]: 107 | grad_sh_grid = None 108 | if not ctx.needs_input_grad[2]: 109 | grad_basis = None 110 | if not ctx.needs_input_grad[3]: 111 | grad_background = None 112 | ctx.basis_data = None 113 | 114 | return ( 115 | grad_density_grid, 116 | grad_sh_grid, 117 | grad_basis, 118 | grad_background, 119 | None, 120 | None, 121 | None, 122 | None, 123 | ) 124 | 125 | 126 | class _TotalVariationFunction(autograd.Function): 127 | @staticmethod 128 | def forward( 129 | ctx, 130 | data: torch.Tensor, 131 | links: torch.Tensor, 132 | start_dim: int, 133 | end_dim: int, 134 | use_logalpha: bool, 135 | logalpha_delta: float, 136 | ignore_edge: bool, 137 | ndc_coeffs: Tuple[float, float], 138 | ): 139 | tv = _C.tv( 140 | links, 141 | data, 142 | start_dim, 143 | end_dim, 144 | use_logalpha, 145 | logalpha_delta, 146 | ignore_edge, 147 | ndc_coeffs[0], 148 | ndc_coeffs[1], 149 | ) 150 | ctx.save_for_backward(links, data) 151 | ctx.start_dim = start_dim 152 | ctx.end_dim = end_dim 153 | ctx.use_logalpha = use_logalpha 154 | ctx.logalpha_delta = logalpha_delta 155 | ctx.ignore_edge = ignore_edge 156 | ctx.ndc_coeffs = ndc_coeffs 157 | return tv 158 | 159 | @staticmethod 160 | def backward(ctx, grad_out): 161 | links, data = ctx.saved_tensors 162 | grad_grid = torch.zeros_like(data) 163 | _C.tv_grad( 164 | links, 165 | data, 166 | ctx.start_dim, 167 | ctx.end_dim, 168 | 1.0, 169 | ctx.use_logalpha, 170 | ctx.logalpha_delta, 171 | ctx.ignore_edge, 172 | ctx.ndc_coeffs[0], 173 | ctx.ndc_coeffs[1], 174 | grad_grid, 175 | ) 176 | ctx.start_dim = ctx.end_dim = None 177 | if not ctx.needs_input_grad[0]: 178 | grad_grid = None 179 | return grad_grid, None, None, None, None, None, None, None 180 | -------------------------------------------------------------------------------- /model/plenoxel_torch/dataclass.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from random import random 3 | from typing import List, Optional, Tuple, Union 4 | 5 | import torch 6 | 7 | import model.plenoxel_torch.utils as utils 8 | from model.plenoxel_torch.__global__ import _get_c_extension 9 | 10 | _C = _get_c_extension() 11 | 12 | 13 | @dataclass 14 | class RenderOptions: 15 | """ 16 | Rendering options, see comments 17 | available: 18 | :param backend: str, renderer backend 19 | :param background_brightness: float 20 | :param step_size: float, step size for rendering 21 | :param sigma_thresh: float 22 | :param stop_thresh: float 23 | """ 24 | 25 | def __init__( 26 | self, 27 | backend: str = "cuvol", 28 | background_brightness: float = 1.0, 29 | step_size: float = 0.5, 30 | sigma_thresh: float = 1e-10, 31 | stop_thresh: float = 1e-7, 32 | last_sample_opaque: bool = False, 33 | near_clip: float = 0.0, 34 | use_spheric_clip: bool = False, 35 | mask_transmit_threshold = 0.9, 36 | ): 37 | self.backend = backend 38 | self.background_brightness = background_brightness 39 | self.step_size = step_size 40 
| self.sigma_thresh = sigma_thresh 41 | self.stop_thresh = stop_thresh 42 | self.last_sample_opaque = last_sample_opaque 43 | self.near_clip = near_clip 44 | self.use_spheric_clip = use_spheric_clip 45 | self.mask_transmit_threshold = mask_transmit_threshold 46 | 47 | def _to_cpp( 48 | self, randomize: bool = False 49 | ): 50 | """ 51 | Generate object to pass to C++ 52 | """ 53 | opt = _C.RenderOptions() 54 | opt.background_brightness = self.background_brightness 55 | opt.step_size = self.step_size 56 | opt.sigma_thresh = self.sigma_thresh 57 | opt.stop_thresh = self.stop_thresh 58 | opt.near_clip = self.near_clip 59 | opt.use_spheric_clip = self.use_spheric_clip 60 | opt.last_sample_opaque = self.last_sample_opaque 61 | opt.mask_transmit_threshold = self.mask_transmit_threshold 62 | 63 | return opt 64 | 65 | 66 | @dataclass 67 | class Rays: 68 | origins: torch.Tensor 69 | dirs: torch.Tensor 70 | 71 | def _to_cpp(self): 72 | """ 73 | Generate object to pass to C++ 74 | """ 75 | spec = _C.RaysSpec() 76 | spec.origins = self.origins 77 | spec.dirs = self.dirs 78 | return spec 79 | 80 | def __getitem__(self, key): 81 | return Rays(self.origins[key], self.dirs[key]) 82 | 83 | @property 84 | def is_cuda(self) -> bool: 85 | return self.origins.is_cuda and self.dirs.is_cuda 86 | 87 | 88 | @dataclass 89 | class Camera: 90 | c2w: torch.Tensor # OpenCV 91 | fx: float = 1111.11 92 | fy: Optional[float] = None 93 | cx: Optional[float] = None 94 | cy: Optional[float] = None 95 | width: int = 800 96 | height: int = 800 97 | 98 | ndc_coeffs: Union[Tuple[float, float], List[float]] = (-1.0, -1.0) 99 | 100 | @property 101 | def fx_val(self): 102 | return self.fx 103 | 104 | @property 105 | def fy_val(self): 106 | return self.fx if self.fy is None else self.fy 107 | 108 | @property 109 | def cx_val(self): 110 | return self.width * 0.5 if self.cx is None else self.cx 111 | 112 | @property 113 | def cy_val(self): 114 | return self.height * 0.5 if self.cy is None else self.cy 115 | 116 | @property 117 | def using_ndc(self): 118 | return self.ndc_coeffs[0] > 0.0 119 | 120 | def _to_cpp(self): 121 | """ 122 | Generate object to pass to C++ 123 | """ 124 | spec = _C.CameraSpec() 125 | spec.c2w = self.c2w.float() 126 | spec.fx = float(self.fx_val) 127 | spec.fy = float(self.fy_val) 128 | spec.cx = float(self.cx_val) 129 | spec.cy = float(self.cy_val) 130 | spec.width = int(self.width) 131 | spec.height = int(self.height) 132 | spec.ndc_coeffx = float(self.ndc_coeffs[0]) 133 | spec.ndc_coeffy = float(self.ndc_coeffs[1]) 134 | return spec 135 | 136 | @property 137 | def is_cuda(self) -> bool: 138 | return self.c2w.is_cuda 139 | -------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import os 4 | import shutil 5 | from typing import * 6 | 7 | import gin 8 | import pytorch_lightning.loggers as pl_loggers 9 | import torch 10 | from pytorch_lightning import Trainer, seed_everything 11 | from pytorch_lightning.callbacks import ( 12 | LearningRateMonitor, 13 | ModelCheckpoint, 14 | TQDMProgressBar, 15 | ) 16 | from pytorch_lightning.plugins import DDPPlugin 17 | 18 | from utils.logger import RetryingWandbLogger 19 | from utils.select_option import select_callback, select_dataset, select_model 20 | 21 | 22 | def str2bool(v): 23 | if isinstance(v, bool): 24 | return v 25 | if v.lower() in ("yes", "true", "t", "y", "1"): 26 | return True 27 | elif v.lower() in ("no", 
"false", "f", "n", "0"): 28 | return False 29 | else: 30 | raise Exception("Boolean value expected.") 31 | 32 | 33 | @gin.configurable() 34 | def run( 35 | resume_training: bool = False, 36 | ckpt_path: Optional[str] = None, 37 | datadir: Optional[str] = None, 38 | logbase: Optional[str] = None, 39 | scene_name: Optional[str] = None, 40 | model_name: Optional[str] = None, 41 | proj_name: Optional[str] = None, 42 | dataset_name: Optional[str] = None, 43 | postfix: Optional[str] = None, 44 | entity: Optional[str] = None, 45 | # Optimization 46 | max_steps: int = 200000, 47 | precision: int = 32, 48 | # Logging 49 | log_every_n_steps: int = 1000, 50 | progressbar_refresh_rate: int = 5, 51 | # Run Mode 52 | run_train: bool = True, 53 | run_eval: bool = True, 54 | run_render: bool = False, 55 | accelerator: str = "gpu", 56 | num_gpus: Optional[int] = 1, 57 | num_tpus: Optional[int] = None, 58 | num_sanity_val_steps: int = 0, 59 | seed: int = 777, 60 | debug: bool = False, 61 | save_last_only: bool = False, 62 | check_val_every_n_epoch: int = 1, 63 | ): 64 | 65 | logging.getLogger("lightning").setLevel(logging.ERROR) 66 | datadir = datadir.rstrip("/") 67 | 68 | if scene_name is None and dataset_name == "co3d": 69 | scene_name = "349_36520_66801" 70 | 71 | if scene_name is None and dataset_name == "scannet": 72 | scene_name = "scene0000_00" 73 | 74 | exp_name = model_name + "_" + dataset_name + "_" + scene_name 75 | if postfix is not None: 76 | exp_name += "_" + str(postfix) 77 | if debug: 78 | exp_name += "_debug" 79 | 80 | if num_gpus is None: 81 | num_gpus = torch.cuda.device_count() 82 | 83 | os.makedirs(logbase, exist_ok=True) 84 | logdir = os.path.join(logbase, exp_name) 85 | 86 | os.makedirs(logdir, exist_ok=True) 87 | 88 | # WANDB fails when using TPUs 89 | wandb_logger = ( 90 | RetryingWandbLogger( 91 | name=exp_name, 92 | entity=entity, 93 | project=model_name if proj_name is None else proj_name, 94 | ) 95 | if accelerator == "gpu" 96 | else pl_loggers.TensorBoardLogger(save_dir=logdir, name=exp_name) 97 | ) 98 | 99 | seed_everything(seed, workers=True) 100 | 101 | lr_monitor = LearningRateMonitor(logging_interval="step") 102 | model_checkpoint = ModelCheckpoint( 103 | monitor="val/psnr", 104 | dirpath=logdir, 105 | filename="best", 106 | save_top_k=1, 107 | mode="max", 108 | save_last=save_last_only, 109 | ) 110 | tqdm_progrss = TQDMProgressBar(refresh_rate=progressbar_refresh_rate) 111 | 112 | callbacks = [lr_monitor, model_checkpoint, tqdm_progrss] 113 | callbacks += select_callback(model_name) 114 | 115 | trainer = Trainer( 116 | logger=wandb_logger if run_train or run_render else None, 117 | log_every_n_steps=log_every_n_steps, 118 | devices=num_gpus, 119 | max_steps=max_steps, 120 | replace_sampler_ddp=False, 121 | check_val_every_n_epoch=check_val_every_n_epoch, 122 | precision=precision, 123 | accelerator="gpu", 124 | num_sanity_val_steps=num_sanity_val_steps, 125 | callbacks=callbacks, 126 | ) 127 | 128 | if resume_training: 129 | if ckpt_path is None: 130 | ckpt_path = f"{logdir}/last.ckpt" 131 | 132 | data_module = select_dataset( 133 | dataset_name=dataset_name, 134 | scene_name=scene_name, 135 | datadir=datadir, 136 | accelerator="gpu", 137 | num_gpus=num_gpus, 138 | num_tpus=num_tpus, 139 | ) 140 | model = select_model(model_name=model_name) 141 | model.logdir = logdir 142 | if run_train: 143 | trainer.fit(model, data_module, ckpt_path=ckpt_path) 144 | if save_last_only: 145 | best_ckpt = os.path.join(logdir, "best.ckpt") 146 | if os.path.exists(best_ckpt): 147 | 
os.remove(best_ckpt) 148 | ckpt_path = f"{logdir}/best.ckpt" if not save_last_only else f"{logdir}/last.ckpt" 149 | if run_eval: 150 | trainer.test(model, data_module, ckpt_path=ckpt_path) 151 | 152 | if run_render: 153 | trainer.predict(model, data_module, ckpt_path=ckpt_path) 154 | 155 | 156 | if __name__ == "__main__": 157 | parser = argparse.ArgumentParser() 158 | parser.add_argument( 159 | "--ginc", 160 | action="append", 161 | help="gin config file", 162 | ) 163 | parser.add_argument( 164 | "--ginb", 165 | action="append", 166 | help="gin bindings", 167 | ) 168 | parser.add_argument( 169 | "--resume_training", 170 | type=str2bool, 171 | nargs="?", 172 | const=True, 173 | default=False, 174 | help="resume training from the latest checkpoint", 175 | ) 176 | parser.add_argument( 177 | "--ckpt_path", 178 | type=str, 179 | default=None, 180 | help="path to checkpoints", 181 | ) 182 | parser.add_argument( 183 | "--scene_name", 184 | type=str, 185 | default=None, 186 | help="scene name", 187 | ) 188 | parser.add_argument( 189 | "--entity", 190 | type=str, 191 | default=None, 192 | help="entity", 193 | ) 194 | args = parser.parse_args() 195 | 196 | ginbs = [] 197 | if args.ginb: 198 | ginbs.extend(args.ginb) 199 | logging.info(f"Gin configuration files: {args.ginc}") 200 | logging.info(f"Gin bindings: {ginbs}") 201 | 202 | gin.parse_config_files_and_bindings(args.ginc, ginbs) 203 | run( 204 | resume_training=args.resume_training, 205 | ckpt_path=args.ckpt_path, 206 | scene_name=args.scene_name, 207 | entity=args.entity, 208 | ) 209 | -------------------------------------------------------------------------------- /sbatch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | #SBATCH -J icn # Job name 4 | #SBATCH -o sbatch_log/pytorch-1gpu.%j.out # Name of stdout output file (%j expands to %jobId) 5 | #SBATCH -p A100 # queue name or partition name titanxp/titanrtx/2080ti 6 | #SBATCH -t 3-00:00:00 # Run time (d-hh:mm:ss) - 3 days 7 | #SBATCH --gres=gpu:1 # number of gpus you want to use 8 | 9 | #SBATCH --nodes=1 10 | ##SBATCH --exclude=n13 11 | ##SBATCH --nodelist=n12 12 | 13 | ##SBATCH --ntasks=1 14 | ##SBATCH --tasks-per-node=1 15 | ##SBATCH --cpus-per-task=1 16 | 17 | cd $SLURM_SUBMIT_DIR 18 | 19 | echo "SLURM_SUBMIT_DIR=$SLURM_SUBMIT_DIR" 20 | echo "CUDA_HOME=$CUDA_HOME" 21 | echo "CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES" 22 | echo "CUDA_VERSION=$CUDA_VERSION" 23 | 24 | srun -l /bin/hostname 25 | srun -l /bin/pwd 26 | srun -l /bin/date 27 | 28 | module purge 29 | 30 | echo "Start" 31 | export NCCL_NSOCKS_PERTHREAD=4 32 | export NCCL_SOCKET_NTHREADS=2 33 | export WANDB_SPAWN_METHOD=fork 34 | 35 | 36 | nvidia-smi 37 | date 38 | squeue --job $SLURM_JOBID 39 | 40 | echo "##### END #####" -------------------------------------------------------------------------------- /script/collage.sh: -------------------------------------------------------------------------------- 1 | python3 -m run --ginc configs/co3d.gin --scene_name 290_30780_59102 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True 2 | python3 -m run --ginc configs/co3d.gin --scene_name 349_36520_66801 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True 3 | python3 -m run --ginc configs/co3d.gin --scene_name 12_109_707 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True 4 | python3 -m run 
--ginc configs/co3d.gin --scene_name 423_58951_114401 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True 5 | python3 -m run --ginc configs/co3d.gin --scene_name 412_56309_109304 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True 6 | python3 -m run --ginc configs/co3d.gin --scene_name 236_24789_51101 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True 7 | python3 -m run --ginc configs/co3d.gin --scene_name 373_41487_82902 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True 8 | python3 -m run --ginc configs/co3d.gin --scene_name 386_46018_91908 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True 9 | python3 -m run --ginc configs/co3d.gin --scene_name 374_41919_83809 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True 10 | python3 -m run --ginc configs/co3d.gin --scene_name 402_52411_102607 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True 11 | python3 -m run --ginc configs/co3d.gin --scene_name 197_21206_41908 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True 12 | python3 -m run --ginc configs/co3d.gin --scene_name 47_2694_7604 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True 13 | python3 -m run --ginc configs/co3d.gin --scene_name 430_60724_119000 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True 14 | python3 -m run --ginc configs/co3d.gin --scene_name 399_51048_100100 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True 15 | python3 -m run --ginc configs/co3d.gin --scene_name 396_49638_97805 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True 16 | python3 -m run --ginc configs/co3d.gin --scene_name 399_51079_100203 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True 17 | python3 -m run --ginc configs/co3d.gin --scene_name 414_56915_110208 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True 18 | python3 -m run --ginc configs/co3d.gin --scene_name 395_49190_97202 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True 19 | python3 -m run --ginc configs/co3d.gin --scene_name 163_17880_33208 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True 20 | python3 -m run --ginc configs/co3d.gin --scene_name 397_50080_98510 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True 21 | python3 -m run --ginc configs/co3d.gin --scene_name 396_49564_97706 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb 
load_co3d_data.render_scene_interp=True 22 | python3 -m run --ginc configs/co3d.gin --scene_name 416_57463_111200 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True 23 | python3 -m run --ginc configs/co3d.gin --scene_name 386_46212_92202 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True 24 | python3 -m run --ginc configs/co3d.gin --scene_name 117_13756_28310 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True 25 | python3 -m run --ginc configs/co3d.gin --scene_name 374_41996_84006 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True 26 | python3 -m run --ginc configs/co3d.gin --scene_name 411_56010_108204 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True 27 | python3 -m run --ginc configs/co3d.gin --scene_name 391_46910_93404 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True 28 | python3 -m run --ginc configs/co3d.gin --scene_name 399_50932_99902 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True 29 | python3 -m run --ginc configs/co3d.gin --scene_name 424_59119_114602 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True 30 | python3 -m run --ginc configs/co3d.gin --scene_name 58_3355_10203 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True 31 | python3 -m run --ginc configs/co3d.gin --scene_name 153_16983_31803 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True 32 | python3 -m run --ginc configs/co3d.gin --scene_name 262_28099_53708 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True 33 | python3 -m run --ginc configs/co3d.gin --scene_name 378_44212_88104 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True 34 | python3 -m run --ginc configs/co3d.gin --scene_name 395_49194_97208 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True 35 | python3 -m run --ginc configs/co3d.gin --scene_name 385_45782_91506 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True 36 | python3 -m run --ginc configs/co3d.gin --scene_name 386_45955_91804 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True 37 | python3 -m run --ginc configs/co3d.gin --scene_name 380_44942_90000 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True 38 | python3 -m run --ginc configs/co3d.gin --scene_name 31_1375_4208 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True 39 | python3 -m run --ginc configs/co3d.gin --scene_name 216_22866_49900 --ginb run.run_train=False --ginb 
run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True 40 | python3 -m run --ginc configs/co3d.gin --scene_name 385_45373_90905 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True 41 | python3 -m run --ginc configs/co3d.gin --scene_name 372_41093_82000 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True 42 | python3 -m run --ginc configs/co3d.gin --scene_name 70_5758_13307 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True 43 | python3 -m run --ginc configs/co3d.gin --scene_name 40_1899_5810 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True 44 | python3 -m run --ginc configs/co3d.gin --scene_name 366_39368_76707 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True 45 | python3 -m run --ginc configs/co3d.gin --scene_name 397_49988_98402 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True 46 | python3 -m run --ginc configs/co3d.gin --scene_name 106_12689_26708 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True 47 | python3 -m run --ginc configs/co3d.gin --scene_name 396_49566_97710 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True 48 | python3 -m run --ginc configs/co3d.gin --scene_name 351_37059_67904 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True 49 | python3 -m run --ginc configs/co3d.gin --scene_name 165_18081_34406 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True 50 | python3 -m run --ginc configs/co3d.gin --scene_name 28_967_2810 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True 51 | -------------------------------------------------------------------------------- /script/v1.sh: -------------------------------------------------------------------------------- 1 | python3 -m run --ginc configs/co3d.gin --scene_name 131_15149_29891 --ginb run.postfix=\"v1\" 2 | python3 -m run --ginc configs/co3d.gin --scene_name 113_13363_23419 --ginb run.postfix=\"v1\" 3 | python3 -m run --ginc configs/co3d.gin --scene_name 255_27516_55384 --ginb run.postfix=\"v1\" 4 | python3 -m run --ginc configs/co3d.gin --scene_name 28_991_2996 --ginb run.postfix=\"v1\" 5 | python3 -m run --ginc configs/co3d.gin --scene_name 349_36520_66801 --ginb run.postfix=\"v1\" -------------------------------------------------------------------------------- /script/v2.sh: -------------------------------------------------------------------------------- 1 | 2 | python3 -m run --ginc configs/co3d_v2.gin --scene_name 131_15149_29891 3 | python3 -m run --ginc configs/co3d_v2.gin --scene_name 113_13363_23419 4 | python3 -m run --ginc configs/co3d_v2.gin --scene_name 255_27516_55384 5 | python3 -m run --ginc configs/co3d_v2.gin --scene_name 28_991_2996 6 | python3 -m run --ginc configs/co3d_v2.gin --scene_name 349_36520_66801 
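# v2.sh mirrors v1.sh over the same five scenes, using the v2 config instead.
# String-valued gin bindings need escaped quotes when passed from the shell,
# as in v1.sh above: --ginb run.postfix=\"v1\"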
-------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | import os 3 | import os.path as osp 4 | import warnings 5 | 6 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 7 | 8 | ROOT_DIR = osp.dirname(osp.abspath(__file__)) 9 | 10 | __version__ = None 11 | exec(open('lib/plenoxel/version.py', 'r').read()) 12 | 13 | CUDA_FLAGS = [] 14 | INSTALL_REQUIREMENTS = [] 15 | include_dirs = [osp.join(ROOT_DIR, "lib", "plenoxel", "include")] 16 | 17 | # From PyTorch3D 18 | cub_home = os.environ.get("CUB_HOME", None) 19 | if cub_home is None: 20 | prefix = os.environ.get("CONDA_PREFIX", None) 21 | if prefix is not None and os.path.isdir(prefix + "/include/cub"): 22 | cub_home = prefix + "/include" 23 | 24 | if cub_home is None: 25 | warnings.warn( 26 | "The environment variable `CUB_HOME` was not found. " 27 | "Installation will fail if your system CUDA toolkit version is less than 11. " 28 | "NVIDIA CUB can be downloaded " 29 | "from `https://github.com/NVIDIA/cub/releases`. You can unpack " 30 | "it to a location of your choice and set the environment variable " 31 | "`CUB_HOME` to the folder containing the `CMakeLists.txt` file." 32 | ) 33 | else: 34 | include_dirs.append(os.path.realpath(cub_home).replace("\\ ", " ")) 35 | 36 | try: 37 | ext_modules = [ 38 | CUDAExtension('lib.plenoxel', [ 39 | 'lib/plenoxel/svox2.cpp', 40 | 'lib/plenoxel/svox2_kernel.cu', 41 | 'lib/plenoxel/render_lerp_kernel_cuvol.cu', 42 | 'lib/plenoxel/render_lerp_kernel_nvol.cu', 43 | 'lib/plenoxel/render_svox1_kernel.cu', 44 | 'lib/plenoxel/misc_kernel.cu', 45 | 'lib/plenoxel/loss_kernel.cu', 46 | 'lib/plenoxel/optim_kernel.cu', 47 | ], include_dirs=include_dirs, 48 | optional=False), 49 | ] 50 | except Exception: 51 | import warnings 52 | warnings.warn("Failed to build CUDA extension") 53 | ext_modules = [] 54 | 55 | setup( 56 | name='plenoxel', 57 | version=__version__, 58 | author='Alex Yu', 59 | author_email='alexyu99126@gmail.com', 60 | description='PyTorch sparse voxel volume extension, including custom CUDA kernels', 61 | long_description='PyTorch sparse voxel volume extension, including custom CUDA kernels', 62 | ext_modules=ext_modules, 63 | setup_requires=['pybind11>=2.5.0'], 64 | packages=['lib.plenoxel'], 65 | cmdclass={'build_ext': BuildExtension}, 66 | zip_safe=False, 67 | ) 68 | -------------------------------------------------------------------------------- /utils/SensorData.py: -------------------------------------------------------------------------------- 1 | import os 2 | import struct 3 | import zlib 4 | 5 | import cv2 6 | import imageio 7 | import numpy as np 8 | import png 9 | 10 | COMPRESSION_TYPE_COLOR = {-1: "unknown", 0: "raw", 1: "png", 2: "jpeg"} 11 | COMPRESSION_TYPE_DEPTH = { 12 | -1: "unknown", 13 | 0: "raw_ushort", 14 | 1: "zlib_ushort", 15 | 2: "occi_ushort", 16 | } 17 | 18 | 19 | class RGBDFrame: 20 | def load(self, file_handle): 21 | self.camera_to_world = np.asarray( 22 | struct.unpack("f" * 16, file_handle.read(16 * 4)), dtype=np.float32 23 | ).reshape(4, 4) 24 | self.timestamp_color = struct.unpack("Q", file_handle.read(8))[0] 25 | self.timestamp_depth = struct.unpack("Q", file_handle.read(8))[0] 26 | self.color_size_bytes = struct.unpack("Q", file_handle.read(8))[0] 27 | self.depth_size_bytes = struct.unpack("Q", file_handle.read(8))[0] 28 | self.color_data = b"".join( 29 | struct.unpack( 30 | "c" * self.color_size_bytes, 
file_handle.read(self.color_size_bytes) 31 | ) 32 | ) 33 | self.depth_data = b"".join( 34 | struct.unpack( 35 | "c" * self.depth_size_bytes, file_handle.read(self.depth_size_bytes) 36 | ) 37 | ) 38 | 39 | def decompress_depth(self, compression_type): 40 | if compression_type == "zlib_ushort": 41 | return self.decompress_depth_zlib() 42 | else: 43 | raise ValueError(compression_type) 44 | 45 | def decompress_depth_zlib(self): 46 | return zlib.decompress(self.depth_data) 47 | 48 | def decompress_color(self, compression_type): 49 | if compression_type == "jpeg": 50 | return self.decompress_color_jpeg() 51 | else: 52 | raise ValueError(compression_type) 53 | 54 | def decompress_color_jpeg(self): 55 | return imageio.imread(self.color_data) 56 | 57 | 58 | class SensorData: 59 | def __init__(self, filename): 60 | self.version = 4 61 | self.load(filename) 62 | 63 | def load(self, filename): 64 | with open(filename, "rb") as f: 65 | version = struct.unpack("I", f.read(4))[0] 66 | assert self.version == version 67 | strlen = struct.unpack("Q", f.read(8))[0] 68 | self.sensor_name = b"".join(struct.unpack("c" * strlen, f.read(strlen))) 69 | self.intrinsic_color = np.asarray( 70 | struct.unpack("f" * 16, f.read(16 * 4)), dtype=np.float32 71 | ).reshape(4, 4) 72 | self.extrinsic_color = np.asarray( 73 | struct.unpack("f" * 16, f.read(16 * 4)), dtype=np.float32 74 | ).reshape(4, 4) 75 | self.intrinsic_depth = np.asarray( 76 | struct.unpack("f" * 16, f.read(16 * 4)), dtype=np.float32 77 | ).reshape(4, 4) 78 | self.extrinsic_depth = np.asarray( 79 | struct.unpack("f" * 16, f.read(16 * 4)), dtype=np.float32 80 | ).reshape(4, 4) 81 | self.color_compression_type = COMPRESSION_TYPE_COLOR[ 82 | struct.unpack("i", f.read(4))[0] 83 | ] 84 | self.depth_compression_type = COMPRESSION_TYPE_DEPTH[ 85 | struct.unpack("i", f.read(4))[0] 86 | ] 87 | self.color_width = struct.unpack("I", f.read(4))[0] 88 | self.color_height = struct.unpack("I", f.read(4))[0] 89 | self.depth_width = struct.unpack("I", f.read(4))[0] 90 | self.depth_height = struct.unpack("I", f.read(4))[0] 91 | self.depth_shift = struct.unpack("f", f.read(4))[0] 92 | num_frames = struct.unpack("Q", f.read(8))[0] 93 | self.frames = [] 94 | for i in range(num_frames): 95 | frame = RGBDFrame() 96 | frame.load(f) 97 | self.frames.append(frame) 98 | 99 | def export_depth_images(self, output_path, image_size=None, frame_skip=1): 100 | if not os.path.exists(output_path): 101 | os.makedirs(output_path) 102 | print( 103 | "exporting", len(self.frames) // frame_skip, "depth frames to", output_path 104 | ) 105 | for f in range(0, len(self.frames), frame_skip): 106 | depth_data = self.frames[f].decompress_depth(self.depth_compression_type) 107 | depth = np.frombuffer(depth_data, dtype=np.uint16).reshape( 108 | self.depth_height, self.depth_width 109 | ) 110 | if image_size is not None: 111 | depth = cv2.resize( 112 | depth, 113 | (image_size[1], image_size[0]), 114 | interpolation=cv2.INTER_NEAREST, 115 | ) 116 | # imageio.imwrite(os.path.join(output_path, str(f) + '.png'), depth) 117 | with open( 118 | os.path.join(output_path, f"{f:04d}" + ".png"), "wb" 119 | ) as png_file: # write 16-bit 120 | writer = png.Writer( 121 | width=depth.shape[1], height=depth.shape[0], bitdepth=16 122 | ) 123 | depth = depth.reshape(-1, depth.shape[1]).tolist() 124 | writer.write(png_file, depth) 125 | 126 | def export_color_images(self, output_path, image_size=None, frame_skip=1): 127 | if not os.path.exists(output_path): 128 | os.makedirs(output_path) 129 | print( 130 | "exporting", len(self.frames) // frame_skip, "color frames to", output_path 131 | ) 
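        # Each frame is decoded with the compression type declared in the .sens
        # header (only JPEG is handled above) and optionally resized with
        # INTER_NEAREST, matching the depth export so the two stay pixel-aligned.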
132 | for f in range(0, len(self.frames), frame_skip): 133 | color = self.frames[f].decompress_color(self.color_compression_type) 134 | if image_size is not None: 135 | color = cv2.resize( 136 | color, 137 | (image_size[1], image_size[0]), 138 | interpolation=cv2.INTER_NEAREST, 139 | ) 140 | imageio.imwrite(os.path.join(output_path, f"{f:04d}" + ".jpg"), color) 141 | 142 | def save_mat_to_file(self, matrix, filename): 143 | with open(filename, "w") as f: 144 | for line in matrix: 145 | np.savetxt(f, line[np.newaxis], fmt="%f") 146 | 147 | def export_poses(self, output_path, frame_skip=1): 148 | if not os.path.exists(output_path): 149 | os.makedirs(output_path) 150 | print( 151 | "exporting", len(self.frames) // frame_skip, "camera poses to", output_path 152 | ) 153 | for f in range(0, len(self.frames), frame_skip): 154 | self.save_mat_to_file( 155 | self.frames[f].camera_to_world, 156 | os.path.join(output_path, f"{f:04d}" + ".txt"), 157 | ) 158 | 159 | def export_intrinsics(self, output_path, image_size=None): 160 | if not os.path.exists(output_path): 161 | os.makedirs(output_path) 162 | intrinsic_color = self.intrinsic_color 163 | if image_size is not None: 164 | resize_scale = max( 165 | image_size[0] / self.color_height, image_size[1] / self.color_width 166 | ) 167 | intrinsic_color *= resize_scale 168 | intrinsic_color[[2, 3], [2, 3]] = 1 169 | print("exporting camera intrinsics to", output_path) 170 | self.save_mat_to_file( 171 | intrinsic_color, os.path.join(output_path, "intrinsic_color.txt") 172 | ) 173 | self.save_mat_to_file( 174 | self.extrinsic_color, os.path.join(output_path, "extrinsic_color.txt") 175 | ) 176 | self.save_mat_to_file( 177 | self.intrinsic_depth, os.path.join(output_path, "intrinsic_depth.txt") 178 | ) 179 | self.save_mat_to_file( 180 | self.extrinsic_depth, os.path.join(output_path, "extrinsic_depth.txt") 181 | ) 182 | -------------------------------------------------------------------------------- /utils/download_perf.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import sys 3 | import os 4 | 5 | curl_v1 = "https://storage.live.com/downloadfiles/V1/Zip?&authkey=AM3Xv7w16oqDDv0&application=1141147648" 6 | data_raw_format_v1 = "resIds=9C85B2C346F440CF%{}&canary=YcSqFbuNcf7ZJ8hEk3EWehB7amZnmUeDmCaKX9bO%2FeQ%3D2&authkey=AM3Xv7w16oqDDv0" 7 | 8 | # 9 | 10 | curl_v2 = "https://storage.live.com/downloadfiles/V1/Zip?authKey=%21ACaUbVBSIuDvCrI&application=1141147648" 11 | data_raw_format_v2 = "resIds=60A1A318FA7A3606%{}&canary=LerJFOBG2LJm%2FTP%2BoThDzUjrn%2BnHeGoiRiam4wV0IpA%3D8&authkey=%21ACaUbVBSIuDvCrI" 12 | 13 | def download(args): 14 | 15 | assert args.dataset in ["co3d", "scannet"] 16 | 17 | if args.dataset == "co3d": 18 | if args.chunks is None: 19 | chunks = [str(i).zfill(2) for i in range(100)] 20 | else: 21 | chunks = args.chunks.lstrip("[").rstrip("]").split(",") 22 | 23 | for chunk in chunks: 24 | chunk = chunk.zfill(2) 25 | chunk_int = int(chunk) 26 | outpath = os.path.join(args.outdir, chunk + ".zip") 27 | if chunk_int > 75: 28 | data_raw = data_raw_format_v2.format(str(211419 - 76 + chunk_int)) 29 | curl = curl_v2 30 | else: 31 | data_raw = data_raw_format_v1.format(str(21111 - 00 + chunk_int)) 32 | curl = curl_v1 33 | 34 | run_str = f"curl -L \"{curl}\" --data-raw \"{data_raw}\" --compressed --output {outpath}" 35 | print("Running:", run_str) 36 | os.system(run_str) 37 | 38 | if __name__ == "__main__": 39 | 40 | parser = argparse.ArgumentParser() 41 | parser.add_argument( 42 | "--dataset", 
43 | type=str, 44 | choices = ["co3d", "scannet"] 45 | ) 46 | parser.add_argument( 47 | "--chunks", 48 | type=str, 49 | default=None 50 | ) 51 | parser.add_argument( 52 | "--outdir", 53 | type=str, 54 | default="." 55 | ) 56 | args = parser.parse_args() 57 | 58 | download(args) -------------------------------------------------------------------------------- /utils/extract_scannet.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import sys 4 | 5 | from utils.SensorData import SensorData 6 | 7 | # params 8 | parser = argparse.ArgumentParser() 9 | # data paths 10 | parser.add_argument("--input_path", required=True, help="path to sens file to read") 11 | parser.add_argument("--output_path", required=True, help="path to output folder") 12 | parser.add_argument( 13 | "--export_depth_images", dest="export_depth_images", action="store_true" 14 | ) 15 | parser.add_argument( 16 | "--export_color_images", dest="export_color_images", action="store_true" 17 | ) 18 | parser.add_argument("--export_poses", dest="export_poses", action="store_true") 19 | parser.add_argument( 20 | "--export_intrinsics", dest="export_intrinsics", action="store_true" 21 | ) 22 | parser.set_defaults( 23 | export_depth_images=False, 24 | export_color_images=False, 25 | export_poses=False, 26 | export_intrinsics=False, 27 | ) 28 | 29 | opt = parser.parse_args() 30 | print(opt) 31 | 32 | 33 | def main(scene_name): 34 | print(f"processing {scene_name}") 35 | sens_file = os.path.join(opt.input_path, scene_name, f"{scene_name}.sens") 36 | outpath = os.path.join(opt.output_path, scene_name) 37 | if not os.path.exists(outpath): 38 | os.makedirs(outpath) 39 | 40 | # load the data 41 | imsize = [480, 640] 42 | sys.stdout.write("loading %s..." 
% sens_file) 43 | sd = SensorData(sens_file) 44 | sys.stdout.write("loaded!\n") 45 | if opt.export_depth_images: 46 | sd.export_depth_images(os.path.join(outpath, "depth"), image_size=imsize) 47 | if opt.export_color_images: 48 | sd.export_color_images(os.path.join(outpath, "color"), image_size=imsize) 49 | if opt.export_poses: 50 | sd.export_poses(os.path.join(outpath, "pose")) 51 | if opt.export_intrinsics: 52 | sd.export_intrinsics(os.path.join(outpath, "intrinsic"), image_size=imsize) 53 | 54 | 55 | if __name__ == "__main__": 56 | from multiprocessing import Pool 57 | 58 | scene_names = os.listdir("/root/data/scannet/scans") 59 | scene_names = sorted(scene_names) 60 | 61 | pool = Pool(processes=16) 62 | pool.map(main, scene_names) 63 | pool.close() 64 | pool.join() 65 | -------------------------------------------------------------------------------- /utils/extract_stats.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import os 4 | 5 | import numpy as np 6 | 7 | if __name__ == "__main__": 8 | parser = argparse.ArgumentParser() 9 | parser.add_argument( 10 | "--datadir", default=".", type=str, help="path to the directory" 11 | ) 12 | args = parser.parse_args() 13 | 14 | scenes = os.listdir(args.datadir) 15 | print(f"Total {len(scenes)} scenes from {args.datadir}") 16 | psnr, ssim, lpips = [], [], [] 17 | 18 | results, missing = [], [] 19 | for scene in scenes: 20 | json_file = os.path.join(args.datadir, scene, "results.json") 21 | if not os.path.exists(json_file): 22 | print(f"Not exist: {json_file}") 23 | missing.append(scene) 24 | continue 25 | else: 26 | with open(json_file, "r") as f: 27 | data = json.load(f) 28 | results.append( 29 | dict( 30 | scene=scene, 31 | psnr=data["PSNR"]["mean"], 32 | ssim=data["SSIM"]["mean"], 33 | lpips=data["LPIPS"]["mean"], 34 | ) 35 | ) 36 | 37 | score_name = ("psnr", "ssim", "lpips") 38 | 39 | for name in score_name: 40 | # print(f"{name} : {np.array(eval(name)).mean()}") 41 | metrics = np.array([r[name] for r in results]) 42 | metrics = metrics[~np.isnan(metrics)] 43 | if name == "psnr": 44 | print( 45 | f"{name:>5}: {np.mean(metrics):.3f}+-{np.std(metrics):.3f}, > 15: {np.mean(metrics > 15):.3f}, > 20: {np.mean(metrics > 20):.3f}, > 25: {np.mean(metrics > 25):.3f}, max: {np.max(metrics):.3f}, min: {np.min(metrics):.3f}, 25th percentile: {np.percentile(metrics, 25):.3f}, 50th percentile: {np.percentile(metrics, 50):.3f}, 75th percentile: {np.percentile(metrics, 75):.3f}, 90th: {np.percentile(metrics, 90):.3f}, 95th: {np.percentile(metrics, 95):.3f}" 46 | ) 47 | else: 48 | print( 49 | f"avg {name:>5}: {np.mean(metrics):.3f}, max: {np.max(metrics):.3f}, min: {np.min(metrics):.3f}" 50 | ) 51 | -------------------------------------------------------------------------------- /utils/logger.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | 4 | import wandb 5 | from pytorch_lightning.loggers import WandbLogger 6 | from pytorch_lightning.loggers.base import rank_zero_experiment 7 | from wandb.wandb_run import Run 8 | 9 | MAX_RETRY = 100 10 | 11 | 12 | class RetryingWandbLogger(WandbLogger): 13 | @property 14 | @rank_zero_experiment 15 | def experiment(self) -> Run: 16 | r""" 17 | Actual wandb object. To use wandb features in your 18 | :class:`~pytorch_lightning.core.lightning.LightningModule` do the following. 
19 | Example:: 20 | self.logger.experiment.some_wandb_function() 21 | """ 22 | if self._experiment is None: 23 | if self._offline: 24 | os.environ["WANDB_MODE"] = "dryrun" 25 | 26 | print("Initializing wandb") 27 | for i in range(MAX_RETRY): 28 | try: 29 | self._experiment = wandb.init( 30 | **self._wandb_init, 31 | ) 32 | break 33 | except ( 34 | TimeoutError, 35 | ConnectionError, 36 | wandb.errors.UsageError, 37 | wandb.errors.CommError, 38 | ) as e: 39 | print(f"Error {e}. Retrying in 5 sec") 40 | time.sleep(5) 41 | 42 | # save checkpoints in wandb dir to upload on W&B servers 43 | if self._log_model: 44 | self._save_dir = self._experiment.dir 45 | return self._experiment -------------------------------------------------------------------------------- /utils/notebooks/co3d_lists.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Generating Co3D Data List" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "The code below walks the whole Co3D dataset and builds a mapping from each scene ID to its class name, which is saved to dataloader/co3d_lists/co3d_list.json." 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 5, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "path_to_co3d = \"/home/yoonwoo/data/perfception/co3d\"" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 6, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "import json\n", 33 | "import os \n", 34 | "project_path = \"../..\"" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 7, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "frame_num_mapper = {}\n", 44 | "\n", 45 | "cls_list = [cls_name for cls_name in sorted(os.listdir(path_to_co3d))]\n", 46 | "for cls_name in cls_list: \n", 47 | " cls_path = os.path.join(path_to_co3d, cls_name)\n", 48 | " for frame_num in sorted(os.listdir(cls_path)):\n", 49 | " frame_path = os.path.join(cls_path, frame_num)\n", 50 | " if not os.path.isdir(frame_path): \n", 51 | " continue\n", 52 | " frame_num_mapper[frame_num] = cls_name" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 8, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "with open(\"../../dataloader/co3d_lists/co3d_list.json\", \"w\") as fp:\n", 62 | " json.dump(frame_num_mapper, fp, indent=4, sort_keys=True)" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 18, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "import json\n", 72 | "import yaml\n", 73 | "\n", 74 | "with open(\"../../dataloader/co3d_lists/co3d_list.json\") as fp:\n", 75 | " co3d_lists = json.load(fp)\n", 76 | "\n", 77 | "co3d_scene_lists = sorted(co3d_lists.keys())\n", 78 | "\n", 79 | "def generate_sweep_co3d():\n", 80 | " params = dict()\n", 81 | " params[\"scene_name\"] = dict(values=co3d_scene_lists)\n", 82 | " params[\"entity\"] = dict(value=\"postech_cvlab\")\n", 83 | " config = dict()\n", 84 | " config[\"method\"] = \"grid\"\n", 85 | " config[\"program\"] = \"run.py\"\n", 86 | " config[\"parameters\"] = params\n", 87 | " with open(\"../../cache/co3d.yaml\", \"w\") as fp:\n", 88 | " yaml.dump(config, fp)\n", 89 | "\n", 90 | "generate_sweep_co3d()" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [] 99 | } 100 | ], 101 | "metadata": { 102 | "kernelspec": { 103 | "display_name": "Python 3.8.10 64-bit", 104 | "language": "python", 105 | 
"name": "python3" 106 | }, 107 | "language_info": { 108 | "codemirror_mode": { 109 | "name": "ipython", 110 | "version": 3 111 | }, 112 | "file_extension": ".py", 113 | "mimetype": "text/x-python", 114 | "name": "python", 115 | "nbconvert_exporter": "python", 116 | "pygments_lexer": "ipython3", 117 | "version": "3.8.10" 118 | }, 119 | "orig_nbformat": 4, 120 | "vscode": { 121 | "interpreter": { 122 | "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6" 123 | } 124 | } 125 | }, 126 | "nbformat": 4, 127 | "nbformat_minor": 2 128 | } 129 | -------------------------------------------------------------------------------- /utils/notebooks/collect_results.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 26, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "import json\n", 11 | "import numpy as np" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 27, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "path_to_co3d_perf = \"/home/yoonwoo/data/perfception/perfception_co3d\"" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 28, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "psnr, ssim, lpips = [], [], []\n", 30 | "nan = []\n", 31 | "missing = []\n", 32 | "\n", 33 | "for scene in sorted(os.listdir(path_to_co3d_perf)):\n", 34 | " scene_path = os.path.join(path_to_co3d_perf, scene)\n", 35 | " result_path = os.path.join(scene_path, \"results.json\")\n", 36 | " if os.path.exists(result_path):\n", 37 | " with open(result_path, \"r\") as fp:\n", 38 | " json_file = json.load(fp)\n", 39 | " psnr.append(json_file[\"PSNR\"][\"test\"])\n", 40 | " ssim.append(json_file[\"SSIM\"][\"test\"])\n", 41 | " lpips_curr = json_file[\"LPIPS\"][\"test\"]\n", 42 | " if np.isnan(lpips_curr):\n", 43 | " nan.append(scene_path)\n", 44 | " else:\n", 45 | " lpips.append(json_file[\"LPIPS\"][\"test\"])\n", 46 | " else:\n", 47 | " missing.append(scene_path)" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 33, 53 | "metadata": {}, 54 | "outputs": [ 55 | { 56 | "name": "stdout", 57 | "output_type": "stream", 58 | "text": [ 59 | "PSNR : 28.816426871881763\n", 60 | "SSIM : 0.8564840038082171\n", 61 | "LPIPS : 0.34507738965239027\n", 62 | "0.00021483430903915354\n", 63 | "PSNR > 20 0.9980664912186477\n", 64 | "PSNR > 20 0.9824372952360492\n", 65 | "PSNR > 20 0.872603254739782\n" 66 | ] 67 | } 68 | ], 69 | "source": [ 70 | "psnr = np.array(psnr)\n", 71 | "ssim = np.array(ssim)\n", 72 | "lpips = np.array(lpips)\n", 73 | "\n", 74 | "print(\"PSNR : \", psnr.mean())\n", 75 | "print(\"SSIM : \", ssim.mean())\n", 76 | "print(\"LPIPS : \", lpips.mean())\n", 77 | "print(len(nan) / len(psnr))\n", 78 | "print(\"PSNR > 20\", (psnr > 15).mean())\n", 79 | "print(\"PSNR > 20\", (psnr > 20).mean())\n", 80 | "print(\"PSNR > 20\", (psnr > 25).mean())" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 31, 86 | "metadata": {}, 87 | "outputs": [ 88 | { 89 | "data": { 90 | "text/plain": [ 91 | "[]" 92 | ] 93 | }, 94 | "execution_count": 31, 95 | "metadata": {}, 96 | "output_type": "execute_result" 97 | } 98 | ], 99 | "source": [ 100 | "missing" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "metadata": {}, 107 | "outputs": [], 108 | "source": [] 109 | } 110 | ], 111 | "metadata": { 112 | "interpreter": { 113 | "hash": 
"68327cb5290cd066fc32988c700987102dfc6b9931a40b8ea9c47728386b26ed" 114 | }, 115 | "kernelspec": { 116 | "display_name": "Python 3.8.5 ('nerf_factory')", 117 | "language": "python", 118 | "name": "python3" 119 | }, 120 | "language_info": { 121 | "codemirror_mode": { 122 | "name": "ipython", 123 | "version": 3 124 | }, 125 | "file_extension": ".py", 126 | "mimetype": "text/x-python", 127 | "name": "python", 128 | "nbconvert_exporter": "python", 129 | "pygments_lexer": "ipython3", 130 | "version": "3.8.5" 131 | }, 132 | "orig_nbformat": 4 133 | }, 134 | "nbformat": 4, 135 | "nbformat_minor": 2 136 | } 137 | -------------------------------------------------------------------------------- /utils/notebooks/find_missing_logs.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "import shutil\n", 11 | "import json\n", 12 | "import yaml" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "path_to_perf_co3d = \"/home/yoonwoo/data/perfception/perfception_co3d\"" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "cnt = 0\n", 31 | "co3d_lists = os.listdir(path_to_perf_co3d)\n", 32 | "for co3d_dir_name in co3d_lists:\n", 33 | " co3d_dir_path = os.path.join(path_to_perf_co3d, co3d_dir_name)\n", 34 | " results_path = os.path.join(co3d_dir_path, \"results.json\")\n", 35 | " if not os.path.exists(results_path):\n", 36 | " # shutil.rmtree(co3d_dir_path)\n", 37 | " cnt += 1\n", 38 | "print(cnt, len(co3d_lists))" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "with open(\"../../dataloader/co3d_lists/co3d_list.json\") as fp:\n", 48 | " co3d_all_lists = json.load(fp)\n", 49 | "\n", 50 | "co3d_lists = os.listdir(path_to_perf_co3d)\n", 51 | "print(co3d_lists[0], list(co3d_all_lists.keys())[0])\n", 52 | "co3d_missing_scene_list = []\n", 53 | "for co3d_dir_name in co3d_all_lists.keys():\n", 54 | " if \"plenoxel_co3d_\" + co3d_dir_name not in co3d_lists:\n", 55 | " co3d_missing_scene_list.append(co3d_dir_name)\n", 56 | "\n", 57 | "def generate_sweep_co3d():\n", 58 | " params = dict()\n", 59 | " params[\"scene_name\"] = dict(values=co3d_missing_scene_list)\n", 60 | " params[\"entity\"] = dict(value=\"postech_cvlab\")\n", 61 | " config = dict()\n", 62 | " config[\"method\"] = \"grid\"\n", 63 | " config[\"program\"] = \"run.py\"\n", 64 | " config[\"parameters\"] = params\n", 65 | " with open(\"../../cache/co3d_missing.yaml\", \"w\") as fp:\n", 66 | " yaml.dump(config, fp)\n", 67 | "\n", 68 | "generate_sweep_co3d()" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "# Find missing logs for rendering" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 22, 81 | "metadata": {}, 82 | "outputs": [], 83 | "source": [ 84 | "import os\n", 85 | "import json\n", 86 | "import yaml\n", 87 | "import shutil" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 30, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "rendered_data_path = \"/home/yoonwoo/data/render\"\n", 97 | "assert os.path.exists(rendered_data_path)\n", 98 | "with 
open(\"../../dataloader/co3d_lists/co3d_list.json\") as fp:\n", 99 | " co3d_all_lists = json.load(fp)\n", 100 | "\n", 101 | "co3d_missing_scene_list = []\n", 102 | "for (scene_number, class_name) in co3d_all_lists.items():\n", 103 | " dirpath = os.path.join(rendered_data_path, class_name, scene_number)\n", 104 | " bgpath = os.path.join(dirpath, \"bg\")\n", 105 | " if not os.path.exists(os.path.join(bgpath, \"image049.jpg\")): \n", 106 | " co3d_missing_scene_list.append(scene_number)\n", 107 | " # if os.path.exists(dirpath):\n", 108 | " # shutil.rmtree(dirpath)\n", 109 | " print(bgpath)" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 20, 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [ 118 | "def generate_sweep_co3d():\n", 119 | " params = dict()\n", 120 | " params[\"scene_name\"] = dict(values=co3d_missing_scene_list)\n", 121 | " params[\"entity\"] = dict(value=\"postech_cvlab\")\n", 122 | " config = dict()\n", 123 | " config[\"method\"] = \"grid\"\n", 124 | " config[\"program\"] = \"run.py\"\n", 125 | " config[\"parameters\"] = params\n", 126 | " with open(\"../../cache/co3d_missing.yaml\", \"w\") as fp:\n", 127 | " yaml.dump(config, fp)\n", 128 | "\n", 129 | "generate_sweep_co3d()" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 29, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "# Move the rendered results back\n", 139 | "import os\n", 140 | "rendered_dir = \"../../render\"\n", 141 | "for cls_name in os.listdir(rendered_dir):\n", 142 | " cls_path = os.path.join(rendered_dir, cls_name) \n", 143 | " for scene_num in os.listdir(cls_path):\n", 144 | " scene_path = os.path.join(cls_path, scene_num)\n", 145 | " scene_mv_to = os.path.join(rendered_data_path, cls_name, scene_num)\n", 146 | " shutil.copytree(scene_path, scene_mv_to)" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 48, 152 | "metadata": {}, 153 | "outputs": [], 154 | "source": [ 155 | "# Merge the bg and fg/bg\n", 156 | "import os\n", 157 | "\n", 158 | "co3d_2d_orig = \"/home/yoonwoo/data/perfception/perfception_co3d_2d\"\n", 159 | "assert os.path.exists(co3d_2d_orig)\n", 160 | "\n", 161 | "for cls_name in os.listdir(co3d_2d_orig):\n", 162 | " cls_path = os.path.join(co3d_2d_orig, cls_name)\n", 163 | " for scene_num in os.listdir(cls_path):\n", 164 | " scene_path = os.path.join(cls_path, scene_num)\n", 165 | " bg_path = os.path.join(scene_path, \"bg\")\n", 166 | " # if os.path.exists(bg_path): \n", 167 | " # shutil.rmtree(bg_path)\n", 168 | "\n", 169 | "for cls_name in os.listdir(co3d_2d_orig):\n", 170 | " cls_path = os.path.join(co3d_2d_orig, cls_name)\n", 171 | " for scene_num in os.listdir(cls_path): \n", 172 | " scene_path = os.path.join(cls_path, scene_num)\n", 173 | " scene_path_src = os.path.join(\"/home/yoonwoo/data/render\", cls_name, scene_num)\n", 174 | " pose_path = os.path.join(scene_path_src, \"poses.npy\")\n", 175 | " intrinsics_path = os.path.join(scene_path_src, \"intrinsics.npy\")\n", 176 | " bg_path = os.path.join(scene_path_src, \"bg\")\n", 177 | " assert os.path.exists(pose_path)\n", 178 | " assert os.path.exists(intrinsics_path)\n", 179 | " assert os.path.exists(bg_path)\n", 180 | " assert os.path.exists(scene_path)\n", 181 | " shutil.move(pose_path, os.path.join(scene_path, \"poses.npy\"))\n", 182 | " shutil.move(intrinsics_path, os.path.join(scene_path, \"intrinsics.npy\"))\n", 183 | " shutil.move(bg_path, os.path.join(scene_path, \"bg\"))" 184 | ] 185 | }, 186 | { 187 | "cell_type": 
"code", 188 | "execution_count": null, 189 | "metadata": {}, 190 | "outputs": [], 191 | "source": [] 192 | } 193 | ], 194 | "metadata": { 195 | "interpreter": { 196 | "hash": "68f3718ea42175570df1c98c46be3c56e287215895583cda485fe92d043b42e4" 197 | }, 198 | "kernelspec": { 199 | "display_name": "Python 3.8.5 ('atp')", 200 | "language": "python", 201 | "name": "python3" 202 | }, 203 | "language_info": { 204 | "codemirror_mode": { 205 | "name": "ipython", 206 | "version": 3 207 | }, 208 | "file_extension": ".py", 209 | "mimetype": "text/x-python", 210 | "name": "python", 211 | "nbconvert_exporter": "python", 212 | "pygments_lexer": "ipython3", 213 | "version": "3.8.5" 214 | }, 215 | "orig_nbformat": 4 216 | }, 217 | "nbformat": 4, 218 | "nbformat_minor": 2 219 | } 220 | -------------------------------------------------------------------------------- /utils/notebooks/make_collage_list.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 27, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "import json\n", 11 | "import numpy as np" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 28, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "logpath = \"../../logs\"\n", 21 | "loglist = os.listdir(logpath)\n", 22 | "loglistpath = [os.path.join(logpath, f) for f in loglist]" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 47, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "cls_list, ssim_list = [], []\n", 32 | "\n", 33 | "for fpath in loglistpath:\n", 34 | " json_path = os.path.join(fpath, \"results.json\")\n", 35 | " txt_path = os.path.join(fpath, \"class_info.txt\")\n", 36 | " with open(json_path) as fp:\n", 37 | " json_file = json.load(fp)\n", 38 | " with open(txt_path) as fp:\n", 39 | " txt_path = fp.readline()\n", 40 | " ssim_list.append(json_file[\"SSIM\"][\"test\"])\n", 41 | " cls_list.append(txt_path)\n", 42 | "\n", 43 | "ssim_list = np.array(ssim_list)" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 48, 49 | "metadata": {}, 50 | "outputs": [ 51 | { 52 | "name": "stdout", 53 | "output_type": "stream", 54 | "text": [ 55 | "0.9771251082420349\n" 56 | ] 57 | } 58 | ], 59 | "source": [ 60 | "argsort = np.argsort(ssim_list)[::-1]\n", 61 | "print(ssim_list[argsort[0]])\n", 62 | "\n", 63 | "scene_list, cls_scene_list = [], []\n", 64 | "for idx in argsort:\n", 65 | " if len(scene_list) == 50:\n", 66 | " break\n", 67 | " cls_name = cls_list[idx]\n", 68 | " ssim_score = ssim_list[idx]\n", 69 | " if cls_name in cls_scene_list: continue\n", 70 | " cls_scene_list.append(cls_name)\n", 71 | " scene_list.append(loglist[idx])\n", 72 | "\n", 73 | "parsed_scene_name = [\"_\".join(scene_name.split(\"_\")[2:]) for scene_name in scene_list]" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 50, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "with open(\"collage.sh\", \"w\") as fp:\n", 83 | " for scene in parsed_scene_name:\n", 84 | " fp.write(\n", 85 | " f\"python3 -m run --ginc configs/co3d.gin --scene_name {scene} --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True\\n\"\n", 86 | " )" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [] 95 | } 96 | ], 97 | "metadata": { 98 | 
"kernelspec": { 99 | "display_name": "Python 3.8.13 ('perfception')", 100 | "language": "python", 101 | "name": "python3" 102 | }, 103 | "language_info": { 104 | "codemirror_mode": { 105 | "name": "ipython", 106 | "version": 3 107 | }, 108 | "file_extension": ".py", 109 | "mimetype": "text/x-python", 110 | "name": "python", 111 | "nbconvert_exporter": "python", 112 | "pygments_lexer": "ipython3", 113 | "version": "3.8.13" 114 | }, 115 | "orig_nbformat": 4, 116 | "vscode": { 117 | "interpreter": { 118 | "hash": "b471587bbe11c509fa4c273e3c27dad8d0f74f51357e50e66bf733daf1d8a5fb" 119 | } 120 | } 121 | }, 122 | "nbformat": 4, 123 | "nbformat_minor": 2 124 | } 125 | -------------------------------------------------------------------------------- /utils/notebooks/make_video.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 5, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "import imageio\n", 11 | "from PIL import Image\n", 12 | "import numpy as np" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 14, 18 | "metadata": {}, 19 | "outputs": [ 20 | { 21 | "name": "stderr", 22 | "output_type": "stream", 23 | "text": [ 24 | "IMAGEIO FFMPEG_WRITER WARNING: input image is not divisible by macro_block_size=16, resizing from (168, 300) to (176, 304) to ensure video compatibility with most codecs and players. To prevent resizing, make your input image divisible by the macro_block_size or set the macro_block_size to 1 (risking incompatibility).\n", 25 | "[swscaler @ 0x6977780] Warning: data is not aligned! This can lead to a speed loss\n", 26 | "IMAGEIO FFMPEG_WRITER WARNING: input image is not divisible by macro_block_size=16, resizing from (168, 300) to (176, 304) to ensure video compatibility with most codecs and players. To prevent resizing, make your input image divisible by the macro_block_size or set the macro_block_size to 1 (risking incompatibility).\n", 27 | "[swscaler @ 0x6414a00] Warning: data is not aligned! This can lead to a speed loss\n", 28 | "IMAGEIO FFMPEG_WRITER WARNING: input image is not divisible by macro_block_size=16, resizing from (168, 300) to (176, 304) to ensure video compatibility with most codecs and players. To prevent resizing, make your input image divisible by the macro_block_size or set the macro_block_size to 1 (risking incompatibility).\n", 29 | "[swscaler @ 0x6e96a00] Warning: data is not aligned! 
This can lead to a speed loss\n" 30 | ] 31 | }, 32 | { 33 | "name": "stdout", 34 | "output_type": "stream", 35 | "text": [ 36 | "remote 117_13756_28310\n" 37 | ] 38 | } 39 | ], 40 | "source": [ 41 | "render_path = \"../../render\"\n", 42 | "cls_list = os.listdir(render_path)\n", 43 | "for cls_name in cls_list: \n", 44 | " cls_path = os.path.join(render_path, cls_name)\n", 45 | " seq_list = os.listdir(cls_path)\n", 46 | " for seq in seq_list: \n", 47 | " seq_path = os.path.join(cls_path, seq)\n", 48 | " assets_list = [\"fg\", \"fgbg\", \"mask\"]\n", 49 | " for asset in assets_list:\n", 50 | " asset_path = os.path.join(seq_path, asset)\n", 51 | " img_path_list = os.listdir(asset_path)\n", 52 | " img_list = []\n", 53 | " for img in img_path_list:\n", 54 | " if img.endswith(\".mp4\"):\n", 55 | " continue\n", 56 | " imgpath = os.path.join(asset_path, img)\n", 57 | " img_list.append(np.asarray(Image.open(imgpath)))\n", 58 | " imageio.mimwrite(os.path.join(asset_path, f\"{seq}_{asset}.mp4\"), img_list)\n", 59 | " print(cls_name, seq)\n", 60 | " break\n", 61 | " break\n" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [] 70 | } 71 | ], 72 | "metadata": { 73 | "kernelspec": { 74 | "display_name": "Python 3.8.13 ('perfception')", 75 | "language": "python", 76 | "name": "python3" 77 | }, 78 | "language_info": { 79 | "codemirror_mode": { 80 | "name": "ipython", 81 | "version": 3 82 | }, 83 | "file_extension": ".py", 84 | "mimetype": "text/x-python", 85 | "name": "python", 86 | "nbconvert_exporter": "python", 87 | "pygments_lexer": "ipython3", 88 | "version": "3.8.13" 89 | }, 90 | "orig_nbformat": 4, 91 | "vscode": { 92 | "interpreter": { 93 | "hash": "b471587bbe11c509fa4c273e3c27dad8d0f74f51357e50e66bf733daf1d8a5fb" 94 | } 95 | } 96 | }, 97 | "nbformat": 4, 98 | "nbformat_minor": 2 99 | } 100 | -------------------------------------------------------------------------------- /utils/notebooks/masked_out.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 10, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "from PIL import Image \n", 11 | "import numpy as np\n", 12 | "import tqdm\n", 13 | "import json\n", 14 | "import gzip" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 19, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "with open(\"../../dataloader/co3d_lists/co3d_list.json\") as fp:\n", 24 | " co3d_lists = json.load(fp)\n", 25 | "\n", 26 | "ours_path = os.path.join(\"/home/yoonwoo/data/perfception/perfception_co3d\")\n", 27 | "co3d_path = os.path.join(\"/home/yoonwoo/data/perfception/co3d\")\n", 28 | "\n", 29 | "\n", 30 | "for (k, v) in co3d_lists.items():\n", 31 | " basedir = os.path.join(co3d_path, v)\n", 32 | " co3d_curr = os.path.join(co3d_path, v)\n", 33 | " json_path = os.path.join(basedir, \"frame_annotations.jgz\")\n", 34 | " with gzip.open(json_path, \"r\") as fp:\n", 35 | " all_frames_data = json.load(fp)\n", 36 | "\n", 37 | " frame_data, images, intrinsics, extrinsics, image_sizes = [], [], [], [], []\n", 38 | "\n", 39 | " for temporal_data in all_frames_data:\n", 40 | " if temporal_data[\"sequence_name\"] == k:\n", 41 | " frame_data.append(temporal_data)\n", 42 | "\n", 43 | " test_frame_data = frame_data[::10]\n", 44 | " test_fname = [os.path.join(co3d_path, frame[\"image\"][\"path\"]) for frame in test_frame_data]\n", 45 | " 
mask_fname = [os.path.join(co3d_path, frame[\"mask\"][\"path\"]) for frame in test_frame_data]\n", 46 | "\n", 47 | " test_img = [np.asarray(Image.open(fname)) for fname in test_fname]\n", 48 | " mask_img = [np.asarray(Image.open(fname)) for fname in mask_fname]\n", 49 | "\n", 50 | " ours_curr = os.path.join(ours_path, \"render_model\")\n", 51 | " break" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 21, 57 | "metadata": {}, 58 | "outputs": [ 59 | { 60 | "data": { 61 | "text/plain": [ 62 | "(2000, 1086)" 63 | ] 64 | }, 65 | "execution_count": 21, 66 | "metadata": {}, 67 | "output_type": "execute_result" 68 | } 69 | ], 70 | "source": [ 71 | "mask_img[0].shape" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [] 80 | } 81 | ], 82 | "metadata": { 83 | "interpreter": { 84 | "hash": "68f3718ea42175570df1c98c46be3c56e287215895583cda485fe92d043b42e4" 85 | }, 86 | "kernelspec": { 87 | "display_name": "Python 3.8.5 ('atp')", 88 | "language": "python", 89 | "name": "python3" 90 | }, 91 | "language_info": { 92 | "codemirror_mode": { 93 | "name": "ipython", 94 | "version": 3 95 | }, 96 | "file_extension": ".py", 97 | "mimetype": "text/x-python", 98 | "name": "python", 99 | "nbconvert_exporter": "python", 100 | "pygments_lexer": "ipython3", 101 | "version": "3.8.5" 102 | }, 103 | "orig_nbformat": 4 104 | }, 105 | "nbformat": 4, 106 | "nbformat_minor": 2 107 | } 108 | -------------------------------------------------------------------------------- /utils/notebooks/v1_v2_score.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 9, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "import json\n", 11 | "import numpy as np" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 5, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "v1_v2_path = \"/home/yoonwoo/data/perfception/single_v2_log/\"\n", 21 | "scene_list = os.listdir(v1_v2_path)\n", 22 | "v1_list, v2_list = [], []\n", 23 | "for scene in scene_list:\n", 24 | " scene_path = os.path.join(v1_v2_path, scene)\n", 25 | " if \"v1\" in scene:\n", 26 | " v1_list.append(scene_path)\n", 27 | " assert os.path.exists(os.path.join(scene_path, \"results.json\"))\n", 28 | " if \"v2\" in scene:\n", 29 | " v2_list.append(scene_path)\n", 30 | " assert os.path.exists(os.path.join(scene_path, \"results.json\"))" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 10, 36 | "metadata": {}, 37 | "outputs": [ 38 | { 39 | "name": "stdout", 40 | "output_type": "stream", 41 | "text": [ 42 | "28.818891472286648\n", 43 | "0.8507727715704176\n", 44 | "0.3533864857421981\n", 45 | "29.86061403486464\n", 46 | "0.8563342110978233\n", 47 | "0.3178743819395701\n" 48 | ] 49 | } 50 | ], 51 | "source": [ 52 | "select_list = [v1_list, v2_list]\n", 53 | "for v_list in select_list:\n", 54 | " psnr, ssim, lpips = [], [], []\n", 55 | " for scene in v_list:\n", 56 | " with open(os.path.join(scene, \"results.json\")) as fp:\n", 57 | " json_file = json.load(fp)\n", 58 | " psnr.append(json_file[\"PSNR\"][\"test\"])\n", 59 | " ssim.append(json_file[\"SSIM\"][\"test\"])\n", 60 | " lpips.append(json_file[\"LPIPS\"][\"test\"])\n", 61 | " print(np.mean(np.array(psnr)))\n", 62 | " print(np.mean(np.array(ssim)))\n", 63 | " print(np.mean(np.array(lpips)))" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | 
"execution_count": null, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [] 79 | } 80 | ], 81 | "metadata": { 82 | "kernelspec": { 83 | "display_name": "Python 3.8.13 ('perfception')", 84 | "language": "python", 85 | "name": "python3" 86 | }, 87 | "language_info": { 88 | "codemirror_mode": { 89 | "name": "ipython", 90 | "version": 3 91 | }, 92 | "file_extension": ".py", 93 | "mimetype": "text/x-python", 94 | "name": "python", 95 | "nbconvert_exporter": "python", 96 | "pygments_lexer": "ipython3", 97 | "version": "3.8.13" 98 | }, 99 | "orig_nbformat": 4, 100 | "vscode": { 101 | "interpreter": { 102 | "hash": "b471587bbe11c509fa4c273e3c27dad8d0f74f51357e50e66bf733daf1d8a5fb" 103 | } 104 | } 105 | }, 106 | "nbformat": 4, 107 | "nbformat_minor": 2 108 | } 109 | -------------------------------------------------------------------------------- /utils/notebooks/visualize_voxel.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Code for visualizing Plenoxel voxel" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import os\n", 17 | "import json\n", 18 | "# os.environ['DISPLAY']=\":1\"\n", 19 | "\n", 20 | "import numpy as np\n", 21 | "import open3d as o3d\n", 22 | "import torch" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 2, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "def voxel2pcd(ckpt_dir):\n", 32 | "\n", 33 | " path_candidate = os.path.join(ckpt_dir, \"last.ckpt\")\n", 34 | " if not os.path.exists(path_candidate):\n", 35 | " path_candidate = os.path.join(ckpt_dir, \"data.ckpt\")\n", 36 | " ckpt = torch.load(path_candidate)\n", 37 | "\n", 38 | " density = ckpt[\"state_dict\"][\"model.density_data\"].detach().cpu()\n", 39 | " links_idx = ckpt[\"state_dict\"][\"model.links_idx\"].detach().cpu()\n", 40 | " valid = torch.where(density > 0.0)[0].long()\n", 41 | " density, links_idx = density[valid], links_idx[valid].long()\n", 42 | "\n", 43 | " resolution = (\n", 44 | " ckpt[\"reso_list\"][\"reso_idx\"] \n", 45 | " if \"reso_list\" in ckpt.keys() else \n", 46 | " [256, 256, 256]\n", 47 | " )\n", 48 | " \n", 49 | " links_idx = torch.stack(\n", 50 | " [\n", 51 | " links_idx // (resolution[1] * resolution[2]),\n", 52 | " links_idx % (resolution[1] * resolution[2]) // resolution[2],\n", 53 | " -links_idx % resolution[2],\n", 54 | " ],\n", 55 | " -1,\n", 56 | " )\n", 57 | " pts = links_idx.numpy().astype(np.float64)\n", 58 | " pts_color = (density - density.min()) / (density.max() - density.min())\n", 59 | " pts_color = pts_color.numpy().astype(np.float64).repeat(3, axis=-1)\n", 60 | "\n", 61 | " pts = np.concatenate([pts], axis=0)\n", 62 | " pts_color = np.concatenate([pts_color], axis=0)\n", 63 | "\n", 64 | " pcd = o3d.geometry.PointCloud()\n", 65 | " pcd.points = o3d.utility.Vector3dVector(pts)\n", 66 | " pcd.colors = o3d.utility.Vector3dVector(pts_color)\n", 67 | " o3d.visualization.draw_geometries([pcd])" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 3, 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [ 76 | "voxel2pcd(\"/home/yoonwoo/Workspace/NeRF-Factory/logs/plenoxel_co3d_106_12648_23157\")" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 
72, 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "path_to_ckpt_dir = \"\"\n", 86 | "ckpt_dirs = []\n", 87 | "# ckpt_dirs = [\"plenoxel_torch_110_13051_23361\"]\n", 88 | "path_to_ckpt_dir = \"/home/yoonwoo/Workspace/NeRF-Factory/logs\"\n", 89 | "path_to_v1 = \"/home/yoonwoo/data/perfception/perfception_co3d\"\n", 90 | "ckpt_dirs = [\"plenoxel_co3d_115_13559_29303\"]\n", 91 | "ckpt_dirs = [f for f in sorted(os.listdir(\"../../logs/\")) if \"plenoxel\" in f]" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 80, 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [ 100 | "v2_psnr, v1_psnr = [], []\n", 101 | "v2_ssim, v1_ssim = [], []\n", 102 | "v2_lpips, v1_lpips = [], []\n", 103 | "\n", 104 | "for ckpt_dir in ckpt_dirs: \n", 105 | " path_ckpt_dir = os.path.join(path_to_ckpt_dir, ckpt_dir)\n", 106 | " path_v1_dir = os.path.join(path_to_v1, ckpt_dir.replace(\"co3d\", \"torch\"))\n", 107 | " if not os.path.exists(path_v1_dir):\n", 108 | " continue\n", 109 | " # voxel2pcd(path_ckpt_dir)\n", 110 | " # voxel2pcd(path_v1_dir)\n", 111 | " result_v2 = os.path.join(path_to_ckpt_dir,ckpt_dir, \"results.json\")\n", 112 | " result_v1 = os.path.join(path_to_v1, ckpt_dir.replace(\"co3d\", \"torch\"), \"results.json\")\n", 113 | " with open(result_v2) as fp:\n", 114 | " json_file = json.load(fp)\n", 115 | " v2_psnr.append(json_file[\"PSNR\"][\"test\"])\n", 116 | " v2_ssim.append(json_file[\"SSIM\"][\"test\"])\n", 117 | " v2_lpips.append(json_file[\"LPIPS\"][\"test\"])\n", 118 | " with open(result_v1) as fp:\n", 119 | " json_file = json.load(fp)\n", 120 | " v1_psnr.append(json_file[\"PSNR\"][\"test_mean\"])\n", 121 | " v1_ssim.append(json_file[\"SSIM\"][\"test_mean\"])\n", 122 | " v1_lpips.append(json_file[\"LPIPS-VGG\"][\"test_mean\"])" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": 83, 128 | "metadata": {}, 129 | "outputs": [ 130 | { 131 | "name": "stdout", 132 | "output_type": "stream", 133 | "text": [ 134 | "v2, v1 PSNR: 27.934632050363643 26.406888197799503\n", 135 | "v2, v1 SSIM: 0.8345804088994077 0.7940645335536254\n", 136 | "v2, v1 LPIPS: 0.36206666929157155 0.4027399397210071\n" 137 | ] 138 | } 139 | ], 140 | "source": [ 141 | "print(\"v2, v1 PSNR: \", np.array(v2_psnr).mean(), \" \", np.array(v1_psnr).mean())\n", 142 | "print(\"v2, v1 SSIM: \", np.array(v2_ssim).mean(), \" \", np.array(v1_ssim).mean())\n", 143 | "print(\"v2, v1 LPIPS: \", np.array(v2_lpips).mean(), \" \", np.array(v1_lpips).mean())" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [] 152 | } 153 | ], 154 | "metadata": { 155 | "interpreter": { 156 | "hash": "68327cb5290cd066fc32988c700987102dfc6b9931a40b8ea9c47728386b26ed" 157 | }, 158 | "kernelspec": { 159 | "display_name": "Python 3.8.5 ('nerf_factory')", 160 | "language": "python", 161 | "name": "python3" 162 | }, 163 | "language_info": { 164 | "codemirror_mode": { 165 | "name": "ipython", 166 | "version": 3 167 | }, 168 | "file_extension": ".py", 169 | "mimetype": "text/x-python", 170 | "name": "python", 171 | "nbconvert_exporter": "python", 172 | "pygments_lexer": "ipython3", 173 | "version": "3.8.5" 174 | }, 175 | "orig_nbformat": 4 176 | }, 177 | "nbformat": 4, 178 | "nbformat_minor": 2 179 | } 180 | -------------------------------------------------------------------------------- /utils/ray.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import 
torch 3 | 4 | def convert_to_ndc( 5 | origins, 6 | directions, 7 | ndc_coeffs, 8 | near: float = 1.0 9 | ): 10 | """Convert a set of rays to NDC coordinates.""" 11 | t = (near - origins[Ellipsis, 2]) / directions[Ellipsis, 2] 12 | origins = origins + t[Ellipsis, None] * directions 13 | 14 | dx, dy, dz = directions[:, 0], directions[:, 1], directions[:, 2] 15 | ox, oy, oz = origins[:, 0], origins[:, 1], origins[:, 2] 16 | o0 = ndc_coeffs[0] * (ox / oz) 17 | o1 = ndc_coeffs[1] * (oy / oz) 18 | o2 = 1 - 2 * near / oz 19 | d0 = ndc_coeffs[0] * (dx / dz - ox / oz) 20 | d1 = ndc_coeffs[1] * (dy / dz - oy / oz) 21 | d2 = 2 * near / oz 22 | 23 | origins = torch.stack([o0, o1, o2], -1) 24 | directions = torch.stack([d0, d1, d2], -1) 25 | 26 | return origins, directions 27 | 28 | 29 | def batchified_get_rays( 30 | intrinsics, 31 | extrinsics, 32 | image_sizes, 33 | use_pixel_centers 34 | ): 35 | center = 0.5 if use_pixel_centers else 0. 36 | mesh_grids = [np.meshgrid( 37 | np.arange(w, dtype=np.float32) + center, 38 | np.arange(h, dtype=np.float32) + center, 39 | indexing="xy" 40 | ) for (h, w) in image_sizes] 41 | 42 | i_coords = [mesh_grid[0] for mesh_grid in mesh_grids] 43 | j_coords = [mesh_grid[1] for mesh_grid in mesh_grids] 44 | 45 | dirs = [ 46 | np.stack( 47 | [ 48 | (i - intrinsic[0][2]) / intrinsic[0][0], 49 | (j - intrinsic[1][2]) / intrinsic[1][1], 50 | np.ones_like(i) 51 | ], -1 52 | ) for (intrinsic, i, j) in zip(intrinsics, i_coords, j_coords) 53 | ] 54 | 55 | rays_o = np.concatenate([ 56 | np.tile( 57 | extrinsic[np.newaxis, :3, 3], (1, h * w, 1) 58 | ).reshape(-1, 3) for (extrinsic, (h, w)) in zip(extrinsics, image_sizes) 59 | ]).astype(np.float32) 60 | 61 | rays_d = np.concatenate([ 62 | np.einsum( 63 | "hwc, rc -> hwr", dir, extrinsic[:3, :3] 64 | ).reshape(-1, 3) for (dir, extrinsic) in zip(dirs, extrinsics) 65 | ]).astype(np.float32) 66 | 67 | rays_d /= np.linalg.norm(rays_d, axis=-1, keepdims=True) 68 | 69 | return rays_o, rays_d -------------------------------------------------------------------------------- /utils/select_option.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import * 3 | 4 | import gdown 5 | 6 | from dataloader.litdata import LitDataCo3D, LitDataScannet 7 | from model.plenoxel_torch.model import LitPlenoxel, ResampleCallBack 8 | 9 | url_co3d_list = "https://drive.google.com/uc?id=1jCDaA41ZddkgPl4Yw2h-XI7mt9o56kb7" 10 | 11 | def select_model( 12 | model_name: str, 13 | ): 14 | return LitPlenoxel() 15 | 16 | 17 | def select_dataset( 18 | dataset_name: str, 19 | datadir: str, 20 | scene_name: str, 21 | accelerator: str, 22 | num_gpus: int, 23 | num_tpus: int, 24 | ): 25 | if dataset_name == "co3d": 26 | data_fun = LitDataCo3D 27 | co3d_list_json_path = os.path.join("dataloader/co3d_lists/co3d_list.json") 28 | if not os.path.exists(co3d_list_json_path): 29 | gdown.download(url_co3d_list, co3d_list_json_path) 30 | elif dataset_name == "scannet": 31 | data_fun = LitDataScannet 32 | 33 | return data_fun( 34 | datadir=datadir, 35 | scene_name=scene_name, 36 | accelerator=accelerator, 37 | num_gpus=num_gpus, 38 | num_tpus=num_tpus, 39 | ) 40 | 41 | def select_callback(model_name): 42 | 43 | return [ResampleCallBack()] 44 | -------------------------------------------------------------------------------- /utils/store_util.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import imageio 4 | import numpy as np 5 | import torch 6 | from PIL import 
Image 7 | 8 | 9 | def to8b(x): 10 | return (255 * np.clip(x, 0, 1)).astype(np.uint8) 11 | 12 | 13 | def binary(x): 14 | x = np.round(x) 15 | return (255 * np.clip(x, 0, 1)).astype(np.uint8) 16 | 17 | 18 | def norm8b(x): 19 | x = (x - x.min()) / (x.max() - x.min()) 20 | return to8b(x) 21 | 22 | 23 | def store_image(dirpath, rgbs): 24 | for (i, rgb) in enumerate(rgbs): 25 | imgname = f"image{str(i).zfill(3)}.jpg" 26 | rgbimg = Image.fromarray(to8b(rgb.detach().cpu().numpy())) 27 | imgpath = os.path.join(dirpath, imgname) 28 | rgbimg.save(imgpath) 29 | 30 | 31 | def store_depth(dirpath, depths): 32 | for (i, depth) in enumerate(depths): 33 | depthname = f"depth{str(i).zfill(3)}.jpg" 34 | disparity = torch.zeros_like(depth) 35 | disparity[torch.where(depth != 0)] = torch.log( 36 | (1 / (depth[torch.where(depth != 0)] + 1e-6)) 37 | ) 38 | img = norm8b(disparity.detach().cpu().numpy().repeat(3, axis=-1)) 39 | depthimg = Image.fromarray(img) 40 | depthpath = os.path.join(dirpath, depthname) 41 | depthimg.save(depthpath) 42 | 43 | 44 | def store_video(dirpath, rgbs): 45 | rgbimgs = [to8b(rgb.detach().cpu().numpy()) for rgb in rgbs] 46 | video_dir = os.path.join(dirpath, "videos") 47 | os.makedirs(video_dir, exist_ok=True) 48 | imageio.mimwrite(os.path.join(video_dir, "images.mp4"), rgbimgs, fps=30, quality=8) 49 | 50 | 51 | def store_mask(dirpath, masks): 52 | for (i, mask) in enumerate(masks): 53 | maskname = f"mask{str(i).zfill(3)}.jpg" 54 | maskimg = Image.fromarray(binary(mask.detach().cpu().numpy())) 55 | maskpath = os.path.join(dirpath, maskname) 56 | maskimg.save(maskpath) 57 | -------------------------------------------------------------------------------- /utils/tsdf.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | import cv2 5 | import numpy as np 6 | import open3d as o3d 7 | import tqdm 8 | 9 | from dataloader.data_util.common import connected_component_filter, find_files 10 | from dataloader.data_util.scannet import detect_blur_fft 11 | 12 | 13 | def integrate( 14 | scene_name, 15 | outdir, 16 | max_frame, 17 | skip_frame, 18 | blur_thresh, 19 | max_image_dim, 20 | voxel_size, 21 | max_depth=4.5, 22 | debug=False, 23 | ): 24 | print(f"processing {scene_name}") 25 | # setup dir 26 | scenedir = os.path.join(outdir, scene_name) 27 | if not os.path.exists(scenedir): 28 | os.makedirs(scenedir, exist_ok=True) 29 | 30 | if os.path.exists(os.path.join(scenedir, f"tsdf_pcd_{voxel_size}.npz")): 31 | print(f"skip exist {scene_name}") 32 | return 33 | 34 | files = find_files(os.path.join(scenedir, "color"), exts=["*.jpg"]) 35 | assert len(files) > 0, f"{scenedir} does not contain color images."
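# Caveat for the line below: str.rstrip(".jpg") removes trailing *characters*
# drawn from the set {'.', 'j', 'p', 'g'}, not the literal suffix. That is
# harmless for ScanNet's purely numeric frame ids, but a suffix-safe idiom
# for arbitrary file names would be:
#   frame_ids = sorted(os.path.splitext(os.path.basename(f))[0] for f in files)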
36 | frame_ids = sorted([os.path.basename(f).rstrip(".jpg") for f in files]) 37 | frame_ids = np.array(frame_ids) 38 | 39 | # filter invalid poses 40 | poses = np.stack( 41 | [np.loadtxt(os.path.join(scenedir, "pose", f"{f}.txt")) for f in frame_ids], 42 | axis=0, 43 | ) 44 | poses = poses.astype(np.float32) 45 | numerics = np.all( 46 | (~np.isinf(poses) * ~np.isnan(poses) * ~np.isneginf(poses)).reshape(-1, 16), 47 | axis=1, 48 | ) 49 | 50 | # load images 51 | print(f"loading images - {len(frame_ids)}") 52 | colors = np.stack( 53 | [cv2.imread(os.path.join(scenedir, "color", f"{f}.jpg")) for f in frame_ids], 54 | axis=0, 55 | ) 56 | colors = colors.astype(np.float32) / 255.0 57 | 58 | # load depths 59 | print(f"loading depths - {len(frame_ids)}") 60 | depth_shift = 1000.0 61 | depths = np.stack( 62 | [ 63 | cv2.imread( 64 | os.path.join(scenedir, "depth", f"{f}.png"), cv2.IMREAD_UNCHANGED 65 | ) 66 | for f in frame_ids 67 | ], 68 | axis=0, 69 | ) 70 | depths = depths.astype(np.float32) / depth_shift 71 | 72 | # load intrinsics 73 | print(f"loading intrinsic") 74 | _intrinsic = np.loadtxt(os.path.join(scenedir, "intrinsic", "intrinsic_color.txt")) 75 | _intrinsic = _intrinsic.astype(np.float32) 76 | 77 | # filter blurry images 78 | print(f"filter blurry images") 79 | if not os.path.exists(os.path.join(scenedir, "blur.npy")): 80 | blurness = np.stack( 81 | [detect_blur_fft(c, thresh=blur_thresh)[0] for c in colors], axis=0 82 | ).reshape(-1) 83 | np.save(os.path.join(scenedir, "blur.npy"), blurness) 84 | else: 85 | blurness = np.load(os.path.join(scenedir, "blur.npy")) 86 | num_valid = min(150, int(0.2 * len(frame_ids))) 87 | ths = np.sort(blurness)[num_valid] 88 | is_valid = np.logical_and(blurness > ths, numerics) 89 | print(f"filtered {is_valid.sum()} out of {len(is_valid)} images") 90 | 91 | colors, depths, poses = ( 92 | colors[is_valid][::skip_frame], 93 | depths[is_valid][::skip_frame], 94 | poses[is_valid][::skip_frame], 95 | ) 96 | frame_ids = frame_ids[is_valid][::skip_frame] 97 | 98 | # setup TSDF volume 99 | intrinsic = o3d.camera.PinholeCameraIntrinsic() 100 | intrinsic.set_intrinsics( 101 | colors.shape[2], 102 | colors.shape[1], 103 | _intrinsic[0, 0], 104 | _intrinsic[1, 1], 105 | _intrinsic[0, 2], 106 | _intrinsic[1, 2], 107 | ) 108 | volume = o3d.pipelines.integration.ScalableTSDFVolume( 109 | voxel_length=voxel_size, 110 | sdf_trunc=0.04, 111 | color_type=o3d.pipelines.integration.TSDFVolumeColorType.RGB8, 112 | ) 113 | 114 | # integration 115 | for image, pose, depth in tqdm.tqdm(zip(colors, poses, depths)): 116 | image *= 255.0 117 | image = image.astype(np.uint8) 118 | image_o3d = o3d.geometry.Image(image) 119 | depth_o3d = o3d.geometry.Image(depth) 120 | 121 | rgbd = o3d.geometry.RGBDImage.create_from_color_and_depth( 122 | image_o3d, 123 | depth_o3d, 124 | depth_scale=1.0, 125 | depth_trunc=max_depth, 126 | convert_rgb_to_intensity=False, 127 | ) 128 | volume.integrate(rgbd, intrinsic, np.linalg.inv(pose)) 129 | 130 | # extract geometery 131 | pcd = volume.extract_point_cloud() 132 | xyz = np.asarray(pcd.points) 133 | sel = connected_component_filter(xyz, 0.05) 134 | 135 | points = np.asarray(pcd.points)[sel].astype(np.float32) 136 | colors = np.asarray(pcd.colors)[sel].astype(np.float32) 137 | 138 | np.savez( 139 | os.path.join(scenedir, f"tsdf_pcd_{voxel_size}.npz"), 140 | xyz=points, 141 | color=colors, 142 | ) 143 | print(f">> processed {scene_name}") 144 | 145 | 146 | if __name__ == "__main__": 147 | from functools import partial 148 | from multiprocessing import 
Pool 149 | 150 | parser = argparse.ArgumentParser() 151 | parser.add_argument("--scene_name", type=str, required=True) 152 | parser.add_argument("--max_frame", type=int, default=1000) 153 | parser.add_argument("--skip_frame", type=int, default=1) 154 | parser.add_argument("--blur_thresh", type=float, default=10) 155 | parser.add_argument("--max_depth", type=float, default=4.5) 156 | parser.add_argument("--max_image_dim", type=int, default=640) 157 | parser.add_argument("--voxel_size", type=float, default=0.025) 158 | parser.add_argument("--basedir", type=str, default="./tsdf_results") 159 | parser.add_argument("--outdir", type=str, default="/root/data/scannet/scans") 160 | parser.add_argument("--num_workers", type=int, default=8) 161 | parser.add_argument("--offset", type=int, default=0) 162 | 163 | args = parser.parse_args() 164 | 165 | if args.scene_name == "all": 166 | scene_list = sorted(os.listdir(args.outdir)) 167 | else: 168 | scene_list = [args.scene_name] 169 | 170 | if args.scene_name == "all": 171 | integrate_partial = partial( 172 | integrate, 173 | outdir=args.outdir, 174 | max_frame=args.max_frame, 175 | skip_frame=args.skip_frame, 176 | blur_thresh=args.blur_thresh, 177 | max_image_dim=args.max_image_dim, 178 | voxel_size=args.voxel_size, 179 | max_depth=args.max_depth, 180 | ) 181 | scene_list_cur = scene_list[args.offset :: args.num_workers] 182 | for scene in scene_list_cur: 183 | integrate_partial(scene) 184 | else: 185 | for scene in scene_list: 186 | integrate( 187 | scene, 188 | args.outdir, 189 | args.max_frame, 190 | args.skip_frame, 191 | args.blur_thresh, 192 | args.max_image_dim, 193 | args.voxel_size, 194 | args.max_depth, 195 | ) 196 | -------------------------------------------------------------------------------- /utils/tsdf_cuda.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | import numpy as np 5 | import open3d as o3d 6 | import open3d.core as o3c 7 | import tqdm 8 | 9 | from dataloader.data_util.common import connected_component_filter, find_files 10 | 11 | 12 | def integrate(scene_name, outdir, reso=512, max_depth=4.5): 13 | device = o3c.Device(o3c.Device.CUDA, 0) 14 | print(f"processing {scene_name}") 15 | 16 | # setup dir 17 | scenedir = os.path.join(outdir, scene_name) 18 | if not os.path.exists(scenedir): 19 | os.makedirs(scenedir, exist_ok=True) 20 | 21 | if os.path.exists(os.path.join(scenedir, f"tsdf_pcd_{reso}.npy")): 22 | print(f"skip exist {scene_name}") 23 | return 24 | 25 | files = find_files(os.path.join(scenedir, "color"), exts=["*.jpg"]) 26 | if len(files) == 0: 27 | print(f"{scenedir} does not contain color images. 
skip.") 28 | return 29 | frame_ids = sorted([os.path.basename(f).rstrip(".jpg") for f in files]) 30 | frame_ids = np.array(frame_ids) 31 | 32 | # filter invalid poses 33 | poses = np.stack( 34 | [np.loadtxt(os.path.join(scenedir, "pose", f"{f}.txt")) for f in frame_ids], 35 | axis=0, 36 | ) 37 | poses = poses.astype(np.float32) 38 | numerics = np.all( 39 | ( 40 | ~np.isinf(poses) 41 | * ~np.isnan(poses) 42 | * ~np.isneginf(poses) 43 | * (np.abs(poses) < 30) 44 | ).reshape(-1, 16), 45 | axis=1, 46 | ) 47 | poses = poses[numerics] 48 | frame_ids = frame_ids[numerics] 49 | 50 | skip_frame = 1 51 | if len(frame_ids) > 3000: 52 | skip_frame = 2 53 | if len(frame_ids) > 5000: 54 | skip_frame = 3 55 | 56 | depth_shift = 1000.0 57 | 58 | # load intrinsics 59 | print(f"loading intrinsic") 60 | _intrinsic = np.loadtxt(os.path.join(scenedir, "intrinsic", "intrinsic_color.txt")) 61 | _intrinsic = _intrinsic.astype(np.float32) 62 | 63 | poses = poses[::skip_frame] 64 | frame_ids = frame_ids[::skip_frame] 65 | 66 | # setup voxel block grid 67 | vbg = o3d.t.geometry.VoxelBlockGrid( 68 | attr_names=("tsdf", "weight"), 69 | attr_dtypes=(o3c.float32, o3c.float32), 70 | attr_channels=((1), (1)), 71 | voxel_size=3.0 / reso, 72 | block_resolution=16, 73 | block_count=100000, 74 | device=device, 75 | ) 76 | intrinsic = o3d.camera.PinholeCameraIntrinsic() 77 | intrinsic.set_intrinsics( 78 | 640, 79 | 480, 80 | _intrinsic[0, 0], 81 | _intrinsic[1, 1], 82 | _intrinsic[0, 2], 83 | _intrinsic[1, 2], 84 | ) 85 | intrinsic_tensor = o3c.Tensor(intrinsic.intrinsic_matrix, o3c.Dtype.Float64) 86 | 87 | for i, (fid, E) in tqdm.tqdm( 88 | enumerate(zip(frame_ids, poses)), total=len(frame_ids) 89 | ): 90 | # print(f"integraing frame {i+1}/{len(frame_ids)} for scene {scene_name}") 91 | depth = o3d.t.io.read_image(os.path.join(scenedir, "depth", f"{fid}.png")).to( 92 | device 93 | ) 94 | extrinsic = o3c.Tensor(E, o3c.Dtype.Float64) 95 | extrinsic = o3c.inv(extrinsic).contiguous() 96 | frustum_block_coords = vbg.compute_unique_block_coordinates( 97 | depth, intrinsic_tensor, extrinsic, depth_shift, max_depth 98 | ) 99 | 100 | vbg.integrate( 101 | frustum_block_coords, 102 | depth, 103 | intrinsic_tensor, 104 | extrinsic, 105 | depth_shift, 106 | max_depth, 107 | ) 108 | 109 | # extract geometery 110 | pcd_tensor = vbg.extract_point_cloud() 111 | pcd = pcd_tensor.to_legacy() 112 | xyz = np.asarray(pcd.points) 113 | sel = connected_component_filter(xyz, 0.05) 114 | 115 | points = np.asarray(pcd.points)[sel].astype(np.float32) 116 | colors = None 117 | if pcd.has_colors(): 118 | colors = np.asarray(pcd.colors)[sel].astype(np.float32) 119 | 120 | np.save(os.path.join(scenedir, f"tsdf_pcd_{reso}.npy"), points) 121 | print(f">> processed {scene_name}") 122 | 123 | 124 | if __name__ == "__main__": 125 | parser = argparse.ArgumentParser() 126 | parser.add_argument("--scene_name", type=str, required=True) 127 | parser.add_argument("--max_depth", type=float, default=4.5) 128 | parser.add_argument("--reso", type=int, default=1024) 129 | parser.add_argument("--outdir", type=str, default="/root/data/scannet/scans") 130 | parser.add_argument("--offset", type=int, default=0) 131 | 132 | args = parser.parse_args() 133 | 134 | if args.scene_name == "all": 135 | scene_list = sorted(os.listdir(args.outdir)) 136 | else: 137 | scene_list = [args.scene_name] 138 | 139 | for scene in scene_list: 140 | integrate( 141 | scene, 142 | outdir=args.outdir, 143 | reso=args.reso, 144 | max_depth=args.max_depth, 145 | ) 146 | 
--------------------------------------------------------------------------------
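Appendix: a minimal, self-contained sketch of how the ray helpers in utils/ray.py are typically driven. Everything below is illustrative, assuming a toy pinhole camera rather than a configuration shipped with this repository; the NDC coefficients follow the common (2*fx/W, 2*fy/H) convention for forward-facing captures. Run it from the repository root so that utils.ray is importable.

import numpy as np
import torch

from utils.ray import batchified_get_rays, convert_to_ndc

# Toy pinhole camera; the numbers are placeholders for illustration only.
H, W, focal = 8, 12, 10.0
intrinsic = np.array(
    [[focal, 0.0, W / 2.0],
     [0.0, focal, H / 2.0],
     [0.0, 0.0, 1.0]],
    dtype=np.float32,
)
extrinsic = np.eye(4, dtype=np.float32)  # camera-to-world transform at the origin

# One normalized world-space ray per pixel.
rays_o, rays_d = batchified_get_rays(
    intrinsics=[intrinsic],
    extrinsics=[extrinsic],
    image_sizes=[(H, W)],
    use_pixel_centers=True,
)
assert rays_o.shape == (H * W, 3) and rays_d.shape == (H * W, 3)

# Optional NDC warp for forward-facing scenes; convert_to_ndc expects torch tensors.
ndc_o, ndc_d = convert_to_ndc(
    torch.from_numpy(rays_o),
    torch.from_numpy(rays_d),
    ndc_coeffs=(2.0 * focal / W, 2.0 * focal / H),
)
print(ndc_o.shape, ndc_d.shape)  # torch.Size([96, 3]) for both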