├── .gitignore
├── LICENSE
├── LICENSE-Plenoxels
├── README.md
├── assets
│   ├── 2D_score.png
│   ├── 3D_score.png
│   └── teaser.png
├── cache
│   └── .gitkeep
├── configs
│   ├── co3d_v1.gin
│   ├── co3d_v2.gin
│   ├── rebuttal_quant
│   │   ├── co3d_16bit.gin
│   │   ├── co3d_2bit.gin
│   │   ├── co3d_2bit_clip.gin
│   │   ├── co3d_2bit_density.gin
│   │   ├── co3d_4bit.gin
│   │   ├── co3d_4bit_clip.gin
│   │   ├── co3d_4bit_density.gin
│   │   ├── co3d_8bit_clip.gin
│   │   └── co3d_8bit_density.gin
│   ├── rebuttal_reso
│   │   ├── co3d_128.gin
│   │   ├── co3d_256.gin
│   │   ├── co3d_384.gin
│   │   └── co3d_64.gin
│   ├── rebuttal_sigma_weight
│   │   ├── co3d_sigma_10.gin
│   │   ├── co3d_sigma_100.gin
│   │   ├── co3d_sigma_5.gin
│   │   ├── co3d_weight_1.28.gin
│   │   ├── co3d_weight_2.56.gin
│   │   └── co3d_weight_none.gin
│   ├── scannet.gin
│   ├── scannet_depth.gin
│   ├── scannet_tsdf.gin
│   ├── scannet_tsdf_reso1024_pad3.gin
│   ├── scannet_tsdf_reso256_pad1.gin
│   ├── scannet_tsdf_reso256_pad2.gin
│   ├── scannet_tsdf_reso512_pad1.gin
│   └── scannet_tsdf_reso512_pad2.gin
├── data
│   └── .gitkeep
├── dataloader
│   ├── co3d_lists
│   │   └── .gitkeep
│   ├── data_util
│   │   ├── co3d.py
│   │   ├── common.py
│   │   └── scannet.py
│   ├── interface.py
│   ├── litdata.py
│   ├── random_pose.py
│   ├── sampler.py
│   └── spherical_poses.py
├── lib
│   └── plenoxel
│       ├── CMakeLists.txt
│       ├── include
│       │   ├── cubemap_util.cuh
│       │   ├── cuda_util.cuh
│       │   ├── data_spec.hpp
│       │   ├── data_spec_packed.cuh
│       │   ├── random_util.cuh
│       │   ├── render_util.cuh
│       │   └── util.hpp
│       ├── loss_kernel.cu
│       ├── misc_kernel.cu
│       ├── optim_kernel.cu
│       ├── render_lerp_kernel_cuvol.cu
│       ├── render_lerp_kernel_nvol.cu
│       ├── render_svox1_kernel.cu
│       ├── svox2.cpp
│       ├── svox2_kernel.cu
│       └── version.py
├── model
│   ├── interface.py
│   └── plenoxel_torch
│       ├── __global__.py
│       ├── autograd.py
│       ├── dataclass.py
│       ├── model.py
│       ├── sparse_grid.py
│       └── utils.py
├── run.py
├── sbatch.sh
├── script
│   ├── collage.sh
│   ├── v1.sh
│   └── v2.sh
├── setup.py
└── utils
    ├── SensorData.py
    ├── download_perf.py
    ├── extract_scannet.py
    ├── extract_stats.py
    ├── logger.py
    ├── notebooks
    │   ├── co3d_lists.ipynb
    │   ├── co3d_v2.ipynb
    │   ├── collect_results.ipynb
    │   ├── find_missing_logs.ipynb
    │   ├── make_collage_list.ipynb
    │   ├── make_video.ipynb
    │   ├── masked_out.ipynb
    │   ├── v1_v2_score.ipynb
    │   └── visualize_voxel.ipynb
    ├── ray.py
    ├── select_option.py
    ├── store_util.py
    ├── tsdf.py
    └── tsdf_cuda.py
/.gitignore:
--------------------------------------------------------------------------------
1 | *logs/*
2 | logs
3 | *wandb/*
4 | *vscode*
5 | *pycache*
6 | *swap-pane*
7 | *.idea*
8 | logs_collection
9 | *build*
10 | *.egg-info*
11 | *.eggs*
12 | *_debug*
13 |
14 | cache/*
15 | !cache/.gitkeep
16 | data/*
17 | !data/.gitkeep
18 | dataloader/co3d_lists/*
19 | !dataloader/co3d_lists/.gitkeep
20 | *.ipynb_checkpoints*
21 |
22 | playground/*
23 | render/*
24 | tsdf_results/
25 |
--------------------------------------------------------------------------------
/LICENSE-Plenoxels:
--------------------------------------------------------------------------------
1 | BSD 2-Clause License
2 |
3 | Copyright (c) 2021, the Plenoxels authors
4 | All rights reserved.
5 |
6 | Redistribution and use in source and binary forms, with or without
7 | modification, are permitted provided that the following conditions are met:
8 |
9 | 1. Redistributions of source code must retain the above copyright notice, this
10 | list of conditions and the following disclaimer.
11 |
12 | 2. Redistributions in binary form must reproduce the above copyright notice,
13 | this list of conditions and the following disclaimer in the documentation
14 | and/or other materials provided with the distribution.
15 |
16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # PeRFception - Perception using Radiance Fields
2 |
3 |
4 |
![teaser](assets/teaser.png)
5 |
6 |
7 | ### Quick Access
8 |
9 | [Project Page](https://postech-cvlab.github.io/PeRFception) | [Paper](https://arxiv.org/abs/2208.11537) | [Supplementary Materials](https://openreview.net/attachment?id=MzaPEKHv-0J&name=supplementary_material)
10 | ### Author Info
11 |
12 | - [Yoonwoo Jeong](https://yoonwooinfo.notion.site) [[Google Scholar](https://scholar.google.com/citations?user=HQ1PMggAAAAJ&hl=en)]
13 | - [Seungjoo Shin](https://seungjooshin.github.io/) [[Google Scholar](https://scholar.google.com/citations?user=io7PSDIAAAAJ&hl=en)]
14 | - [Junha Lee](https://junha-l.github.io/) [[Google Scholar](https://scholar.google.com/citations?user=RB7qMm4AAAAJ&hl=en)]
15 | - [Chris Choy](https://chrischoy.org) [[Google Scholar](https://scholar.google.com/citations?user=2u8G5ksAAAAJ&hl=en&oi=ao)]
16 | - [Animashree Anandkumar](http://tensorlab.cms.caltech.edu/users/anima/) [[Google Scholar](https://scholar.google.com/citations?user=bEcLezcAAAAJ&hl=en&oi=ao)]
17 | - [Minsu Cho](http://cvlab.postech.ac.kr/~mcho/) [[Google Scholar](https://scholar.google.com/citations?user=5TyoF5QAAAAJ&hl=en&oi=ao)]
18 | - [Jaesik Park](http://jaesik.info/) [[Google Scholar](https://scholar.google.com/citations?user=_3q6KBIAAAAJ&hl=en&oi=ao)]
19 |
20 | ### News
21 | - 22.10.27: Added HuggingFace repo for ScanNet.
22 | - 22.10.25: Moved PeRFception datasets from OneDrive to HuggingFace.
23 | - 22.08.04: The first version of PeRFception has been released.
24 |
25 |
26 |
27 |
28 |
29 | ## Abstract
30 |
31 | The recent progress in implicit 3D representation, i.e., Neural Radiance Fields (NeRFs), has made accurate and photorealistic 3D reconstruction possible in a differentiable manner. This new representation can effectively convey the information of hundreds of high-resolution images in one compact format and allows photorealistic synthesis of novel views. In this work, using the variant of NeRF called Plenoxels, we create the first large-scale implicit representation datasets for perception tasks, called PeRFception, which consists of two parts that incorporate both object-centric and scene-centric scans for classification and segmentation. It shows a significant memory compression rate (96.4%) from the original dataset, while containing both 2D and 3D information in a unified form. We construct the classification and segmentation models that directly take as input this implicit format and also propose a novel augmentation technique to avoid overfitting on backgrounds of images. The code and data will be publicly available.
32 |
33 | ## Downloading PeRFception-Datasets [[CO3D-link1](https://huggingface.co/datasets/YWjimmy/PeRFception-v1-1)] [[CO3D-link2](https://huggingface.co/datasets/YWjimmy/PeRFception-v1-2)] [[CO3D-link3](https://huggingface.co/datasets/YWjimmy/PeRFception-v1-3)] [[ScanNet](https://huggingface.co/datasets/YWjimmy/PeRFception-ScanNet)]
34 |
35 | ```
36 | # Link1 - PeRFception-CO3D-v1
37 | git clone https://huggingface.co/datasets/YWjimmy/PeRFception-v1-1
38 | # Link2 - PeRFception-CO3D-v1
39 | git clone https://huggingface.co/datasets/YWjimmy/PeRFception-v1-2
40 | # Link3 - PeRFception-CO3D-v1
41 | git clone https://huggingface.co/datasets/YWjimmy/PeRFception-v1-3
42 | # PeRFception-ScanNet
43 | git clone https://huggingface.co/datasets/YWjimmy/PeRFception-ScanNet
44 | ```
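These dataset repositories are hosted on HuggingFace, which stores large files via Git LFS. If a plain `git clone` leaves you with small pointer files instead of the actual data, enable Git LFS first; this is a generic Git LFS step, not a script shipped with this repository:

```
git lfs install
git clone https://huggingface.co/datasets/YWjimmy/PeRFception-v1-1
```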
45 | ### Downloading specific chunks
46 | ```
47 | mkdir [dir]
48 | cd [dir]
49 | git init
50 | git remote add -f origin [link]
51 | git config core.sparseCheckout true
52 | echo "some/dir/" >> .git/info/sparse-checkout
53 | echo "another/sub/tree" >> .git/info/sparse-checkout
54 | git pull origin main
55 |
56 | # ex) If you want to download data only from 288_30460_58530
57 | echo "30/plenoxel_co3d_288_30460_58530" >> .git/info/sparse-checkout
58 | ```
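Putting the recipe together for the example scene `288_30460_58530` above (the directory name is a placeholder, and we assume the scene lives in the first CO3D chunk repository):

```
mkdir PeRFception-v1-1 && cd PeRFception-v1-1
git init
git remote add -f origin https://huggingface.co/datasets/YWjimmy/PeRFception-v1-1
git config core.sparseCheckout true
echo "30/plenoxel_co3d_288_30460_58530" >> .git/info/sparse-checkout
git pull origin main
```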
59 | ### PeRFception-CO3D
60 |
61 | |Dataset| # Scenes | # Frames | 3D Shape | Features | 3D-BKGD | Memory | Memory (Rel)|
62 | |:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|
63 | |CO3D| 18.6K | 1.5M | pcd | C | X | 1.44TB | $$\pm0.00\%$$
64 | |PeRFception-CO3D| 18.6K | $$\infty$$ | voxel | SH + D | O | 1.33TB | $$-6.94\%$$
65 |
66 | ### PeRFception-ScanNet
67 |
68 | |Dataset| # Scenes | # Frames | 3D Shape | Features | 3D-BKGD | Memory | Memory (Rel)|
69 | |:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|
70 | |ScanNet| 1.5K | 2.5M | pcd | C | X | 966GB | $$\pm0.00\%$$
71 | |PeRFception-ScanNet| 1.5K | $$\infty$$ | voxel | SH + D | O | 35GB | $$-96.4\%$$
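As a sanity check on the relative-memory column: for PeRFception-ScanNet, $$1 - 35\,\text{GB} / 966\,\text{GB} \approx 96.4\%$$, matching the compression rate quoted in the abstract.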
72 |
73 |
74 | ## Get Ready (Installation)
75 |
76 | Our code is verified on Ubuntu 20.04 with CUDA 11.1.
77 |
78 | ```
79 | conda create -n perfception -c anaconda python=3.8 -y
80 | conda activate perfception
81 | conda install pytorch==1.8.0 torchvision==0.9.0 torchaudio==0.8.0 cudatoolkit=11.1 -c pytorch -c conda-forge -y
82 | pip3 install imageio tqdm requests configargparse scikit-image imageio-ffmpeg piqa wandb pytorch_lightning==1.5.5 opencv-python gin-config gdown plyfile
83 | pip3 install .
84 | ```
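Before moving on, it is worth confirming that the installed PyTorch build can see the GPU. This is a generic PyTorch check, not a script shipped with this repository:

```
python3 -c "import torch; print(torch.__version__, torch.cuda.is_available())"
# expected output: 1.8.0 True
```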
85 |
86 |
87 | ## Demo
88 | We provide a short demo for rendering a scene from CO3D or ScanNet. After installing the requirements, you can run the demos with the commands below:
89 | ```
90 | # CO3D demo
91 | python3 -m run --ginc configs/co3d_v1.gin
92 | # ScanNet demo
93 | python3 -m run --ginc configs/scannet.gin
94 | ```
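Both demos read their settings from the given gin config; checkpoints and logs are written under the directory set by `run.logbase` (`logs` in the provided configs).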
95 |
96 | ## Rendering CO3D and ScanNet
97 | We provide the full code to reproduce the results reported in the main paper. Before running it, place the datasets in the layout below:
98 | ```
99 | data
100 | │
101 | ├── co3d
102 | │   ├── apple
103 | │   ├── banana
104 | │   └── ...
105 | │
106 | └── scannet
107 |     ├── scene0000_00
108 |     ├── scene0000_01
109 |     └── ...
110 | ```
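If the datasets already live elsewhere on disk, symlinking them into `data/` is an easy way to match this layout (both paths below are placeholders):

```
ln -s /path/to/co3d data/co3d
ln -s /path/to/scannet data/scannet
```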
111 | ScanNet-v2 can be downloaded [here](http://www.scan-net.org/) and CO3D-v1 [here](https://github.com/facebookresearch/co3d). Thanks to `wandb`'s sweep functionality, we were able to manage the large number of per-scene training runs. You can download the `sweep` file [here](https://1drv.ms/u/s!As9A9EbDsoWcj6toSOfdeWMaHhqF3Q?e=1INfNg).
112 |
113 |
114 | ## Downstream Tasks
115 |
116 | ### Code for downstream tasks: https://github.com/POSTECH-CVLab/NeRF-Downstream
117 |
118 | ### 2D object classification (PeRFception-CO3D)
119 |
120 | We benchmark several 2D classification models on rendered PeRFception-CO3D. For faster reproduction, we also provide the images rendered from PeRFception-CO3D at this [link](https://1drv.ms/u/s!AgY2evoYo6FggthVfVngtHinq3czqQ?e=crnTlu). Before running the code, make sure you have put the downloaded dataset in `data/perfception_2d`. You can reproduce the scores using the scripts in `scripts/downstream/2d_cls/[model].sh`. Details of the training pipeline and models are elaborated in the main paper.
121 |
122 | The pretrained models can be accessed via the links below:
123 |
124 |
![2D classification scores](assets/2D_score.png)
125 |
126 |
127 |
128 | ### 3D object classification (PeRFception-CO3D)
129 |
130 | We also benchmark several 3D classification models on PeRFception-CO3D. The full code is available in the [NeRF-Downstream](https://github.com/POSTECH-CVLab/NeRF-Downstream) repository linked above.
131 |
132 |
133 |
![3D classification scores](assets/3D_score.png)
134 |
135 |
136 |
137 | ### 3D semantic segmentation (PeRFception-ScanNet)
138 | On PeRFception-ScanNet, we evaluate several 3D semantic segmentation models trained with depth-supervised labels.
139 |
140 | ## Plans for v2
141 |
142 | According to the official CO3D repository [[link](https://github.com/facebookresearch/co3d)], the authors have released an improved version of CO3D, v2, which should yield better rendering quality and more accurate geometry in our model. We plan to extend this work to PeRFception-CO3D-v2, built from CO3D-v2.
143 |
144 | ## Citation
145 | ```bib
146 | @article{jeong2022perfception,
147 | title = {PeRFception: Perception using Radiance Fields},
148 | author = {Jeong, Yoonwoo and Shin, Seungjoo and Lee, Junha and Choy, Chris and Anandkumar, Anima and Cho, Minsu and Park, Jaesik},
149 | year = {2022}
150 | }
151 | ```
152 |
153 | ## Acknowledgement
154 | We thank the reviewers for their constructive comments and suggestions.
155 |
--------------------------------------------------------------------------------
/assets/2D_score.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/POSTECH-CVLab/PeRFception/a3c34d4997ce6807a5b4e3debe086238cc951e61/assets/2D_score.png
--------------------------------------------------------------------------------
/assets/3D_score.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/POSTECH-CVLab/PeRFception/a3c34d4997ce6807a5b4e3debe086238cc951e61/assets/3D_score.png
--------------------------------------------------------------------------------
/assets/teaser.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/POSTECH-CVLab/PeRFception/a3c34d4997ce6807a5b4e3debe086238cc951e61/assets/teaser.png
--------------------------------------------------------------------------------
/cache/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/POSTECH-CVLab/PeRFception/a3c34d4997ce6807a5b4e3debe086238cc951e61/cache/.gitkeep
--------------------------------------------------------------------------------
/configs/co3d_v1.gin:
--------------------------------------------------------------------------------
1 | LitData.batch_sampler = "all_images"
2 | LitData.epoch_size = 25600
3 | LitData.batch_size = 5000
4 | LitData.chunk = 10000
5 | LitData.use_pixel_centers = True
6 |
7 | LitPlenoxel.reso = [[128, 128, 128], [256, 256, 256]]
8 | LitPlenoxel.background_nlayers = 16
9 | LitPlenoxel.background_reso = 512
10 | LitPlenoxel.lr_sigma = 3.0e+1
11 | LitPlenoxel.lr_sh = 1.0e-2
12 | LitPlenoxel.lr_sigma_delay_steps = 0
13 | LitPlenoxel.lr_fg_begin_step = 1000
14 | LitPlenoxel.density_thresh = 20.0
15 | LitPlenoxel.thresh_type = "sigma"
16 | LitPlenoxel.near_clip = 0.35
17 | LitPlenoxel.lambda_tv = 5.0e-5
18 | LitPlenoxel.lambda_tv_sh = 5.0e-3
19 | LitPlenoxel.lambda_tv_background_color = 1.0e-3
20 | LitPlenoxel.lambda_tv_background_color = 1.0e-3
21 | LitPlenoxel.lambda_beta = 1.0e-5
22 | LitPlenoxel.lambda_sparsity = 1.0e-10
23 | LitPlenoxel.background_brightness = 0.5
24 | LitPlenoxel.tv_early_only = 0
25 | LitPlenoxel.tv_decay = 0.5
26 | LitPlenoxel.quantize = True
27 |
28 | ResampleCallBack.upsamp_every = 25600
29 |
30 | run.max_steps = 76800
31 | run.save_last_only = True
32 | run.proj_name = "co3d"
33 | run.dataset_name = "co3d"
34 | run.datadir = "data/co3d"
35 | run.logbase = "logs"
36 | run.log_every_n_steps = 100
37 | run.model_name = "plenoxel"
38 | run.seed = 0
39 | run.progressbar_refresh_rate = 100
40 | run.postfix = "v1"
--------------------------------------------------------------------------------
/configs/co3d_v2.gin:
--------------------------------------------------------------------------------
1 | LitData.batch_sampler = "all_images"
2 | LitData.epoch_size = 25600
3 | LitData.batch_size = 5000
4 | LitData.chunk = 10000
5 | LitData.use_pixel_centers = True
6 |
7 | LitPlenoxel.reso = [[128, 128, 128], [256, 256, 256]]
8 | LitPlenoxel.background_nlayers = 16
9 | LitPlenoxel.background_reso = 512
10 | LitPlenoxel.lr_sigma = 3.0e+1
11 | LitPlenoxel.lr_sh = 1.0e-2
12 | LitPlenoxel.lr_sigma_delay_steps = 0
13 | LitPlenoxel.lr_fg_begin_step = 1000
14 | LitPlenoxel.density_thresh = 20.0
15 | LitPlenoxel.thresh_type = "sigma"
16 | LitPlenoxel.near_clip = 0.35
17 | LitPlenoxel.lambda_tv = 5.0e-5
18 | LitPlenoxel.lambda_tv_sh = 5.0e-3
19 | LitPlenoxel.lambda_tv_background_color = 1.0e-3
20 | LitPlenoxel.lambda_tv_background_color = 1.0e-3
21 | LitPlenoxel.lambda_beta = 1.0e-5
22 | LitPlenoxel.lambda_sparsity = 1.0e-10
23 | LitPlenoxel.background_brightness = 0.5
24 | LitPlenoxel.tv_early_only = 0
25 | LitPlenoxel.tv_decay = 0.5
26 | LitPlenoxel.quantize = True
27 |
28 | ResampleCallBack.upsamp_every = 25600
29 |
30 | run.max_steps = 76800
31 | run.save_last_only = True
32 | run.proj_name = "co3d"
33 | run.dataset_name = "co3d"
34 | run.datadir = "data/co3d_v2"
35 | run.logbase = "logs"
36 | run.log_every_n_steps = 100
37 | run.model_name = "plenoxel"
38 | run.seed = 0
39 | run.progressbar_refresh_rate = 100
40 |
41 | load_co3d_data.v2_mode = True
42 | run.postfix = "v2"
--------------------------------------------------------------------------------
/configs/rebuttal_quant/co3d_16bit.gin:
--------------------------------------------------------------------------------
1 | LitData.batch_sampler = "all_images"
2 | LitData.epoch_size = 25600
3 | LitData.batch_size = 5000
4 | LitData.chunk = 10000
5 | LitData.use_pixel_centers = True
6 |
7 | LitPlenoxel.reso = [[128, 128, 128], [256, 256, 256]]
8 | LitPlenoxel.background_nlayers = 16
9 | LitPlenoxel.background_reso = 512
10 | LitPlenoxel.lr_sigma = 3.0e+1
11 | LitPlenoxel.lr_sh = 1.0e-2
12 | LitPlenoxel.lr_sigma_delay_steps = 0
13 | LitPlenoxel.lr_fg_begin_step = 1000
14 | LitPlenoxel.density_thresh = 20.0
15 | LitPlenoxel.thresh_type = "sigma"
16 | LitPlenoxel.near_clip = 0.35
17 | LitPlenoxel.lambda_tv = 5.0e-5
18 | LitPlenoxel.lambda_tv_sh = 5.0e-3
19 | LitPlenoxel.lambda_tv_background_color = 1.0e-3
20 | LitPlenoxel.lambda_tv_background_color = 1.0e-3
21 | LitPlenoxel.lambda_beta = 1.0e-5
22 | LitPlenoxel.lambda_sparsity = 1.0e-10
23 | LitPlenoxel.background_brightness = 0.5
24 | LitPlenoxel.tv_early_only = 0
25 | LitPlenoxel.tv_decay = 0.5
26 | LitPlenoxel.quantize = True
27 | LitPlenoxel.quant_bit = 16
28 |
29 | ResampleCallBack.upsamp_every = 25600
30 |
31 | run.max_steps = 76800
32 | run.save_last_only = True
33 | run.proj_name = "co3d"
34 | run.dataset_name = "co3d"
35 | run.datadir = "data/co3d"
36 | run.logbase = "logs"
37 | run.log_every_n_steps = 100
38 | run.model_name = "plenoxel"
39 | run.seed = 0
40 | run.progressbar_refresh_rate = 100
41 | run.postfix = "16bit"
--------------------------------------------------------------------------------
/configs/rebuttal_quant/co3d_2bit.gin:
--------------------------------------------------------------------------------
1 | LitData.batch_sampler = "all_images"
2 | LitData.epoch_size = 25600
3 | LitData.batch_size = 5000
4 | LitData.chunk = 10000
5 | LitData.use_pixel_centers = True
6 |
7 | LitPlenoxel.reso = [[128, 128, 128], [256, 256, 256]]
8 | LitPlenoxel.background_nlayers = 16
9 | LitPlenoxel.background_reso = 512
10 | LitPlenoxel.lr_sigma = 3.0e+1
11 | LitPlenoxel.lr_sh = 1.0e-2
12 | LitPlenoxel.lr_sigma_delay_steps = 0
13 | LitPlenoxel.lr_fg_begin_step = 1000
14 | LitPlenoxel.density_thresh = 20.0
15 | LitPlenoxel.thresh_type = "sigma"
16 | LitPlenoxel.near_clip = 0.35
17 | LitPlenoxel.lambda_tv = 5.0e-5
18 | LitPlenoxel.lambda_tv_sh = 5.0e-3
19 | LitPlenoxel.lambda_tv_background_color = 1.0e-3
20 | LitPlenoxel.lambda_tv_background_color = 1.0e-3
21 | LitPlenoxel.lambda_beta = 1.0e-5
22 | LitPlenoxel.lambda_sparsity = 1.0e-10
23 | LitPlenoxel.background_brightness = 0.5
24 | LitPlenoxel.tv_early_only = 0
25 | LitPlenoxel.tv_decay = 0.5
26 | LitPlenoxel.quantize = True
27 | LitPlenoxel.quant_bit = 2
28 |
29 | ResampleCallBack.upsamp_every = 25600
30 |
31 | run.max_steps = 76800
32 | run.save_last_only = True
33 | run.proj_name = "co3d"
34 | run.dataset_name = "co3d"
35 | run.datadir = "data/co3d"
36 | run.logbase = "logs"
37 | run.log_every_n_steps = 100
38 | run.model_name = "plenoxel"
39 | run.seed = 0
40 | run.progressbar_refresh_rate = 100
41 | run.postfix = "2bit"
--------------------------------------------------------------------------------
/configs/rebuttal_quant/co3d_2bit_clip.gin:
--------------------------------------------------------------------------------
1 | LitData.batch_sampler = "all_images"
2 | LitData.epoch_size = 25600
3 | LitData.batch_size = 5000
4 | LitData.chunk = 10000
5 | LitData.use_pixel_centers = True
6 |
7 | LitPlenoxel.reso = [[128, 128, 128], [256, 256, 256]]
8 | LitPlenoxel.background_nlayers = 16
9 | LitPlenoxel.background_reso = 512
10 | LitPlenoxel.lr_sigma = 3.0e+1
11 | LitPlenoxel.lr_sh = 1.0e-2
12 | LitPlenoxel.lr_sigma_delay_steps = 0
13 | LitPlenoxel.lr_fg_begin_step = 1000
14 | LitPlenoxel.density_thresh = 20.0
15 | LitPlenoxel.thresh_type = "sigma"
16 | LitPlenoxel.near_clip = 0.35
17 | LitPlenoxel.lambda_tv = 5.0e-5
18 | LitPlenoxel.lambda_tv_sh = 5.0e-3
19 | LitPlenoxel.lambda_tv_background_color = 1.0e-3
20 | LitPlenoxel.lambda_tv_background_color = 1.0e-3
21 | LitPlenoxel.lambda_beta = 1.0e-5
22 | LitPlenoxel.lambda_sparsity = 1.0e-10
23 | LitPlenoxel.background_brightness = 0.5
24 | LitPlenoxel.tv_early_only = 0
25 | LitPlenoxel.tv_decay = 0.5
26 | LitPlenoxel.quantize = True
27 | LitPlenoxel.quant_bit = 2
28 | LitPlenoxel.clip_quant = True
29 |
30 | ResampleCallBack.upsamp_every = 25600
31 |
32 | run.max_steps = 76800
33 | run.save_last_only = True
34 | run.proj_name = "co3d"
35 | run.dataset_name = "co3d"
36 | run.datadir = "data/co3d"
37 | run.logbase = "logs"
38 | run.log_every_n_steps = 100
39 | run.model_name = "plenoxel"
40 | run.seed = 0
41 | run.progressbar_refresh_rate = 100
42 | run.postfix = "clip_2bit"
--------------------------------------------------------------------------------
/configs/rebuttal_quant/co3d_2bit_density.gin:
--------------------------------------------------------------------------------
1 | LitData.batch_sampler = "all_images"
2 | LitData.epoch_size = 25600
3 | LitData.batch_size = 5000
4 | LitData.chunk = 10000
5 | LitData.use_pixel_centers = True
6 |
7 | LitPlenoxel.reso = [[128, 128, 128], [256, 256, 256]]
8 | LitPlenoxel.background_nlayers = 16
9 | LitPlenoxel.background_reso = 512
10 | LitPlenoxel.lr_sigma = 3.0e+1
11 | LitPlenoxel.lr_sh = 1.0e-2
12 | LitPlenoxel.lr_sigma_delay_steps = 0
13 | LitPlenoxel.lr_fg_begin_step = 1000
14 | LitPlenoxel.density_thresh = 20.0
15 | LitPlenoxel.thresh_type = "sigma"
16 | LitPlenoxel.near_clip = 0.35
17 | LitPlenoxel.lambda_tv = 5.0e-5
18 | LitPlenoxel.lambda_tv_sh = 5.0e-3
19 | LitPlenoxel.lambda_tv_background_color = 1.0e-3
20 | LitPlenoxel.lambda_tv_background_color = 1.0e-3
21 | LitPlenoxel.lambda_beta = 1.0e-5
22 | LitPlenoxel.lambda_sparsity = 1.0e-10
23 | LitPlenoxel.background_brightness = 0.5
24 | LitPlenoxel.tv_early_only = 0
25 | LitPlenoxel.tv_decay = 0.5
26 | LitPlenoxel.quantize = True
27 | LitPlenoxel.quantize_density = True
28 | LitPlenoxel.quant_bit = 2
29 |
30 | ResampleCallBack.upsamp_every = 25600
31 |
32 | run.max_steps = 76800
33 | run.save_last_only = True
34 | run.proj_name = "co3d"
35 | run.dataset_name = "co3d"
36 | run.datadir = "data/co3d"
37 | run.logbase = "logs"
38 | run.log_every_n_steps = 100
39 | run.model_name = "plenoxel"
40 | run.seed = 0
41 | run.progressbar_refresh_rate = 100
42 | run.postfix = "density_2bit"
--------------------------------------------------------------------------------
/configs/rebuttal_quant/co3d_4bit.gin:
--------------------------------------------------------------------------------
1 | LitData.batch_sampler = "all_images"
2 | LitData.epoch_size = 25600
3 | LitData.batch_size = 5000
4 | LitData.chunk = 10000
5 | LitData.use_pixel_centers = True
6 |
7 | LitPlenoxel.reso = [[128, 128, 128], [256, 256, 256]]
8 | LitPlenoxel.background_nlayers = 16
9 | LitPlenoxel.background_reso = 512
10 | LitPlenoxel.lr_sigma = 3.0e+1
11 | LitPlenoxel.lr_sh = 1.0e-2
12 | LitPlenoxel.lr_sigma_delay_steps = 0
13 | LitPlenoxel.lr_fg_begin_step = 1000
14 | LitPlenoxel.density_thresh = 20.0
15 | LitPlenoxel.thresh_type = "sigma"
16 | LitPlenoxel.near_clip = 0.35
17 | LitPlenoxel.lambda_tv = 5.0e-5
18 | LitPlenoxel.lambda_tv_sh = 5.0e-3
19 | LitPlenoxel.lambda_tv_background_color = 1.0e-3
20 | LitPlenoxel.lambda_tv_background_color = 1.0e-3
21 | LitPlenoxel.lambda_beta = 1.0e-5
22 | LitPlenoxel.lambda_sparsity = 1.0e-10
23 | LitPlenoxel.background_brightness = 0.5
24 | LitPlenoxel.tv_early_only = 0
25 | LitPlenoxel.tv_decay = 0.5
26 | LitPlenoxel.quantize = True
27 | LitPlenoxel.quant_bit = 4
28 |
29 | ResampleCallBack.upsamp_every = 25600
30 |
31 | run.max_steps = 76800
32 | run.save_last_only = True
33 | run.proj_name = "co3d"
34 | run.dataset_name = "co3d"
35 | run.datadir = "data/co3d"
36 | run.logbase = "logs"
37 | run.log_every_n_steps = 100
38 | run.model_name = "plenoxel"
39 | run.seed = 0
40 | run.progressbar_refresh_rate = 100
41 | run.postfix = "4bit"
--------------------------------------------------------------------------------
/configs/rebuttal_quant/co3d_4bit_clip.gin:
--------------------------------------------------------------------------------
1 | LitData.batch_sampler = "all_images"
2 | LitData.epoch_size = 25600
3 | LitData.batch_size = 5000
4 | LitData.chunk = 10000
5 | LitData.use_pixel_centers = True
6 |
7 | LitPlenoxel.reso = [[128, 128, 128], [256, 256, 256]]
8 | LitPlenoxel.background_nlayers = 16
9 | LitPlenoxel.background_reso = 512
10 | LitPlenoxel.lr_sigma = 3.0e+1
11 | LitPlenoxel.lr_sh = 1.0e-2
12 | LitPlenoxel.lr_sigma_delay_steps = 0
13 | LitPlenoxel.lr_fg_begin_step = 1000
14 | LitPlenoxel.density_thresh = 20.0
15 | LitPlenoxel.thresh_type = "sigma"
16 | LitPlenoxel.near_clip = 0.35
17 | LitPlenoxel.lambda_tv = 5.0e-5
18 | LitPlenoxel.lambda_tv_sh = 5.0e-3
19 | LitPlenoxel.lambda_tv_background_color = 1.0e-3
20 | LitPlenoxel.lambda_tv_background_color = 1.0e-3
21 | LitPlenoxel.lambda_beta = 1.0e-5
22 | LitPlenoxel.lambda_sparsity = 1.0e-10
23 | LitPlenoxel.background_brightness = 0.5
24 | LitPlenoxel.tv_early_only = 0
25 | LitPlenoxel.tv_decay = 0.5
26 | LitPlenoxel.quantize = True
27 | LitPlenoxel.quant_bit = 4
28 | LitPlenoxel.clip_quant = True
29 |
30 | ResampleCallBack.upsamp_every = 25600
31 |
32 | run.max_steps = 76800
33 | run.save_last_only = True
34 | run.proj_name = "co3d"
35 | run.dataset_name = "co3d"
36 | run.datadir = "data/co3d"
37 | run.logbase = "logs"
38 | run.log_every_n_steps = 100
39 | run.model_name = "plenoxel"
40 | run.seed = 0
41 | run.progressbar_refresh_rate = 100
42 | run.postfix = "clip_4bit"
--------------------------------------------------------------------------------
/configs/rebuttal_quant/co3d_4bit_density.gin:
--------------------------------------------------------------------------------
1 | LitData.batch_sampler = "all_images"
2 | LitData.epoch_size = 25600
3 | LitData.batch_size = 5000
4 | LitData.chunk = 10000
5 | LitData.use_pixel_centers = True
6 |
7 | LitPlenoxel.reso = [[128, 128, 128], [256, 256, 256]]
8 | LitPlenoxel.background_nlayers = 16
9 | LitPlenoxel.background_reso = 512
10 | LitPlenoxel.lr_sigma = 3.0e+1
11 | LitPlenoxel.lr_sh = 1.0e-2
12 | LitPlenoxel.lr_sigma_delay_steps = 0
13 | LitPlenoxel.lr_fg_begin_step = 1000
14 | LitPlenoxel.density_thresh = 20.0
15 | LitPlenoxel.thresh_type = "sigma"
16 | LitPlenoxel.near_clip = 0.35
17 | LitPlenoxel.lambda_tv = 5.0e-5
18 | LitPlenoxel.lambda_tv_sh = 5.0e-3
19 | LitPlenoxel.lambda_tv_background_color = 1.0e-3
20 | LitPlenoxel.lambda_tv_background_color = 1.0e-3
21 | LitPlenoxel.lambda_beta = 1.0e-5
22 | LitPlenoxel.lambda_sparsity = 1.0e-10
23 | LitPlenoxel.background_brightness = 0.5
24 | LitPlenoxel.tv_early_only = 0
25 | LitPlenoxel.tv_decay = 0.5
26 | LitPlenoxel.quantize = True
27 | LitPlenoxel.quantize_density = True
28 | LitPlenoxel.quant_bit = 4
29 |
30 |
31 | ResampleCallBack.upsamp_every = 25600
32 |
33 | run.max_steps = 76800
34 | run.save_last_only = True
35 | run.proj_name = "co3d"
36 | run.dataset_name = "co3d"
37 | run.datadir = "data/co3d"
38 | run.logbase = "logs"
39 | run.log_every_n_steps = 100
40 | run.model_name = "plenoxel"
41 | run.seed = 0
42 | run.progressbar_refresh_rate = 100
43 | run.postfix = "density_4bit"
--------------------------------------------------------------------------------
/configs/rebuttal_quant/co3d_8bit_clip.gin:
--------------------------------------------------------------------------------
1 | LitData.batch_sampler = "all_images"
2 | LitData.epoch_size = 25600
3 | LitData.batch_size = 5000
4 | LitData.chunk = 10000
5 | LitData.use_pixel_centers = True
6 |
7 | LitPlenoxel.reso = [[128, 128, 128], [256, 256, 256]]
8 | LitPlenoxel.background_nlayers = 16
9 | LitPlenoxel.background_reso = 512
10 | LitPlenoxel.lr_sigma = 3.0e+1
11 | LitPlenoxel.lr_sh = 1.0e-2
12 | LitPlenoxel.lr_sigma_delay_steps = 0
13 | LitPlenoxel.lr_fg_begin_step = 1000
14 | LitPlenoxel.density_thresh = 20.0
15 | LitPlenoxel.thresh_type = "sigma"
16 | LitPlenoxel.near_clip = 0.35
17 | LitPlenoxel.lambda_tv = 5.0e-5
18 | LitPlenoxel.lambda_tv_sh = 5.0e-3
19 | LitPlenoxel.lambda_tv_background_color = 1.0e-3
20 | LitPlenoxel.lambda_tv_background_color = 1.0e-3
21 | LitPlenoxel.lambda_beta = 1.0e-5
22 | LitPlenoxel.lambda_sparsity = 1.0e-10
23 | LitPlenoxel.background_brightness = 0.5
24 | LitPlenoxel.tv_early_only = 0
25 | LitPlenoxel.tv_decay = 0.5
26 | LitPlenoxel.quantize = True
27 | LitPlenoxel.clip_quant = True
28 |
29 | ResampleCallBack.upsamp_every = 25600
30 |
31 | run.max_steps = 76800
32 | run.save_last_only = True
33 | run.proj_name = "co3d"
34 | run.dataset_name = "co3d"
35 | run.datadir = "data/co3d"
36 | run.logbase = "logs"
37 | run.log_every_n_steps = 100
38 | run.model_name = "plenoxel"
39 | run.seed = 0
40 | run.progressbar_refresh_rate = 100
41 | run.postfix = "clip_8bit"
--------------------------------------------------------------------------------
/configs/rebuttal_quant/co3d_8bit_density.gin:
--------------------------------------------------------------------------------
1 | LitData.batch_sampler = "all_images"
2 | LitData.epoch_size = 25600
3 | LitData.batch_size = 5000
4 | LitData.chunk = 10000
5 | LitData.use_pixel_centers = True
6 |
7 | LitPlenoxel.reso = [[128, 128, 128], [256, 256, 256]]
8 | LitPlenoxel.background_nlayers = 16
9 | LitPlenoxel.background_reso = 512
10 | LitPlenoxel.lr_sigma = 3.0e+1
11 | LitPlenoxel.lr_sh = 1.0e-2
12 | LitPlenoxel.lr_sigma_delay_steps = 0
13 | LitPlenoxel.lr_fg_begin_step = 1000
14 | LitPlenoxel.density_thresh = 20.0
15 | LitPlenoxel.thresh_type = "sigma"
16 | LitPlenoxel.near_clip = 0.35
17 | LitPlenoxel.lambda_tv = 5.0e-5
18 | LitPlenoxel.lambda_tv_sh = 5.0e-3
19 | LitPlenoxel.lambda_tv_background_color = 1.0e-3
20 | LitPlenoxel.lambda_tv_background_color = 1.0e-3
21 | LitPlenoxel.lambda_beta = 1.0e-5
22 | LitPlenoxel.lambda_sparsity = 1.0e-10
23 | LitPlenoxel.background_brightness = 0.5
24 | LitPlenoxel.tv_early_only = 0
25 | LitPlenoxel.tv_decay = 0.5
26 | LitPlenoxel.quantize = True
27 | LitPlenoxel.quantize_density = True
28 |
29 |
30 | ResampleCallBack.upsamp_every = 25600
31 |
32 | run.max_steps = 76800
33 | run.save_last_only = True
34 | run.proj_name = "co3d"
35 | run.dataset_name = "co3d"
36 | run.datadir = "data/co3d"
37 | run.logbase = "logs"
38 | run.log_every_n_steps = 100
39 | run.model_name = "plenoxel"
40 | run.seed = 0
41 | run.progressbar_refresh_rate = 100
42 | run.postfix = "density_8bit"
--------------------------------------------------------------------------------
/configs/rebuttal_reso/co3d_128.gin:
--------------------------------------------------------------------------------
1 | LitData.batch_sampler = "all_images"
2 | LitData.epoch_size = 25600
3 | LitData.batch_size = 5000
4 | LitData.chunk = 10000
5 | LitData.use_pixel_centers = True
6 |
7 | LitPlenoxel.reso = [[64, 64, 64], [128, 128, 128]]
8 | LitPlenoxel.background_nlayers = 16
9 | LitPlenoxel.background_reso = 512
10 | LitPlenoxel.lr_sigma = 3.0e+1
11 | LitPlenoxel.lr_sh = 1.0e-2
12 | LitPlenoxel.lr_sigma_delay_steps = 0
13 | LitPlenoxel.lr_fg_begin_step = 1000
14 | LitPlenoxel.density_thresh = 20.0
15 | LitPlenoxel.thresh_type = "sigma"
16 | LitPlenoxel.near_clip = 0.35
17 | LitPlenoxel.lambda_tv = 5.0e-5
18 | LitPlenoxel.lambda_tv_sh = 5.0e-3
19 | LitPlenoxel.lambda_tv_background_color = 1.0e-3
20 | LitPlenoxel.lambda_tv_background_color = 1.0e-3
21 | LitPlenoxel.lambda_beta = 1.0e-5
22 | LitPlenoxel.lambda_sparsity = 1.0e-10
23 | LitPlenoxel.background_brightness = 0.5
24 | LitPlenoxel.tv_early_only = 0
25 | LitPlenoxel.tv_decay = 0.5
26 | LitPlenoxel.quantize = True
27 |
28 | ResampleCallBack.upsamp_every = 25600
29 |
30 | run.max_steps = 76800
31 | run.save_last_only = True
32 | run.proj_name = "co3d"
33 | run.dataset_name = "co3d"
34 | run.datadir = "data/co3d"
35 | run.logbase = "logs"
36 | run.log_every_n_steps = 100
37 | run.model_name = "plenoxel"
38 | run.seed = 0
39 | run.progressbar_refresh_rate = 100
40 | run.postfix = "128"
--------------------------------------------------------------------------------
/configs/rebuttal_reso/co3d_256.gin:
--------------------------------------------------------------------------------
1 | LitData.batch_sampler = "all_images"
2 | LitData.epoch_size = 25600
3 | LitData.batch_size = 5000
4 | LitData.chunk = 10000
5 | LitData.use_pixel_centers = True
6 |
7 | LitPlenoxel.reso = [[128, 128, 128], [256, 256, 256]]
8 | LitPlenoxel.background_nlayers = 16
9 | LitPlenoxel.background_reso = 512
10 | LitPlenoxel.lr_sigma = 3.0e+1
11 | LitPlenoxel.lr_sh = 1.0e-2
12 | LitPlenoxel.lr_sigma_delay_steps = 0
13 | LitPlenoxel.lr_fg_begin_step = 1000
14 | LitPlenoxel.density_thresh = 20.0
15 | LitPlenoxel.thresh_type = "sigma"
16 | LitPlenoxel.near_clip = 0.35
17 | LitPlenoxel.lambda_tv = 5.0e-5
18 | LitPlenoxel.lambda_tv_sh = 5.0e-3
19 | LitPlenoxel.lambda_tv_background_color = 1.0e-3
20 | LitPlenoxel.lambda_tv_background_color = 1.0e-3
21 | LitPlenoxel.lambda_beta = 1.0e-5
22 | LitPlenoxel.lambda_sparsity = 1.0e-10
23 | LitPlenoxel.background_brightness = 0.5
24 | LitPlenoxel.tv_early_only = 0
25 | LitPlenoxel.tv_decay = 0.5
26 | LitPlenoxel.quantize = True
27 |
28 | ResampleCallBack.upsamp_every = 25600
29 |
30 | run.max_steps = 76800
31 | run.save_last_only = True
32 | run.proj_name = "co3d"
33 | run.dataset_name = "co3d"
34 | run.datadir = "data/co3d"
35 | run.logbase = "logs"
36 | run.log_every_n_steps = 100
37 | run.model_name = "plenoxel"
38 | run.seed = 0
39 | run.progressbar_refresh_rate = 100
40 | run.postfix = "256"
--------------------------------------------------------------------------------
/configs/rebuttal_reso/co3d_384.gin:
--------------------------------------------------------------------------------
1 | LitData.batch_sampler = "all_images"
2 | LitData.epoch_size = 25600
3 | LitData.batch_size = 5000
4 | LitData.chunk = 10000
5 | LitData.use_pixel_centers = True
6 |
7 | LitPlenoxel.reso = [[96, 96, 96], [192, 192, 192], [384, 384, 384]]
8 | LitPlenoxel.background_nlayers = 16
9 | LitPlenoxel.background_reso = 512
10 | LitPlenoxel.lr_sigma = 3.0e+1
11 | LitPlenoxel.lr_sh = 1.0e-2
12 | LitPlenoxel.lr_sigma_delay_steps = 0
13 | LitPlenoxel.lr_fg_begin_step = 1000
14 | LitPlenoxel.density_thresh = 20.0
15 | LitPlenoxel.thresh_type = "sigma"
16 | LitPlenoxel.near_clip = 0.35
17 | LitPlenoxel.lambda_tv = 5.0e-5
18 | LitPlenoxel.lambda_tv_sh = 5.0e-3
19 | LitPlenoxel.lambda_tv_background_color = 1.0e-3
20 | LitPlenoxel.lambda_tv_background_color = 1.0e-3
21 | LitPlenoxel.lambda_beta = 1.0e-5
22 | LitPlenoxel.lambda_sparsity = 1.0e-10
23 | LitPlenoxel.background_brightness = 0.5
24 | LitPlenoxel.tv_early_only = 0
25 | LitPlenoxel.tv_decay = 0.5
26 | LitPlenoxel.quantize = True
27 |
28 | ResampleCallBack.upsamp_every = 25600
29 |
30 | run.max_steps = 76800
31 | run.save_last_only = True
32 | run.proj_name = "co3d"
33 | run.dataset_name = "co3d"
34 | run.datadir = "data/co3d"
35 | run.logbase = "logs"
36 | run.log_every_n_steps = 100
37 | run.model_name = "plenoxel"
38 | run.seed = 0
39 | run.progressbar_refresh_rate = 100
40 | run.postfix = "384"
--------------------------------------------------------------------------------
/configs/rebuttal_reso/co3d_64.gin:
--------------------------------------------------------------------------------
1 | LitData.batch_sampler = "all_images"
2 | LitData.epoch_size = 25600
3 | LitData.batch_size = 5000
4 | LitData.chunk = 10000
5 | LitData.use_pixel_centers = True
6 |
7 | LitPlenoxel.reso = [[32, 32, 32], [64, 64, 64]]
8 | LitPlenoxel.background_nlayers = 16
9 | LitPlenoxel.background_reso = 512
10 | LitPlenoxel.lr_sigma = 3.0e+1
11 | LitPlenoxel.lr_sh = 1.0e-2
12 | LitPlenoxel.lr_sigma_delay_steps = 0
13 | LitPlenoxel.lr_fg_begin_step = 1000
14 | LitPlenoxel.density_thresh = 20.0
15 | LitPlenoxel.thresh_type = "sigma"
16 | LitPlenoxel.near_clip = 0.35
17 | LitPlenoxel.lambda_tv = 5.0e-5
18 | LitPlenoxel.lambda_tv_sh = 5.0e-3
19 | LitPlenoxel.lambda_tv_background_color = 1.0e-3
20 | LitPlenoxel.lambda_tv_background_color = 1.0e-3
21 | LitPlenoxel.lambda_beta = 1.0e-5
22 | LitPlenoxel.lambda_sparsity = 1.0e-10
23 | LitPlenoxel.background_brightness = 0.5
24 | LitPlenoxel.tv_early_only = 0
25 | LitPlenoxel.tv_decay = 0.5
26 | LitPlenoxel.quantize = True
27 |
28 | ResampleCallBack.upsamp_every = 25600
29 |
30 | run.max_steps = 76800
31 | run.save_last_only = True
32 | run.proj_name = "co3d"
33 | run.dataset_name = "co3d"
34 | run.datadir = "data/co3d"
35 | run.logbase = "logs"
36 | run.log_every_n_steps = 100
37 | run.model_name = "plenoxel"
38 | run.seed = 0
39 | run.progressbar_refresh_rate = 100
40 | run.postfix = "64"
--------------------------------------------------------------------------------
/configs/rebuttal_sigma_weight/co3d_sigma_10.gin:
--------------------------------------------------------------------------------
1 | LitData.batch_sampler = "all_images"
2 | LitData.epoch_size = 25600
3 | LitData.batch_size = 5000
4 | LitData.chunk = 10000
5 | LitData.use_pixel_centers = True
6 |
7 | LitPlenoxel.reso = [[128, 128, 128], [256, 256, 256]]
8 | LitPlenoxel.background_nlayers = 16
9 | LitPlenoxel.background_reso = 512
10 | LitPlenoxel.lr_sigma = 3.0e+1
11 | LitPlenoxel.lr_sh = 1.0e-2
12 | LitPlenoxel.lr_sigma_delay_steps = 0
13 | LitPlenoxel.lr_fg_begin_step = 1000
14 | LitPlenoxel.density_thresh = 10.0
15 | LitPlenoxel.thresh_type = "sigma"
16 | LitPlenoxel.near_clip = 0.35
17 | LitPlenoxel.lambda_tv = 5.0e-5
18 | LitPlenoxel.lambda_tv_sh = 5.0e-3
19 | LitPlenoxel.lambda_tv_background_color = 1.0e-3
20 | LitPlenoxel.lambda_tv_background_color = 1.0e-3
21 | LitPlenoxel.lambda_beta = 1.0e-5
22 | LitPlenoxel.lambda_sparsity = 1.0e-10
23 | LitPlenoxel.background_brightness = 0.5
24 | LitPlenoxel.tv_early_only = 0
25 | LitPlenoxel.tv_decay = 0.5
26 | LitPlenoxel.quantize = True
27 |
28 | ResampleCallBack.upsamp_every = 25600
29 |
30 | run.max_steps = 76800
31 | run.save_last_only = True
32 | run.proj_name = "co3d"
33 | run.dataset_name = "co3d"
34 | run.datadir = "data/co3d"
35 | run.logbase = "logs"
36 | run.log_every_n_steps = 100
37 | run.model_name = "plenoxel"
38 | run.seed = 0
39 | run.progressbar_refresh_rate = 100
40 | run.postfix = "sigma10"
--------------------------------------------------------------------------------
/configs/rebuttal_sigma_weight/co3d_sigma_100.gin:
--------------------------------------------------------------------------------
1 | LitData.batch_sampler = "all_images"
2 | LitData.epoch_size = 25600
3 | LitData.batch_size = 5000
4 | LitData.chunk = 10000
5 | LitData.use_pixel_centers = True
6 |
7 | LitPlenoxel.reso = [[128, 128, 128], [256, 256, 256]]
8 | LitPlenoxel.background_nlayers = 16
9 | LitPlenoxel.background_reso = 512
10 | LitPlenoxel.lr_sigma = 3.0e+1
11 | LitPlenoxel.lr_sh = 1.0e-2
12 | LitPlenoxel.lr_sigma_delay_steps = 0
13 | LitPlenoxel.lr_fg_begin_step = 1000
14 | LitPlenoxel.density_thresh = 100.0
15 | LitPlenoxel.thresh_type = "sigma"
16 | LitPlenoxel.near_clip = 0.35
17 | LitPlenoxel.lambda_tv = 5.0e-5
18 | LitPlenoxel.lambda_tv_sh = 5.0e-3
19 | LitPlenoxel.lambda_tv_background_color = 1.0e-3
20 | LitPlenoxel.lambda_tv_background_color = 1.0e-3
21 | LitPlenoxel.lambda_beta = 1.0e-5
22 | LitPlenoxel.lambda_sparsity = 1.0e-10
23 | LitPlenoxel.background_brightness = 0.5
24 | LitPlenoxel.tv_early_only = 0
25 | LitPlenoxel.tv_decay = 0.5
26 | LitPlenoxel.quantize = True
27 |
28 | ResampleCallBack.upsamp_every = 25600
29 |
30 | run.max_steps = 76800
31 | run.save_last_only = True
32 | run.proj_name = "co3d"
33 | run.dataset_name = "co3d"
34 | run.datadir = "data/co3d"
35 | run.logbase = "logs"
36 | run.log_every_n_steps = 100
37 | run.model_name = "plenoxel"
38 | run.seed = 0
39 | run.progressbar_refresh_rate = 100
40 | run.postfix = "sigma100"
--------------------------------------------------------------------------------
/configs/rebuttal_sigma_weight/co3d_sigma_5.gin:
--------------------------------------------------------------------------------
1 | LitData.batch_sampler = "all_images"
2 | LitData.epoch_size = 25600
3 | LitData.batch_size = 5000
4 | LitData.chunk = 10000
5 | LitData.use_pixel_centers = True
6 |
7 | LitPlenoxel.reso = [[128, 128, 128], [256, 256, 256]]
8 | LitPlenoxel.background_nlayers = 16
9 | LitPlenoxel.background_reso = 512
10 | LitPlenoxel.lr_sigma = 3.0e+1
11 | LitPlenoxel.lr_sh = 1.0e-2
12 | LitPlenoxel.lr_sigma_delay_steps = 0
13 | LitPlenoxel.lr_fg_begin_step = 1000
14 | LitPlenoxel.density_thresh = 5.0
15 | LitPlenoxel.thresh_type = "sigma"
16 | LitPlenoxel.near_clip = 0.35
17 | LitPlenoxel.lambda_tv = 5.0e-5
18 | LitPlenoxel.lambda_tv_sh = 5.0e-3
19 | LitPlenoxel.lambda_tv_background_color = 1.0e-3
20 | LitPlenoxel.lambda_tv_background_color = 1.0e-3
21 | LitPlenoxel.lambda_beta = 1.0e-5
22 | LitPlenoxel.lambda_sparsity = 1.0e-10
23 | LitPlenoxel.background_brightness = 0.5
24 | LitPlenoxel.tv_early_only = 0
25 | LitPlenoxel.tv_decay = 0.5
26 | LitPlenoxel.quantize = True
27 |
28 | ResampleCallBack.upsamp_every = 25600
29 |
30 | run.max_steps = 76800
31 | run.save_last_only = True
32 | run.proj_name = "co3d"
33 | run.dataset_name = "co3d"
34 | run.datadir = "data/co3d"
35 | run.logbase = "logs"
36 | run.log_every_n_steps = 100
37 | run.model_name = "plenoxel"
38 | run.seed = 0
39 | run.progressbar_refresh_rate = 100
40 | run.postfix = "sigma5"
--------------------------------------------------------------------------------
/configs/rebuttal_sigma_weight/co3d_weight_1.28.gin:
--------------------------------------------------------------------------------
1 | LitData.batch_sampler = "all_images"
2 | LitData.epoch_size = 25600
3 | LitData.batch_size = 5000
4 | LitData.chunk = 10000
5 | LitData.use_pixel_centers = True
6 |
7 | LitPlenoxel.reso = [[128, 128, 128], [256, 256, 256]]
8 | LitPlenoxel.background_nlayers = 16
9 | LitPlenoxel.background_reso = 512
10 | LitPlenoxel.lr_sigma = 3.0e+1
11 | LitPlenoxel.lr_sh = 1.0e-2
12 | LitPlenoxel.lr_sigma_delay_steps = 0
13 | LitPlenoxel.lr_fg_begin_step = 1000
14 | LitPlenoxel.weight_thresh = 1.28
15 | LitPlenoxel.thresh_type = "weight"
16 | LitPlenoxel.near_clip = 0.35
17 | LitPlenoxel.lambda_tv = 5.0e-5
18 | LitPlenoxel.lambda_tv_sh = 5.0e-3
19 | LitPlenoxel.lambda_tv_background_color = 1.0e-3
20 | LitPlenoxel.lambda_tv_background_color = 1.0e-3
21 | LitPlenoxel.lambda_beta = 1.0e-5
22 | LitPlenoxel.lambda_sparsity = 1.0e-10
23 | LitPlenoxel.background_brightness = 0.5
24 | LitPlenoxel.tv_early_only = 0
25 | LitPlenoxel.tv_decay = 0.5
26 | LitPlenoxel.quantize = True
27 |
28 | ResampleCallBack.upsamp_every = 25600
29 |
30 | run.max_steps = 76800
31 | run.save_last_only = True
32 | run.proj_name = "co3d"
33 | run.dataset_name = "co3d"
34 | run.datadir = "data/co3d"
35 | run.logbase = "logs"
36 | run.log_every_n_steps = 100
37 | run.model_name = "plenoxel"
38 | run.seed = 0
39 | run.progressbar_refresh_rate = 100
40 | run.postfix = "weight1.28"
--------------------------------------------------------------------------------
/configs/rebuttal_sigma_weight/co3d_weight_2.56.gin:
--------------------------------------------------------------------------------
1 | LitData.batch_sampler = "all_images"
2 | LitData.epoch_size = 25600
3 | LitData.batch_size = 5000
4 | LitData.chunk = 10000
5 | LitData.use_pixel_centers = True
6 |
7 | LitPlenoxel.reso = [[128, 128, 128], [256, 256, 256]]
8 | LitPlenoxel.background_nlayers = 16
9 | LitPlenoxel.background_reso = 512
10 | LitPlenoxel.lr_sigma = 3.0e+1
11 | LitPlenoxel.lr_sh = 1.0e-2
12 | LitPlenoxel.lr_sigma_delay_steps = 0
13 | LitPlenoxel.lr_fg_begin_step = 1000
14 | LitPlenoxel.weight_thresh = 2.56
15 | LitPlenoxel.thresh_type = "weight"
16 | LitPlenoxel.near_clip = 0.35
17 | LitPlenoxel.lambda_tv = 5.0e-5
18 | LitPlenoxel.lambda_tv_sh = 5.0e-3
19 | LitPlenoxel.lambda_tv_background_color = 1.0e-3
20 | LitPlenoxel.lambda_tv_background_color = 1.0e-3
21 | LitPlenoxel.lambda_beta = 1.0e-5
22 | LitPlenoxel.lambda_sparsity = 1.0e-10
23 | LitPlenoxel.background_brightness = 0.5
24 | LitPlenoxel.tv_early_only = 0
25 | LitPlenoxel.tv_decay = 0.5
26 | LitPlenoxel.quantize = True
27 |
28 | ResampleCallBack.upsamp_every = 25600
29 |
30 | run.max_steps = 76800
31 | run.save_last_only = True
32 | run.proj_name = "co3d"
33 | run.dataset_name = "co3d"
34 | run.datadir = "data/co3d"
35 | run.logbase = "logs"
36 | run.log_every_n_steps = 100
37 | run.model_name = "plenoxel"
38 | run.seed = 0
39 | run.progressbar_refresh_rate = 100
40 | run.postfix = "weight2.56"
--------------------------------------------------------------------------------
/configs/rebuttal_sigma_weight/co3d_weight_none.gin:
--------------------------------------------------------------------------------
1 | LitData.batch_sampler = "all_images"
2 | LitData.epoch_size = 25600
3 | LitData.batch_size = 5000
4 | LitData.chunk = 10000
5 | LitData.use_pixel_centers = True
6 |
7 | LitPlenoxel.reso = [[128, 128, 128], [256, 256, 256]]
8 | LitPlenoxel.background_nlayers = 16
9 | LitPlenoxel.background_reso = 512
10 | LitPlenoxel.lr_sigma = 3.0e+1
11 | LitPlenoxel.lr_sh = 1.0e-2
12 | LitPlenoxel.lr_sigma_delay_steps = 0
13 | LitPlenoxel.lr_fg_begin_step = 1000
14 | LitPlenoxel.weight_thresh = -10000000
15 | LitPlenoxel.thresh_type = "weight"
16 | LitPlenoxel.near_clip = 0.35
17 | LitPlenoxel.lambda_tv = 5.0e-5
18 | LitPlenoxel.lambda_tv_sh = 5.0e-3
19 | LitPlenoxel.lambda_tv_background_color = 1.0e-3
20 | LitPlenoxel.lambda_tv_background_color = 1.0e-3
21 | LitPlenoxel.lambda_beta = 1.0e-5
22 | LitPlenoxel.lambda_sparsity = 1.0e-10
23 | LitPlenoxel.background_brightness = 0.5
24 | LitPlenoxel.tv_early_only = 0
25 | LitPlenoxel.tv_decay = 0.5
26 |
27 | ResampleCallBack.upsamp_every = 25600
28 |
29 | run.max_steps = 76800
30 | run.save_last_only = True
31 | run.proj_name = "co3d"
32 | run.dataset_name = "co3d"
33 | run.datadir = "data/co3d"
34 | run.logbase = "logs"
35 | run.log_every_n_steps = 100
36 | run.model_name = "plenoxel"
37 | run.seed = 0
38 | run.progressbar_refresh_rate = 100
39 | run.postfix = "none"
--------------------------------------------------------------------------------
/configs/scannet.gin:
--------------------------------------------------------------------------------
1 | LitData.batch_sampler = "all_images"
2 | LitData.epoch_size = 25600
3 | LitData.batch_size = 5000
4 | LitData.chunk = 10000
5 | LitData.use_pixel_centers = True
6 |
7 | LitDataScannet.frame_skip = 1
8 | LitDataScannet.max_frame = 1500
9 | LitDataScannet.max_image_dim = 640
10 | LitDataScannet.cam_scale_factor = 1.0
11 | LitDataScannet.blur_thresh = 10
12 | LitDataScannet.use_depth = True
13 | LitDataScannet.use_scans = True
14 |
15 | LitPlenoxel.reso = [[256, 256, 256], [256, 256, 256]]
16 | LitPlenoxel.background_nlayers = 0
17 | LitPlenoxel.background_reso = 512
18 | LitPlenoxel.lr_sigma = 3.0e+1
19 | LitPlenoxel.lr_sh = 1.0e-2
20 | LitPlenoxel.lr_sigma_delay_steps = 0
21 | LitPlenoxel.lr_fg_begin_step = 1000
22 | LitPlenoxel.density_thresh = 5.0
23 | LitPlenoxel.thresh_type = "sigma"
24 | LitPlenoxel.near_clip = 0.35
25 | LitPlenoxel.lambda_tv = 5.0e-5
26 | LitPlenoxel.lambda_tv_sh = 5.0e-3
27 | LitPlenoxel.lambda_tv_background_color = 1.0e-3
28 | LitPlenoxel.lambda_tv_background_color = 1.0e-3
29 | LitPlenoxel.lambda_beta = 1.0e-5
30 | LitPlenoxel.lambda_sparsity = 1.0e-10
31 | LitPlenoxel.background_brightness = 0.5
32 | LitPlenoxel.tv_early_only = 0
33 | LitPlenoxel.tv_decay = 0.5
34 | LitPlenoxel.quantize = True
35 | LitPlenoxel.init_grid_with_pcd=True
36 |
37 | ResampleCallBack.upsamp_every = 25600
38 |
39 | run.max_steps = 51200
40 | run.save_last_only = True
41 | run.proj_name = "scannet"
42 | run.dataset_name = "scannet"
43 | run.datadir = "data/scannet"
44 | run.logbase = "logs"
45 | run.log_every_n_steps = 100
46 | run.model_name = "plenoxel"
47 | run.seed = 0
48 | run.progressbar_refresh_rate = 100
49 | run.check_val_every_n_epoch = 999 # no validation
50 | run.run_render = False
--------------------------------------------------------------------------------
/configs/scannet_depth.gin:
--------------------------------------------------------------------------------
1 | LitData.batch_sampler = "all_images"
2 | LitData.epoch_size = 25600
3 | LitData.batch_size = 5000
4 | LitData.chunk = 10000
5 | LitData.use_pixel_centers = True
6 |
7 | LitDataScannet.frame_skip = 1
8 | LitDataScannet.max_frame = 1500
9 | LitDataScannet.max_image_dim = 640
10 | LitDataScannet.cam_scale_factor = 1.0
11 | LitDataScannet.blur_thresh = 10
12 | LitDataScannet.use_depth = True
13 | LitDataScannet.use_scans = True
14 | LitDataScannet.pcd_name = 'tsdf_pcd_0.015.pcd'
15 |
16 | LitPlenoxel.reso = [[256, 256, 256]]
17 | LitPlenoxel.background_nlayers = 0
18 | LitPlenoxel.background_reso = 512
19 | LitPlenoxel.lr_sigma = 3.0e+1
20 | LitPlenoxel.lr_sh = 1.0e-2
21 | LitPlenoxel.lr_sigma_delay_steps = 0
22 | LitPlenoxel.lr_fg_begin_step = 1000
23 | LitPlenoxel.density_thresh = 5.0
24 | LitPlenoxel.thresh_type = "sigma"
25 | LitPlenoxel.near_clip = 0.35
26 | LitPlenoxel.lambda_tv = 5.0e-5
27 | LitPlenoxel.lambda_tv_sh = 5.0e-3
28 | LitPlenoxel.lambda_tv_background_color = 1.0e-3
29 | LitPlenoxel.lambda_tv_background_color = 1.0e-3
30 | LitPlenoxel.lambda_beta = 1.0e-5
31 | LitPlenoxel.lambda_sparsity = 1.0e-10
32 | LitPlenoxel.background_brightness = 0.5
33 | LitPlenoxel.tv_early_only = 0
34 | LitPlenoxel.tv_decay = 0.5
35 | LitPlenoxel.quantize = True
36 |
37 | ResampleCallBack.upsamp_every = 25600
38 |
39 | run.max_steps = 51200
40 | run.save_last_only = True
41 | run.proj_name = "scannet"
42 | run.dataset_name = "scannet"
43 | run.datadir = "data/scannet_extracted"
44 | run.logbase = "logs/depth_reso256_pad1"
45 | run.log_every_n_steps = 100
46 | run.model_name = "plenoxel"
47 | run.seed = 0
48 | run.progressbar_refresh_rate = 100
49 | run.check_val_every_n_epoch = 999 # no validation
50 | run.run_render = False
51 |
--------------------------------------------------------------------------------
/configs/scannet_tsdf.gin:
--------------------------------------------------------------------------------
1 | LitData.batch_sampler = "all_images"
2 | LitData.epoch_size = 25600
3 | LitData.batch_size = 5000
4 | LitData.chunk = 10000
5 | LitData.use_pixel_centers = True
6 |
7 | LitDataScannet.frame_skip = 1
8 | LitDataScannet.max_frame = 1500
9 | LitDataScannet.max_image_dim = 640
10 | LitDataScannet.cam_scale_factor = 1.0
11 | LitDataScannet.blur_thresh = 10
12 | LitDataScannet.use_depth = False
13 | LitDataScannet.use_scans = True
14 | LitDataScannet.pcd_name = 'tsdf_pcd_512.npy'
15 |
16 | LitPlenoxel.reso = [[256, 256, 256]]
17 | LitPlenoxel.background_nlayers = 0
18 | LitPlenoxel.background_reso = 512
19 | LitPlenoxel.lr_sigma = 3.0e+1
20 | LitPlenoxel.lr_sh = 1.0e-2
21 | LitPlenoxel.lr_sigma_delay_steps = 0
22 | LitPlenoxel.lr_fg_begin_step = 1000
23 | LitPlenoxel.density_thresh = 5.0
24 | LitPlenoxel.thresh_type = "sigma"
25 | LitPlenoxel.near_clip = 0.35
26 | LitPlenoxel.lambda_tv = 5.0e-5
27 | LitPlenoxel.lambda_tv_sh = 5.0e-3
28 | LitPlenoxel.lambda_tv_background_color = 1.0e-3
29 | LitPlenoxel.lambda_tv_background_color = 1.0e-3
30 | LitPlenoxel.lambda_beta = 1.0e-5
31 | LitPlenoxel.lambda_sparsity = 1.0e-10
32 | LitPlenoxel.background_brightness = 0.5
33 | LitPlenoxel.tv_early_only = 0
34 | LitPlenoxel.tv_decay = 0.5
35 | LitPlenoxel.quantize = True
36 |
37 | ResampleCallBack.upsamp_every = 25600
38 |
39 | run.max_steps = 51200
40 | run.save_last_only = True
41 | run.proj_name = "scannet"
42 | run.dataset_name = "scannet"
43 | run.datadir = "data/scannet_extracted"
44 | run.logbase = "logs"
45 | run.log_every_n_steps = 100
46 | run.model_name = "plenoxel"
47 | run.seed = 0
48 | run.progressbar_refresh_rate = 100
49 | run.check_val_every_n_epoch = 999 # no validation
50 | run.run_render = False
--------------------------------------------------------------------------------
/configs/scannet_tsdf_reso1024_pad3.gin:
--------------------------------------------------------------------------------
1 | include "./configs/scannet_tsdf.gin"
2 |
3 | LitPlenoxel.reso = [[1024, 1024, 1024]]
4 | LitPlenoxel.upsample_stride = 3
5 |
6 | run.logbase = "logs/tsdf_reso1024_pad3"
7 |
--------------------------------------------------------------------------------
/configs/scannet_tsdf_reso256_pad1.gin:
--------------------------------------------------------------------------------
1 | include "./configs/scannet_tsdf.gin"
2 |
3 | LitPlenoxel.reso = [[256, 256, 256]]
4 | LitPlenoxel.upsample_stride = 1
5 |
6 | run.logbase = "logs/tsdf_reso256_pad1"
--------------------------------------------------------------------------------
/configs/scannet_tsdf_reso256_pad2.gin:
--------------------------------------------------------------------------------
1 | include "./configs/scannet_tsdf.gin"
2 |
3 | LitPlenoxel.reso = [[256, 256, 256]]
4 | LitPlenoxel.upsample_stride = 2
5 |
6 | run.logbase = "logs/tsdf_reso256_pad2"
--------------------------------------------------------------------------------
/configs/scannet_tsdf_reso512_pad1.gin:
--------------------------------------------------------------------------------
1 | include "./configs/scannet_tsdf.gin"
2 |
3 | LitPlenoxel.reso = [[512, 512, 512]]
4 | LitPlenoxel.upsample_stride = 1
5 |
6 | run.logbase = "logs/tsdf_reso512_pad1"
--------------------------------------------------------------------------------
/configs/scannet_tsdf_reso512_pad2.gin:
--------------------------------------------------------------------------------
1 | include "./configs/scannet_tsdf.gin"
2 |
3 | LitPlenoxel.reso = [[512, 512, 512]]
4 | LitPlenoxel.upsample_stride = 2
5 |
6 | run.logbase = "logs/tsdf_reso512_pad2"
--------------------------------------------------------------------------------
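The `scannet_tsdf_reso*_pad*.gin` files above illustrate gin's `include` mechanism: each variant pulls in the base `configs/scannet_tsdf.gin` and overrides only the grid resolution, upsampling stride, and log directory, with later bindings taking precedence. A new variant needs only its overrides; the sketch below is hypothetical (not part of the repository) and assumes the same binding names:

```
include "./configs/scannet_tsdf.gin"

LitPlenoxel.reso = [[512, 512, 512]]
LitPlenoxel.upsample_stride = 3

run.logbase = "logs/tsdf_reso512_pad3"
```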
/data/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/POSTECH-CVLab/PeRFception/a3c34d4997ce6807a5b4e3debe086238cc951e61/data/.gitkeep
--------------------------------------------------------------------------------
/dataloader/co3d_lists/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/POSTECH-CVLab/PeRFception/a3c34d4997ce6807a5b4e3debe086238cc951e61/dataloader/co3d_lists/.gitkeep
--------------------------------------------------------------------------------
/dataloader/data_util/co3d.py:
--------------------------------------------------------------------------------
1 | import glob
2 | import gzip
3 | import json
4 | import os
5 |
6 | import cv2
7 | import numpy as np
8 | import scipy as sp
9 | import gin
10 | import torch
11 |
12 | from dataloader.random_pose import random_pose, pose_interp
13 | from dataloader.spherical_poses import spherical_poses
14 |
15 |
16 | def find_files(dir, exts):
17 | if os.path.isdir(dir):
18 | files_grabbed = []
19 | for ext in exts:
20 | files_grabbed.extend(glob.glob(os.path.join(dir, ext)))
21 | if len(files_grabbed) > 0:
22 | files_grabbed = sorted(files_grabbed)
23 | return files_grabbed
24 | else:
25 | return []
26 |
27 |
28 | def similarity_from_cameras(c2w, fix_rot=False):
29 | """
30 | Get a similarity transform to normalize dataset
31 | from c2w (OpenCV convention) cameras
32 |     :param c2w: (N, 4, 4) camera-to-world matrices
33 |     :return: transform (4, 4), scale (float)
34 | """
35 | t = c2w[:, :3, 3]
36 | R = c2w[:, :3, :3]
37 |
38 | # (1) Rotate the world so that z+ is the up axis
39 | # we estimate the up axis by averaging the camera up axes
40 | ups = np.sum(R * np.array([0, -1.0, 0]), axis=-1)
41 | world_up = np.mean(ups, axis=0)
42 | world_up /= np.linalg.norm(world_up)
43 |
44 | up_camspace = np.array([0.0, -1.0, 0.0])
45 | c = (up_camspace * world_up).sum()
46 | cross = np.cross(world_up, up_camspace)
47 | skew = np.array(
48 | [
49 | [0.0, -cross[2], cross[1]],
50 | [cross[2], 0.0, -cross[0]],
51 | [-cross[1], cross[0], 0.0],
52 | ]
53 | )
54 | if c > -1:
55 | R_align = np.eye(3) + skew + (skew @ skew) * 1 / (1 + c)
56 | else:
57 | # In the unlikely case the original data has y+ up axis,
58 | # rotate 180-deg about x axis
59 | R_align = np.array([[-1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]])
60 |
61 | if fix_rot:
62 | R_align = np.eye(3)
63 | R = np.eye(3)
64 | else:
65 | R = R_align @ R
66 | fwds = np.sum(R * np.array([0, 0.0, 1.0]), axis=-1)
67 | t = (R_align @ t[..., None])[..., 0]
68 |
69 | # (2) Recenter the scene using camera center rays
70 | # find the closest point to the origin for each camera's center ray
71 | nearest = t + (fwds * -t).sum(-1)[:, None] * fwds
72 |
73 | # median for more robustness
74 | translate = -np.median(nearest, axis=0)
75 |
76 | # translate = -np.mean(t, axis=0) # DEBUG
77 |
78 | transform = np.eye(4)
79 | transform[:3, 3] = translate
80 | transform[:3, :3] = R_align
81 |
82 | # (3) Rescale the scene using camera distances
83 | scale = 1.0 / np.median(np.linalg.norm(t + translate, axis=-1))
84 | return transform, scale
85 |
86 |
87 | @gin.configurable()
88 | def load_co3d_data(
89 | datadir: str,
90 | scene_name: str,
91 | max_image_dim: int,
92 | cam_scale_factor: float,
93 | render_scene_interp: bool = False,
94 | render_random_pose: bool = True,
95 | interp_fac: int = 5,
96 | v2_mode: bool = False
97 | ):
98 |
99 | with open("dataloader/co3d_lists/co3d_list.json") as fp:
100 | co3d_lists = json.load(fp)
101 |
102 | datadir = datadir.rstrip("/")
103 | cls_name = co3d_lists[scene_name]
104 | basedir = os.path.join(datadir, cls_name, scene_name)
105 | cam_trans = np.diag(np.array([-1, -1, 1, 1], dtype=np.float32))
106 |
107 | scene_number = basedir.split("/")[-1]
108 |
109 | json_path = os.path.join(basedir, "..", "frame_annotations.jgz")
110 | with gzip.open(json_path, "r") as fp:
111 | all_frames_data = json.load(fp)
112 |
113 | frame_data, images, intrinsics, extrinsics, image_sizes = [], [], [], [], []
114 |
115 | for temporal_data in all_frames_data:
116 | if temporal_data["sequence_name"] == scene_number:
117 | frame_data.append(temporal_data)
118 |
119 | used = []
120 | for (i, frame) in enumerate(frame_data):
121 | img = cv2.imread(os.path.join(datadir, frame["image"]["path"]))
122 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) / 255.0
123 |
124 | H, W = frame["image"]["size"]
125 | max_hw = max(H, W)
126 | approx_scale = max_image_dim / max_hw
127 |
128 | if approx_scale < 1.0:
129 | H2 = int(approx_scale * H)
130 | W2 = int(approx_scale * W)
131 | img = cv2.resize(img, (W2, H2), interpolation=cv2.INTER_AREA)
132 | else:
133 | H2 = H
134 | W2 = W
135 |
136 | image_size = np.array([H2, W2])
137 | fxy = np.array(frame["viewpoint"]["focal_length"])
138 | cxy = np.array(frame["viewpoint"]["principal_point"])
139 | R = np.array(frame["viewpoint"]["R"])
140 | T = np.array(frame["viewpoint"]["T"])
141 |
142 | if v2_mode:
143 | min_HW = min(W2, H2)
144 | image_size_half = np.array([W2 * 0.5, H2 * 0.5], dtype=np.float32)
145 | scale_arr = np.array([min_HW * 0.5, min_HW * 0.5], dtype=np.float32)
146 | fxy_x = fxy * scale_arr
147 | prp_x = np.array([W2 * 0.5, H2 * 0.5], dtype=np.float32) - cxy * scale_arr
148 | cxy = (image_size_half - prp_x) / image_size_half
149 | fxy = fxy_x / image_size_half
150 |
151 | scale_arr = np.array([W2 * 0.5, H2 * 0.5], dtype=np.float32)
152 | focal = fxy * scale_arr
153 | prp = -1.0 * (cxy - 1.0) * scale_arr
154 |
155 | pose = np.eye(4)
156 | pose[:3, :3] = R
157 | pose[:3, 3:] = -R @ T[..., None]
158 | pose = pose @ cam_trans
159 | intrinsic = np.array(
160 | [
161 | [focal[0], 0.0, prp[0], 0.0],
162 | [0.0, focal[1], prp[1], 0.0],
163 | [0.0, 0.0, 1.0, 0.0],
164 | [0.0, 0.0, 0.0, 1.0],
165 | ]
166 | )
167 |
168 | if any([np.all(pose == _pose) for _pose in extrinsics]):
169 | continue
170 |
171 | used.append(i)
172 | image_sizes.append(image_size)
173 | intrinsics.append(intrinsic)
174 | extrinsics.append(pose)
175 | images.append(img)
176 |
177 | intrinsics = np.stack(intrinsics)
178 | extrinsics = np.stack(extrinsics)
179 | image_sizes = np.stack(image_sizes)
180 |
181 |     H_median, W_median = np.median(image_sizes, axis=0)
184 |
185 | H_inlier = np.abs(image_sizes[:, 0] - H_median) / H_median < 0.1
186 | W_inlier = np.abs(image_sizes[:, 1] - W_median) / W_median < 0.1
187 | inlier = np.logical_and(H_inlier, W_inlier)
188 | dists = np.linalg.norm(
189 | extrinsics[:, :3, 3] - np.median(extrinsics[:, :3, 3], axis=0), axis=-1
190 | )
191 | med = np.median(dists)
192 | good_mask = dists < (med * 5.0)
193 | inlier = np.logical_and(inlier, good_mask)
194 |
195 | if inlier.sum() != 0:
196 | intrinsics = intrinsics[inlier]
197 | extrinsics = extrinsics[inlier]
198 | image_sizes = image_sizes[inlier]
199 | images = [images[i] for i in range(len(inlier)) if inlier[i]]
200 |
201 | extrinsics = np.stack(extrinsics)
202 | T, sscale = similarity_from_cameras(extrinsics)
203 | extrinsics = T @ extrinsics
204 |
205 | extrinsics[:, :3, 3] *= sscale * cam_scale_factor
206 |
207 | num_frames = len(extrinsics)
208 |
209 | i_all = np.arange(num_frames)
210 | i_test = i_all[::10]
211 | i_val = i_test
212 |     i_train = np.array([i for i in i_all if i not in i_test])
213 | i_split = (i_train, i_val, i_test, i_all)
214 |
215 | if render_random_pose:
216 | render_poses = random_pose(extrinsics[i_all], 50)
217 | elif render_scene_interp:
218 | render_poses = pose_interp(extrinsics[i_all], interp_fac)
219 | # render_poses = spherical_poses(sscale * cam_scale_factor * np.eye(4))
220 |
221 | near, far = 0., 1.
222 | ndc_coeffs = (-1., -1.)
223 |
224 | label_info = {}
225 | label_info["T"] = T
226 | label_info["sscale"] = sscale * cam_scale_factor
227 | label_info["class_label"] = basedir.rstrip("/").split("/")[-2]
228 | label_info["extrinsics"] = extrinsics
229 | label_info["intrinsics"] = intrinsics
230 | label_info["image_sizes"] = image_sizes
231 |
232 | return (
233 | images,
234 | intrinsics,
235 | extrinsics,
236 | image_sizes,
237 | near,
238 | far,
239 | ndc_coeffs,
240 | i_split,
241 | render_poses,
242 | label_info
243 | )
244 |
--------------------------------------------------------------------------------
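
The similarity transform above is the core of the CO3D normalization: rotate the estimated world up-axis to z+, recenter on the median closest point of the camera center rays, and rescale so the median camera distance becomes 1. Below is a minimal sketch of how the returned (transform, scale) pair is applied, mirroring the `extrinsics` handling inside `load_co3d_data`; the toy poses are made up:

    import numpy as np

    from dataloader.data_util.co3d import similarity_from_cameras

    # Toy c2w poses (hypothetical data): identity rotations, random centers.
    rng = np.random.default_rng(0)
    c2w = np.tile(np.eye(4), (5, 1, 1))
    c2w[:, :3, 3] = rng.normal(size=(5, 3)) + np.array([0.0, 0.0, 4.0])

    T, scale = similarity_from_cameras(c2w)  # (4, 4) transform, scalar scale
    c2w = T @ c2w                            # rotate and recenter every pose
    c2w[:, :3, 3] *= scale                   # rescale the camera centers

    # Median distance of camera centers from the origin is now ~1.0.
    print(np.median(np.linalg.norm(c2w[:, :3, 3], axis=-1)))
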
/dataloader/data_util/common.py:
--------------------------------------------------------------------------------
1 | import glob
2 | import os
3 |
4 | import cc3d
5 | import MinkowskiEngine as ME
6 | import numpy as np
7 | import torch
8 |
9 |
10 | def find_files(dir, exts):
11 | if os.path.isdir(dir):
12 | files_grabbed = []
13 | for ext in exts:
14 | files_grabbed.extend(glob.glob(os.path.join(dir, ext)))
15 | if len(files_grabbed) > 0:
16 | files_grabbed = sorted(files_grabbed)
17 | return files_grabbed
18 | else:
19 | return []
20 |
21 |
22 | def similarity_from_cameras(c2w):
23 | """
24 | Get a similarity transform to normalize dataset
25 | from c2w (OpenCV convention) cameras
26 |     :param c2w: (N, 4, 4)
27 | :return T (4,4) , scale (float)
28 | """
29 | t = c2w[:, :3, 3]
30 | R = c2w[:, :3, :3]
31 |
32 | # (1) Rotate the world so that z+ is the up axis
33 | # we estimate the up axis by averaging the camera up axes
34 | ups = np.sum(R * np.array([0, -1.0, 0]), axis=-1)
35 | world_up = np.mean(ups, axis=0)
36 | world_up /= np.linalg.norm(world_up)
37 |
38 | up_camspace = np.array([0.0, -1.0, 0.0])
39 | c = (up_camspace * world_up).sum()
40 | cross = np.cross(world_up, up_camspace)
41 | skew = np.array(
42 | [
43 | [0.0, -cross[2], cross[1]],
44 | [cross[2], 0.0, -cross[0]],
45 | [-cross[1], cross[0], 0.0],
46 | ]
47 | )
48 | if c > -1:
49 | R_align = np.eye(3) + skew + (skew @ skew) * 1 / (1 + c)
50 | else:
51 | # In the unlikely case the original data has y+ up axis,
52 | # rotate 180-deg about x axis
53 | R_align = np.array([[-1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]])
54 |
55 | R = R_align @ R
56 | fwds = np.sum(R * np.array([0, 0.0, 1.0]), axis=-1)
57 | t = (R_align @ t[..., None])[..., 0]
58 |
59 | # (2) Recenter the scene using camera center rays
60 | # find the closest point to the origin for each camera's center ray
61 | nearest = t + (fwds * -t).sum(-1)[:, None] * fwds
62 |
63 | # median for more robustness
64 | translate = -np.median(nearest, axis=0)
65 |
66 | # translate = -np.mean(t, axis=0) # DEBUG
67 |
68 | transform = np.eye(4)
69 | transform[:3, 3] = translate
70 | transform[:3, :3] = R_align
71 |
72 | # (3) Rescale the scene using camera distances
73 | scale = 1.0 / np.median(np.linalg.norm(t + translate, axis=-1))
74 | return transform, scale
75 |
76 |
77 | def connected_component_filter(xyz, voxel_size):
78 | svoxel, idx, idx_inverse = ME.utils.sparse_quantize(
79 | xyz / voxel_size, return_index=True, return_inverse=True
80 | )
81 | svoxel -= svoxel.min(0, keepdim=True).values
82 | svoxel = svoxel.long()
83 | dvoxel = torch.zeros((svoxel + 1).max(0).values.tolist())
84 | dvoxel[svoxel[:, 0], svoxel[:, 1], svoxel[:, 2]] = 1
85 | labels_out = cc3d.connected_components(dvoxel.numpy(), connectivity=26)
86 | labels_out = labels_out[svoxel[:, 0], svoxel[:, 1], svoxel[:, 2]]
87 | counts = np.bincount(labels_out)
88 | argmax = np.argmax(counts)
89 | labels_inverse = labels_out[idx_inverse]
90 | sel = labels_inverse == argmax
91 | print(
92 | f">>>> connected component filtering, from {xyz.shape[0]} to {sel.sum()} <<<<"
93 | )
94 | return sel
95 |
--------------------------------------------------------------------------------
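
`connected_component_filter` voxelizes a point cloud, labels 26-connected voxel components with `cc3d`, and keeps only the points belonging to the largest component. A toy sketch, assuming MinkowskiEngine and cc3d are installed; the two synthetic blobs are made up for illustration:

    import numpy as np
    import torch

    from dataloader.data_util.common import connected_component_filter

    # Two well-separated blobs; with a voxel size far below the gap,
    # only the larger blob should survive the filter.
    blob_a = np.random.rand(1000, 3) * 0.5        # dense blob near the origin
    blob_b = np.random.rand(100, 3) * 0.5 + 5.0   # small blob far away
    xyz = torch.from_numpy(np.concatenate([blob_a, blob_b]).astype(np.float32))

    sel = connected_component_filter(xyz, voxel_size=0.1)  # bool mask per point
    print(sel.sum())  # ~1000: the larger component is kept
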
/dataloader/litdata.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import gin
4 | import numpy as np
5 |
6 | from dataloader.data_util.co3d import load_co3d_data
7 | from dataloader.data_util.scannet import load_scannet_data, load_scannet_data_ext
8 | from dataloader.interface import LitData
9 |
10 |
11 | @gin.configurable()
12 | class LitDataCo3D(LitData):
13 | def __init__(
14 | self,
15 | datadir: str,
16 | scene_name: str,
17 | accelerator: bool,
18 | num_gpus: int,
19 | num_tpus: int,
20 | # Co3D specific arguments
21 | max_image_dim: int = 800,
22 | cam_scale_factor: float = 1.50,
23 | ):
24 | (
25 | self.images,
26 | self.intrinsics,
27 | self.extrinsics,
28 | self.image_sizes,
29 | self.near,
30 | self.far,
31 | self.ndc_coeffs,
32 | (self.i_train, self.i_val, self.i_test, self.i_all),
33 | self.render_poses,
34 | self.label_info,
35 | ) = load_co3d_data(
36 | datadir=datadir,
37 | scene_name=scene_name,
38 | max_image_dim=max_image_dim,
39 | cam_scale_factor=cam_scale_factor,
40 | )
41 |
42 | self.render_scale = 300 / max(self.image_sizes[0][0], self.image_sizes[0][1])
43 |
44 | super(LitDataCo3D, self).__init__(
45 | datadir=datadir,
46 | accelerator=accelerator,
47 | num_gpus=num_gpus,
48 | num_tpus=num_tpus,
49 | )
50 |
51 |
52 | @gin.configurable()
53 | class LitDataScannet(LitData):
54 | def __init__(
55 | self,
56 | datadir: str,
57 | scene_name: str,
58 | accelerator: bool,
59 | num_gpus: int,
60 | num_tpus: int,
61 | # scannet specific arguments
62 | frame_skip: int = 1,
63 | max_frame: int = 1500,
64 | max_image_dim: int = 800,
65 | cam_scale_factor: float = 1.50,
66 | use_depth: bool = True,
67 | use_scans: bool = True,
68 | blur_thresh: float = 10.0,
69 | pcd_name: str = "tsdf_pcd.pcd",
70 | ):
71 | super(LitDataScannet, self).__init__(
72 | datadir=datadir,
73 | accelerator=accelerator,
74 | num_gpus=num_gpus,
75 | num_tpus=num_tpus,
76 | )
77 |
78 | (
79 | images,
80 | extrinsics,
81 | render_poses,
82 | (h, w),
83 | intrinsics,
84 | i_split,
85 | depths,
86 | trans_info,
87 | ) = load_scannet_data_ext(
88 | os.path.join(datadir, scene_name),
89 | cam_scale_factor=cam_scale_factor,
90 | frame_skip=frame_skip,
91 | max_frame=max_frame,
92 | max_image_dim=max_image_dim,
93 | blur_thresh=blur_thresh,
94 | use_depth=use_depth,
95 | pcd_name=pcd_name,
96 | )
97 | i_train, i_val, i_test = i_split
98 |
99 |         print(f"loaded ScanNet; image size: {h} x {w}")
100 | self.scene_name = scene_name
101 | self.images = images
102 | self.intrinsics = intrinsics.reshape(-1, 4, 4).repeat(len(images), axis=0)
103 | self.extrinsics = extrinsics
104 | self.image_sizes = np.array([h, w]).reshape(1, 2).repeat(len(images), axis=0)
105 | self.near = 0.0
106 | self.far = 1.0
107 | self.ndc_coeffs = (-1.0, -1.0)
108 | self.i_train, self.i_val, self.i_test = i_train, i_val, i_test
109 | self.i_all = np.arange(len(images))
110 | self.render_poses = render_poses
111 | self.trans_info = trans_info
112 | self.use_sphere_bound = False
113 |
--------------------------------------------------------------------------------
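
Both data modules are `@gin.configurable`, so the scene-specific arguments normally come from the `.gin` files under `configs/`. A hedged sketch of driving `LitDataCo3D` directly; the `datadir` and `scene_name` values here are placeholders, not real paths or sequence ids:

    import gin

    from dataloader.litdata import LitDataCo3D

    # Hypothetical bindings; in the repository these come from e.g. configs/co3d_v1.gin.
    gin.parse_config("""
    LitDataCo3D.datadir = "data/co3d"
    LitDataCo3D.scene_name = "some_scene_id"
    """)

    datamodule = LitDataCo3D(accelerator=False, num_gpus=1, num_tpus=0)
    print(len(datamodule.i_train), datamodule.image_sizes[0])
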
/dataloader/random_pose.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | def random_pose(poses, num_frames=50):
5 |
6 | rot_diff = np.einsum("ilk, jlm -> ijkm", poses[:, :3, :3], poses[:, :3, :3])
7 | rot_angle = (
8 | np.arccos(
9 | np.clip(
10 | (rot_diff[:, :, 0, 0] + rot_diff[:, :, 1, 1] + rot_diff[:, :, 2, 2] - 1)
11 | / 2,
12 | -1.0,
13 | 1.0,
14 | )
15 | )
16 | / np.pi
17 | * 180
18 | )
19 |     ignore_self = np.logical_not(np.eye(len(rot_diff), dtype=bool))
20 |
21 | trans_mask = (
22 | np.linalg.norm(poses[:, None, :3, 3] - poses[None, :, :3, 3], axis=-1) < 0.5
23 | )
24 | rot_idx = np.where(
25 | np.logical_and(trans_mask, np.logical_and(rot_angle < 40, ignore_self))
26 | )
27 | n_candidates = len(rot_idx[0])
28 | ret = np.zeros((num_frames, 4, 4))
29 | indices = np.random.choice(n_candidates, num_frames, replace=True)
30 | t = np.random.rand(num_frames)
31 | axis, angle = R_to_axis_angle(rot_diff[rot_idx[0][indices], rot_idx[1][indices]])
32 | angle = angle * t
33 | pose_rot = R_axis_angle(angle, axis)
34 |
35 | trans_t = (
36 | t[:, None] * poses[rot_idx[0][indices], :3, 3]
37 | + (1 - t)[:, None] * poses[rot_idx[1][indices], :3, 3]
38 | )
39 | ret[:, :3, :3] = np.einsum(
40 | "ijk, ikl -> ijl", poses[rot_idx[0][indices], :3, :3], pose_rot
41 | )
42 | ret[:, :3, 3] = trans_t
43 | ret[:, 3, 3] = 1.0
44 |
45 | return ret
46 |
47 | def pose_interp(poses, factor):
48 |
49 | pose_list = []
50 | for i in range(len(poses)):
51 | pose_list.append(poses[i])
52 |
53 | if i == len(poses) - 1:
54 | factor = 4 * factor
55 |
56 | next_idx = (i+1) % len(poses)
57 | axis, angle = R_to_axis_angle((poses[next_idx, :3, :3] @ poses[i, :3, :3].T)[None])
58 | for j in range(factor-1):
59 | ret = np.eye(4)
60 | j_fact = (j + 1) / factor
61 | angle_j = angle * j_fact
62 | pose_rot = R_axis_angle(angle_j, axis)
63 | ret[:3, :3] = pose_rot @ poses[i, :3, :3]
64 | trans_t = (
65 | (1 - j_fact) * poses[i, :3, 3]
66 | + (j_fact) * poses[next_idx, :3, 3]
67 | )
68 | ret[:3, 3] = trans_t
69 | pose_list.append(ret)
70 |
71 | return np.stack(pose_list)
72 |
73 |
74 |
75 | def R_axis_angle(angle, axis):
76 | """Generate the rotation matrix from the axis-angle notation.
77 | Conversion equations
78 | ====================
79 | From Wikipedia (http://en.wikipedia.org/wiki/Rotation_matrix), the conversion is given by::
80 | c = cos(angle); s = sin(angle); C = 1-c
81 | xs = x*s; ys = y*s; zs = z*s
82 | xC = x*C; yC = y*C; zC = z*C
83 | xyC = x*yC; yzC = y*zC; zxC = z*xC
84 | [ x*xC+c xyC-zs zxC+ys ]
85 | [ xyC+zs y*yC+c yzC-xs ]
86 | [ zxC-ys yzC+xs z*zC+c ]
87 |     @param angle: The rotation angles.
88 |     @type angle: numpy array, shape (N,)
89 |     @param axis: The 3D rotation axes.
90 |     @type axis: numpy array, shape (N, 3)
91 |     @return: The rotation matrices.
92 |     @rtype: numpy array, shape (N, 3, 3)
93 | """
94 | len_angle = len(angle)
95 | matrix = np.zeros((len_angle, 3, 3))
96 |
97 | # Trig factors.
98 | ca = np.cos(angle)
99 | sa = np.sin(angle)
100 | C = 1 - ca
101 |
102 | # Depack the axis.
103 | x, y, z = axis[:, 0], axis[:, 1], axis[:, 2]
104 |
105 | # Multiplications (to remove duplicate calculations).
106 | xs = x * sa
107 | ys = y * sa
108 | zs = z * sa
109 | xC = x * C
110 | yC = y * C
111 | zC = z * C
112 | xyC = x * yC
113 | yzC = y * zC
114 | zxC = z * xC
115 |
116 | # Update the rotation matrix.
117 | matrix[:, 0, 0] = x * xC + ca
118 | matrix[:, 0, 1] = xyC - zs
119 | matrix[:, 0, 2] = zxC + ys
120 | matrix[:, 1, 0] = xyC + zs
121 | matrix[:, 1, 1] = y * yC + ca
122 | matrix[:, 1, 2] = yzC - xs
123 | matrix[:, 2, 0] = zxC - ys
124 | matrix[:, 2, 1] = yzC + xs
125 | matrix[:, 2, 2] = z * zC + ca
126 |
127 | return matrix
128 |
129 |
130 | def R_to_axis_angle(matrix):
131 | """Convert the rotation matrix into the axis-angle notation.
132 | Conversion equations
133 | ====================
134 | From Wikipedia (http://en.wikipedia.org/wiki/Rotation_matrix), the conversion is given by::
135 | x = Qzy-Qyz
136 | y = Qxz-Qzx
137 | z = Qyx-Qxy
138 | r = hypot(x,hypot(y,z))
139 | t = Qxx+Qyy+Qzz
140 | theta = atan2(r,t-1)
141 |     @param matrix: The rotation matrices to convert.
142 |     @type matrix: numpy array, shape (N, 3, 3)
143 |     @return: The 3D rotation axes and angles.
144 |     @rtype: numpy arrays, shapes (N, 3) and (N,)
145 | """
146 |
147 | # Axes.
148 | len_matrix = len(matrix)
149 | axis = np.zeros((len_matrix, 3))
150 | axis[:, 0] = matrix[:, 2, 1] - matrix[:, 1, 2]
151 | axis[:, 1] = matrix[:, 0, 2] - matrix[:, 2, 0]
152 | axis[:, 2] = matrix[:, 1, 0] - matrix[:, 0, 1]
153 |
154 | # Angle.
155 | r = np.hypot(axis[:, 0], np.hypot(axis[:, 1], axis[:, 2]))
156 | t = matrix[:, 0, 0] + matrix[:, 1, 1] + matrix[:, 2, 2]
157 | theta = np.arctan2(r, t - 1)
158 |
159 | # Normalise the axis.
160 | axis = axis / r[:, None]
161 |
162 | # Return the data.
163 | return axis, theta
164 |
--------------------------------------------------------------------------------
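
`R_axis_angle` and `R_to_axis_angle` are batched inverses of each other for angles in (0, pi). A quick numpy round-trip check on toy data:

    import numpy as np

    from dataloader.random_pose import R_axis_angle, R_to_axis_angle

    # Random unit axes and angles away from the 0/pi edge cases.
    axis = np.random.randn(8, 3)
    axis /= np.linalg.norm(axis, axis=-1, keepdims=True)
    angle = np.random.uniform(0.1, np.pi - 0.1, size=8)

    R = R_axis_angle(angle, axis)        # (8, 3, 3) rotation matrices
    axis2, angle2 = R_to_axis_angle(R)   # recover axes and angles

    print(np.allclose(angle, angle2), np.allclose(axis, axis2))  # True True
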
/dataloader/sampler.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | from torch.utils.data import Dataset
4 | from torch.utils.data.distributed import DistributedSampler
5 | from torch.utils.data.sampler import SequentialSampler
6 | import torch.distributed as dist
7 |
8 | class DDPSampler(SequentialSampler):
9 |
10 | def __init__(self, batch_size, num_replicas, rank, tpu):
11 | self.data_source=None
12 | self.batch_size = batch_size
13 | self.drop_last=False
14 | ngpus = torch.cuda.device_count()
15 | if ngpus == 1 and not tpu:
16 | rank, num_replicas = 0, 1
17 | else:
18 | if num_replicas is None:
19 | if not dist.is_available():
20 | raise RuntimeError("Requires distributed package to be available")
21 | num_replicas = dist.get_world_size()
22 | if rank is None:
23 | if not dist.is_available():
24 | raise RuntimeError("Requires distributed package to be available")
25 | rank = dist.get_rank()
26 | self.rank = rank
27 | self.num_replicas = num_replicas
28 |
29 |
30 | class DDPSequnetialSampler(DDPSampler):
31 |
32 | def __init__(self, batch_size, num_replicas, rank, N_total, tpu):
33 | self.N_total = N_total
34 | super(DDPSequnetialSampler, self).__init__(batch_size, num_replicas, rank, tpu)
35 |
36 | def __iter__(self):
37 | idx_list = np.arange(self.N_total)
38 | return iter(idx_list[self.rank::self.num_replicas])
39 |
40 | def __len__(self):
41 | return int(np.ceil(self.N_total / self.num_replicas))
42 |
43 |
44 | class SingleImageDDPSampler(DDPSampler):
45 |
46 | def __init__(
47 | self,
48 | batch_size,
49 | num_replicas,
50 | rank,
51 | N_img,
52 | N_pixels,
53 | epoch_size,
54 | tpu,
55 | precrop,
56 | precrop_steps,
57 | ):
58 | super(SingleImageDDPSampler, self).__init__(batch_size, num_replicas, rank, tpu)
59 | self.N_pixels = N_pixels
60 | self.N_img = N_img
61 | self.epoch_size = epoch_size
62 | self.precrop = precrop
63 | self.precrop_steps = precrop_steps
64 |
65 | def __iter__(self):
66 | image_choice = np.random.choice(
67 | np.arange(self.N_img),
68 | self.epoch_size,
69 | replace=True
70 | )
71 | image_shape = self.N_pixels[image_choice]
72 | if not self.precrop:
73 | idx_choice = [
74 | np.random.choice(np.arange(image_shape[i, 0] * image_shape[i, 1]), self.batch_size)
75 | for i in range(self.epoch_size)
76 | ]
77 | else:
78 | idx_choice = []
79 | h_pick = [
80 | np.random.choice(
81 | np.arange(image_shape[i, 0] // 2), self.batch_size
82 | ) + image_shape[i, 0] // 4 for i in range(self.precrop_steps)
83 | ]
84 | w_pick = [
85 | np.random.choice(
86 | np.arange(image_shape[i, 1] // 2), self.batch_size
87 | ) + image_shape[i, 1] // 4 for i in range(self.precrop_steps)
88 | ]
89 | idx_choice = [h_pick[i] * image_shape[i, 1] + w_pick[i] for i in range(self.precrop_steps)]
90 |
91 | idx_choice += [
92 | np.random.choice(np.arange(image_shape[i, 0] * image_shape[i, 1]), self.batch_size)
93 | for i in range(self.epoch_size - self.precrop_steps)
94 | ]
95 | self.precrop = False
96 |
97 | for ((h, w), image_idx, idx) in zip(image_shape, image_choice, idx_choice):
98 | idx_ret = image_idx * h * w + idx
99 | yield idx_ret[self.rank::self.num_replicas]
100 |
101 | def __len__(self):
102 | return self.epoch_size
103 |
104 |
105 | class MultipleImageDDPSampler(DDPSampler):
106 | def __init__(self, batch_size, num_replicas, rank, total_len, epoch_size, tpu):
107 | super(MultipleImageDDPSampler, self).__init__(batch_size, num_replicas, rank, tpu)
108 | self.total_len = total_len
109 | self.epoch_size = epoch_size
110 |
111 | def __iter__(self):
112 | full_index = np.arange(self.total_len)
113 | indices = [
114 | np.random.choice(full_index, self.batch_size) \
115 | for _ in range(self.epoch_size)
116 | ]
117 | for batch in indices:
118 | yield batch[self.rank::self.num_replicas]
119 |
120 | def __len__(self):
121 | return self.epoch_size
122 |
123 |
124 | class MultipleImageWOReplaceDDPSampler(MultipleImageDDPSampler):
125 |
126 | def __init__(self, batch_size, num_replicas, rank, total_len, epoch_size, tpu):
127 | super(MultipleImageWOReplaceDDPSampler, self).__init__(
128 | batch_size, num_replicas, rank, total_len, epoch_size, tpu
129 | )
130 |
131 | def __iter__(self):
132 | indices = [
133 | np.random.permutation(self.total_len) \
134 | for _ in range(int(
135 | np.ceil(self.epoch_size * self.batch_size / self.total_len)
136 | ))
137 | ]
138 | indices = np.concatenate(indices)[:self.epoch_size * self.batch_size]
139 | indices = indices.reshape(self.epoch_size, self.batch_size)
140 |
141 | for batch in indices:
142 | yield batch[self.rank::self.num_replicas]
143 |
144 | def __len__(self):
145 | return self.epoch_size
146 |
147 |
148 | class RaySet(Dataset):
149 |
150 | def __init__(self, images=None, rays=None):
151 | self.images = images
152 | self.images_exist = self.images is not None
153 | assert rays is not None
154 | rays[:, 1] = rays[:, 1] / np.linalg.norm(rays[:, 1], axis=1)[:, np.newaxis]
155 | self.rays = rays
156 |
157 | self.N = len(rays)
158 |
159 | def __getitem__(self, index):
160 | ret = {"ray": torch.from_numpy(self.rays[index])}
161 | if self.images_exist:
162 | ret["target"] = torch.from_numpy(self.images[index])
163 | return ret
164 |
165 | def __len__(self):
166 | return self.N
167 |
--------------------------------------------------------------------------------
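
The samplers yield whole index arrays per step (already sharded by `rank`/`num_replicas`), so a batch can be fetched with one fancy-indexing call into `RaySet`. A single-process sketch with made-up rays:

    import numpy as np

    from dataloader.sampler import MultipleImageDDPSampler, RaySet

    # Rays packed as (N, 2, 3) origin/direction pairs; RaySet normalizes
    # the direction rows in its constructor.
    rays = np.random.randn(10_000, 2, 3).astype(np.float32)
    images = np.random.rand(10_000, 3).astype(np.float32)
    dataset = RaySet(images, rays)

    sampler = MultipleImageDDPSampler(
        batch_size=1024, num_replicas=1, rank=0,
        total_len=len(dataset), epoch_size=10, tpu=False,
    )
    for idx in sampler:       # idx is a (1024,) array of ray indices
        batch = dataset[idx]  # {"ray": (1024, 2, 3), "target": (1024, 3)}
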
/dataloader/spherical_poses.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 |
4 | trans_t = lambda t : np.array([
5 | [1,0,0,0],
6 | [0,1,0,0],
7 | [0,0,1,t],
8 | [0,0,0,1]], dtype=np.float32)
9 |
10 | rot_phi = lambda phi : np.array([
11 | [1,0,0,0],
12 | [0,np.cos(phi),-np.sin(phi),0],
13 | [0,np.sin(phi), np.cos(phi),0],
14 | [0,0,0,1]], dtype=np.float32)
15 |
16 | rot_theta = lambda th : np.array([
17 | [np.cos(th),0,-np.sin(th),0],
18 | [0,1,0,0],
19 | [np.sin(th),0, np.cos(th),0],
20 | [0,0,0,1]], dtype=np.float32)
21 |
22 |
23 | def spherical_pose(theta, phi, radius, torch_output=False):
24 | c2w = trans_t(radius)
25 | c2w = rot_phi(phi/180.*np.pi) @ c2w
26 | c2w = rot_theta(theta/180.*np.pi) @ c2w
27 | c2w = np.array([[-1,0,0,0],[0,0,1,0],[0,1,0,0],[0,0,0,1]], dtype=np.float32) @ c2w
28 | # c2w = np.array([[1,0,0,0],[0,0,1,0],[0,1,0,0],[0,0,0,1]], dtype=np.float32) @ c2w
29 |
30 | return torch.from_numpy(c2w) if torch_output else c2w
31 |
32 |
33 | def spherical_poses(cam_trans):
34 | return np.stack(
35 | [
36 | spherical_pose(angle, -30.0, 0.5) @ cam_trans
37 | for angle in np.linspace(-180,180,40+1)[:-1]
38 | ], 0
39 | )
--------------------------------------------------------------------------------
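
A small sketch of what `spherical_poses` produces: 40 camera-to-world matrices on a circle at elevation -30 degrees and radius 0.5, here with an identity world transform:

    import numpy as np

    from dataloader.spherical_poses import spherical_poses

    poses = spherical_poses(np.eye(4, dtype=np.float32))
    print(poses.shape)                      # (40, 4, 4)
    print(np.linalg.norm(poses[0, :3, 3]))  # 0.5: the camera orbit radius
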
/lib/plenoxel/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Copyright 2021 PlenOctree Authors.
2 | #
3 | # Redistribution and use in source and binary forms, with or without
4 | # modification, are permitted provided that the following conditions are met:
5 | #
6 | # 1. Redistributions of source code must retain the above copyright notice,
7 | # this list of conditions and the following disclaimer.
8 | #
9 | # 2. Redistributions in binary form must reproduce the above copyright notice,
10 | # this list of conditions and the following disclaimer in the documentation
11 | # and/or other materials provided with the distribution.
12 | #
13 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
14 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 | # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
17 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18 | # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19 | # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20 | # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21 | # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
23 | # POSSIBILITY OF SUCH DAMAGE.
24 |
25 | # NOTE: This CMakeLists is for development purposes only
26 | # (To check CUDA compile errors)
27 | # It is NOT necessary to use this for installation. Just use pip install .
28 | cmake_minimum_required( VERSION 3.3 )
29 |
30 | if(NOT CMAKE_BUILD_TYPE)
31 | set(CMAKE_BUILD_TYPE Release)
32 | endif()
33 | if (POLICY CMP0048)
34 | cmake_policy(SET CMP0048 NEW)
35 | endif (POLICY CMP0048)
36 | if (POLICY CMP0069)
37 | cmake_policy(SET CMP0069 NEW)
38 | endif (POLICY CMP0069)
39 | if (POLICY CMP0072)
40 | cmake_policy(SET CMP0072 NEW)
41 | endif (POLICY CMP0072)
42 |
43 | project( svox2 )
44 |
45 | set(CMAKE_CXX_STANDARD 14)
46 | enable_language(CUDA)
47 | message(STATUS "CUDA enabled")
48 | set( CMAKE_CUDA_STANDARD 14 )
49 | set( CMAKE_CUDA_STANDARD_REQUIRED ON)
50 | set( CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -g -Xcudafe \"--display_error_number --diag_suppress=3057 --diag_suppress=3058 --diag_suppress=3059 --diag_suppress=3060\" -lineinfo -arch=sm_75 ")
51 | # -Xptxas=\"-v\"
52 |
53 | set( INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/include" )
54 |
55 | if( MSVC )
56 | set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /MTd")
57 | set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /MT /GLT /Ox")
58 | set(CMAKE_CUDA_FLAGS_RELEASE "${CMAKE_CUDA_FLAGS_RELEASE} -Xcompiler=\"/MT\"" )
59 | endif()
60 |
61 | file(GLOB SOURCES
62 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
63 | ${CMAKE_CURRENT_SOURCE_DIR}/*.cu)
64 |
65 | find_package(pybind11 REQUIRED)
66 | find_package(Torch REQUIRED)
67 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
68 |
69 | include_directories (${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
70 |
71 | pybind11_add_module(svox2-test SHARED ${SOURCES})
72 | target_link_libraries(svox2-test PRIVATE "${TORCH_LIBRARIES}")
73 | target_include_directories(svox2-test PRIVATE "${INCLUDE_DIR}")
74 |
75 | if (MSVC)
76 | file(GLOB TORCH_DLLS "${TORCH_INSTALL_PREFIX}/lib/*.dll")
77 | add_custom_command(TARGET svox2-test
78 | POST_BUILD
79 | COMMAND ${CMAKE_COMMAND} -E copy_if_different
80 | ${TORCH_DLLS}
81 |             $<TARGET_FILE_DIR:svox2-test>)
82 | endif (MSVC)
83 |
--------------------------------------------------------------------------------
/lib/plenoxel/include/cubemap_util.cuh:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include "cuda_util.cuh"
3 | #include <cmath>
4 | #include <cstdint>
5 |
6 | #define _AXIS(x) (x>>1)
7 | #define _ORI(x) (x&1)
8 | #define _FACE(axis, ori) uint8_t((axis << 1) | ori)
9 |
10 | namespace {
11 | namespace device {
12 |
13 | struct CubemapCoord {
14 | uint8_t face;
15 | float uv[2];
16 | };
17 |
18 | struct CubemapLocation {
19 | uint8_t face;
20 | int16_t uv[2];
21 | };
22 |
23 | struct CubemapBilerpQuery {
24 | CubemapLocation ptr[2][2];
25 | float duv[2];
26 | };
27 |
28 | __device__ __inline__ void
29 | invert_cubemap(int u, int v, float r,
30 | int reso,
31 | float* __restrict__ out) {
32 | const float u_norm = (u + 0.5f) / reso * 2 - 1;
33 | const float v_norm = (v + 0.5f) / reso * 2 - 1;
34 | // EAC
35 | const float tx = tanf((M_PI / 4) * u_norm);
36 | const float ty = tanf((M_PI / 4) * v_norm);
37 | const float common = r * rnorm3df(1.f, tx, ty);
38 | out[0] = tx * common;
39 | out[1] = ty * common;
40 | out[2] = common;
41 | }
42 |
43 | __device__ __inline__ void
44 | invert_cubemap_traditional(int u, int v, float r,
45 | int reso,
46 | float* __restrict__ out) {
47 | const float u_norm = (u + 0.5f) / reso * 2 - 1;
48 | const float v_norm = (v + 0.5f) / reso * 2 - 1;
49 | const float common = r * rnorm3df(1.f, u_norm, v_norm);
50 | out[0] = u_norm * common;
51 | out[1] = v_norm * common;
52 | out[2] = common;
53 | }
54 |
55 | __device__ __host__ __inline__ CubemapCoord
56 | dir_to_cubemap_coord(const float* __restrict__ xyz_o,
57 | int face_reso,
58 | bool eac = true) {
59 | float maxv;
60 | int ax;
61 | float xyz[3] = {xyz_o[0], xyz_o[1], xyz_o[2]};
62 | if (fabsf(xyz[0]) >= fabsf(xyz[1]) && fabsf(xyz[0]) >= fabsf(xyz[2])) {
63 | ax = 0; maxv = xyz[0];
64 | } else if (fabsf(xyz[1]) >= fabsf(xyz[2])) {
65 | ax = 1; maxv = xyz[1];
66 | } else {
67 | ax = 2; maxv = xyz[2];
68 | }
69 | const float recip = 1.f / fabsf(maxv);
70 | xyz[0] *= recip;
71 | xyz[1] *= recip;
72 | xyz[2] *= recip;
73 |
74 | if (eac) {
75 | #pragma unroll 3
76 | for (int i = 0; i < 3; ++i) {
77 | xyz[i] = atanf(xyz[i]) * (4 * M_1_PI);
78 | }
79 | }
80 |
81 | CubemapCoord idx;
82 | idx.uv[0] = ((xyz[(ax ^ 1) & 1] + 1) * face_reso - 1) * 0.5;
83 | idx.uv[1] = ((xyz[(ax ^ 2) & 2] + 1) * face_reso - 1) * 0.5;
84 | const int ori = xyz[ax] >= 0;
85 | idx.face = _FACE(ax, ori);
86 |
87 | return idx;
88 | }
89 |
90 | __device__ __host__ __inline__ CubemapBilerpQuery
91 | cubemap_build_query(
92 | const CubemapCoord& idx,
93 | int face_reso) {
94 | const int uv_idx[2] ={ (int)floorf(idx.uv[0]), (int)floorf(idx.uv[1]) };
95 |
96 | bool m[2][2];
97 | m[0][0] = uv_idx[0] < 0;
98 | m[0][1] = uv_idx[0] > face_reso - 2;
99 | m[1][0] = uv_idx[1] < 0;
100 | m[1][1] = uv_idx[1] > face_reso - 2;
101 |
102 | const int face = idx.face;
103 | const int ax = _AXIS(face);
104 | const int ori = _ORI(face);
105 | // if ax is one of {0, 1, 2}, this trick gets the 2
106 | // of {0, 1, 2} other than ax
107 | const int uvd[2] = {((ax ^ 1) & 1), ((ax ^ 2) & 2)};
108 | int uv_ori[2];
109 |
110 | CubemapBilerpQuery result;
111 | result.duv[0] = idx.uv[0] - uv_idx[0];
112 | result.duv[1] = idx.uv[1] - uv_idx[1];
113 |
114 | #pragma unroll 2
115 | for (uv_ori[0] = 0; uv_ori[0] < 2; ++uv_ori[0]) {
116 | #pragma unroll 2
117 | for (uv_ori[1] = 0; uv_ori[1] < 2; ++uv_ori[1]) {
118 | CubemapLocation& nidx = result.ptr[uv_ori[0]][uv_ori[1]];
119 | nidx.face = face;
120 | nidx.uv[0] = uv_idx[0] + uv_ori[0];
121 | nidx.uv[1] = uv_idx[1] + uv_ori[1];
122 |
123 | const bool mu = m[0][uv_ori[0]];
124 | const bool mv = m[1][uv_ori[1]];
125 |
126 | int edge_idx = -1;
127 | if (mu) {
128 | // Crosses edge in u-axis
129 | if (mv) {
130 | // FIXME: deal with corners properly, right now
131 | // just clamps, resulting in a little artifact
132 | // at each cube corner
133 | nidx.uv[0] = min(max(nidx.uv[0], 0), face_reso - 1);
134 | nidx.uv[1] = min(max(nidx.uv[1], 0), face_reso - 1);
135 | } else {
136 | edge_idx = 0;
137 | }
138 | } else if (mv) {
139 | // Crosses edge in v-axis
140 | edge_idx = 1;
141 | }
142 | if (~edge_idx) {
143 | const int nax = uvd[edge_idx];
144 | const int16_t other_coord = nidx.uv[1 - edge_idx];
145 |
146 | // Determine directions in the new face
147 | const int nud = (nax ^ 1) & 1;
148 | // const int nvd = (nax ^ 2) & 2;
149 |
150 | if (nud == ax) {
151 | nidx.uv[0] = ori ? (face_reso - 1) : 0;
152 | nidx.uv[1] = other_coord;
153 | } else {
154 | nidx.uv[0] = other_coord;
155 | nidx.uv[1] = ori ? (face_reso - 1) : 0;
156 | }
157 |
158 | nidx.face = _FACE(nax, uv_ori[edge_idx]);
159 | }
160 | // Interior point: nothing needs to be done
161 |
162 | }
163 | }
164 |
165 | return result;
166 | }
167 |
168 | __device__ __host__ __inline__ float
169 | cubemap_sample(
170 | const float* __restrict__ cubemap, // (6, face_reso, face_reso, n_channels)
171 | const CubemapBilerpQuery& query,
172 | int face_reso,
173 | int n_channels,
174 | int chnl_id) {
175 |
176 | // NOTE: assuming address will fit in int32
177 | const int stride1 = face_reso * n_channels;
178 | const int stride0 = face_reso * stride1;
179 | const CubemapLocation& p00 = query.ptr[0][0];
180 | const float v00 = cubemap[p00.face * stride0 + p00.uv[0] * stride1 + p00.uv[1] * n_channels + chnl_id];
181 | const CubemapLocation& p01 = query.ptr[0][1];
182 | const float v01 = cubemap[p01.face * stride0 + p01.uv[0] * stride1 + p01.uv[1] * n_channels + chnl_id];
183 | const CubemapLocation& p10 = query.ptr[1][0];
184 | const float v10 = cubemap[p10.face * stride0 + p10.uv[0] * stride1 + p10.uv[1] * n_channels + chnl_id];
185 | const CubemapLocation& p11 = query.ptr[1][1];
186 | const float v11 = cubemap[p11.face * stride0 + p11.uv[0] * stride1 + p11.uv[1] * n_channels + chnl_id];
187 |
188 | const float val0 = lerp(v00, v01, query.duv[1]);
189 | const float val1 = lerp(v10, v11, query.duv[1]);
190 |
191 | return lerp(val0, val1, query.duv[0]);
192 | }
193 |
194 | __device__ __inline__ void
195 | cubemap_sample_backward(
196 | float* __restrict__ cubemap_grad, // (6, face_reso, face_reso, n_channels)
197 | const CubemapBilerpQuery& query,
198 | int face_reso,
199 | int n_channels,
200 | float grad_out,
201 | int chnl_id,
202 | bool* __restrict__ mask_out = nullptr) {
203 |
204 | // NOTE: assuming address will fit in int32
205 | const float bu = query.duv[0], bv = query.duv[1];
206 | const float au = 1.f - bu, av = 1.f - bv;
207 |
208 | #define _ADD_CUBEVERT(i, j, val) { \
209 | const CubemapLocation& p00 = query.ptr[i][j]; \
210 | const int idx = (p00.face * face_reso + p00.uv[0]) * face_reso + p00.uv[1]; \
211 | float* __restrict__ v00 = &cubemap_grad[idx * n_channels + chnl_id]; \
212 | atomicAdd(v00, val); \
213 | if (mask_out != nullptr) { \
214 | mask_out[idx] = true; \
215 | } \
216 | }
217 |
218 | _ADD_CUBEVERT(0, 0, au * av * grad_out);
219 | _ADD_CUBEVERT(0, 1, au * bv * grad_out);
220 | _ADD_CUBEVERT(1, 0, bu * av * grad_out);
221 | _ADD_CUBEVERT(1, 1, bu * bv * grad_out);
222 | #undef _ADD_CUBEVERT
223 |
224 | }
225 |
226 | __device__ __host__ __inline__ float
227 | multi_cubemap_sample(
228 | const float* __restrict__ cubemap1, // (6, face_reso, face_reso, n_channels)
229 | const float* __restrict__ cubemap2, // (6, face_reso, face_reso, n_channels)
230 | const CubemapBilerpQuery& query,
231 | float interp_wt,
232 | int face_reso,
233 | int n_channels,
234 | int chnl_id) {
235 | const float val1 = cubemap_sample(cubemap1,
236 | query,
237 | face_reso,
238 | n_channels,
239 | chnl_id);
240 | const float val2 = cubemap_sample(cubemap2,
241 | query,
242 | face_reso,
243 | n_channels,
244 | chnl_id);
245 | return lerp(val1, val2, interp_wt);
246 | }
247 |
248 | __device__ __inline__ void
249 | multi_cubemap_sample_backward(
250 | float* __restrict__ cubemap_grad1, // (6, face_reso, face_reso, n_channels)
251 | float* __restrict__ cubemap_grad2, // (6, face_reso, face_reso, n_channels)
252 | const CubemapBilerpQuery& query,
253 | float interp_wt,
254 | int face_reso,
255 | int n_channels,
256 | float grad_out,
257 | int chnl_id,
258 | bool* __restrict__ mask_out1 = nullptr,
259 | bool* __restrict__ mask_out2 = nullptr) {
260 | if (cubemap_grad1 == nullptr) return;
261 | cubemap_sample_backward(cubemap_grad1,
262 | query,
263 | face_reso,
264 | n_channels,
265 | grad_out * (1.f - interp_wt),
266 | chnl_id,
267 | mask_out1);
268 | cubemap_sample_backward(cubemap_grad2,
269 | query,
270 | face_reso,
271 | n_channels,
272 | grad_out * interp_wt,
273 | chnl_id,
274 | mask_out1 == nullptr ? nullptr : mask_out2);
275 | }
276 |
277 |
278 | } // namespace device
279 | } // namespace
280 |
--------------------------------------------------------------------------------
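
For reference, the direction-to-cubemap mapping in `dir_to_cubemap_coord` can be mirrored in numpy (an illustrative reimplementation, not the CUDA code path): pick the dominant axis, project onto that cube face, optionally apply the equi-angular (EAC) arctan warp, then convert to face-pixel coordinates:

    import numpy as np

    def dir_to_cubemap_coord_np(xyz, face_reso, eac=True):
        """NumPy sketch of dir_to_cubemap_coord above (reference only)."""
        xyz = np.asarray(xyz, dtype=np.float64)
        ax = int(np.argmax(np.abs(xyz)))       # dominant axis picks the face
        maxv = xyz[ax]
        p = xyz / abs(maxv)                    # project onto the cube face
        if eac:
            p = np.arctan(p) * (4 / np.pi)     # equi-angular cubemap warp
        u_ax, v_ax = (ax ^ 1) & 1, (ax ^ 2) & 2  # the two axes other than ax
        u = ((p[u_ax] + 1) * face_reso - 1) * 0.5
        v = ((p[v_ax] + 1) * face_reso - 1) * 0.5
        face = (ax << 1) | int(maxv >= 0)      # same packing as _FACE(axis, ori)
        return face, (u, v)

    print(dir_to_cubemap_coord_np([0.0, 0.0, 1.0], face_reso=16))
    # -> (5, (7.5, 7.5)): the +z face, exactly at the face center
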
/lib/plenoxel/include/cuda_util.cuh:
--------------------------------------------------------------------------------
1 | // Copyright 2021 Alex Yu
2 | #pragma once
3 | #include <torch/extension.h>
4 | #include <c10/cuda/CUDAGuard.h>
5 | #include <ATen/cuda/CUDAContext.h>
6 | #include <cstdint>
7 | #include "util.hpp"
8 |
9 |
10 | #define DEVICE_GUARD(_ten) \
11 | const at::cuda::OptionalCUDAGuard device_guard(device_of(_ten));
12 |
13 | #define CUDA_GET_THREAD_ID(tid, Q) const int tid = blockIdx.x * blockDim.x + threadIdx.x; \
14 | if (tid >= Q) return
15 | #define CUDA_GET_THREAD_ID_U64(tid, Q) const size_t tid = blockIdx.x * blockDim.x + threadIdx.x; \
16 | if (tid >= Q) return
17 | #define CUDA_N_BLOCKS_NEEDED(Q, CUDA_N_THREADS) ((Q - 1) / CUDA_N_THREADS + 1)
18 | #define CUDA_CHECK_ERRORS \
19 | cudaError_t err = cudaGetLastError(); \
20 | if (err != cudaSuccess) \
21 | printf("Error in svox2.%s : %s\n", __FUNCTION__, cudaGetErrorString(err))
22 |
23 | #define CUDA_MAX_THREADS at::cuda::getCurrentDeviceProperties()->maxThreadsPerBlock
24 |
25 | #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600
26 | #else
27 | __device__ inline double atomicAdd(double* address, double val){
28 | unsigned long long int* address_as_ull = (unsigned long long int*)address;
29 | unsigned long long int old = *address_as_ull, assumed;
30 | do {
31 | assumed = old;
32 | old = atomicCAS(address_as_ull, assumed,
33 | __double_as_longlong(val + __longlong_as_double(assumed)));
34 | } while (assumed != old);
35 | return __longlong_as_double(old);
36 | }
37 | #endif
38 |
39 | __device__ inline void atomicMax(float* result, float value){
40 | unsigned* result_as_u = (unsigned*)result;
41 | unsigned old = *result_as_u, assumed;
42 | do {
43 | assumed = old;
44 | old = atomicCAS(result_as_u, assumed,
45 | __float_as_int(fmaxf(value, __int_as_float(assumed))));
46 | } while (old != assumed);
47 | return;
48 | }
49 |
50 | __device__ inline void atomicMax(double* result, double value){
51 | unsigned long long int* result_as_ull = (unsigned long long int*)result;
52 | unsigned long long int old = *result_as_ull, assumed;
53 | do {
54 | assumed = old;
55 | old = atomicCAS(result_as_ull, assumed,
56 | __double_as_longlong(fmaxf(value, __longlong_as_double(assumed))));
57 | } while (old != assumed);
58 | return;
59 | }
60 |
61 | __device__ __inline__ void transform_coord(float* __restrict__ point,
62 | const float* __restrict__ scaling,
63 | const float* __restrict__ offset) {
64 | point[0] = fmaf(point[0], scaling[0], offset[0]); // a*b + c
65 | point[1] = fmaf(point[1], scaling[1], offset[1]); // a*b + c
66 | point[2] = fmaf(point[2], scaling[2], offset[2]); // a*b + c
67 | }
68 |
69 | // Linear interp
70 | // Subtract and fused multiply-add
71 | // (1-w) a + w b
72 | template <class T>
73 | __host__ __device__ __inline__ T lerp(T a, T b, T w) {
74 | return fmaf(w, b - a, a);
75 | }
76 |
77 | __device__ __inline__ static float _norm(
78 | const float* __restrict__ dir) {
79 | // return sqrtf(dir[0] * dir[0] + dir[1] * dir[1] + dir[2] * dir[2]);
80 | return norm3df(dir[0], dir[1], dir[2]);
81 | }
82 |
83 | __device__ __inline__ static float _rnorm(
84 | const float* __restrict__ dir) {
85 | // return 1.f / _norm(dir);
86 | return rnorm3df(dir[0], dir[1], dir[2]);
87 | }
88 |
89 | __host__ __device__ __inline__ static void xsuby3d(
90 | float* __restrict__ x,
91 | const float* __restrict__ y) {
92 | x[0] -= y[0];
93 | x[1] -= y[1];
94 | x[2] -= y[2];
95 | }
96 |
97 | __host__ __device__ __inline__ static float _dot(
98 | const float* __restrict__ x,
99 | const float* __restrict__ y) {
100 | return x[0] * y[0] + x[1] * y[1] + x[2] * y[2];
101 | }
102 |
103 | __host__ __device__ __inline__ static void _cross(
104 | const float* __restrict__ a,
105 | const float* __restrict__ b,
106 | float* __restrict__ out) {
107 | out[0] = a[1] * b[2] - a[2] * b[1];
108 | out[1] = a[2] * b[0] - a[0] * b[2];
109 | out[2] = a[0] * b[1] - a[1] * b[0];
110 | }
111 |
112 | __device__ __inline__ static float _dist_ray_to_origin(
113 | const float* __restrict__ origin,
114 | const float* __restrict__ dir) {
115 | // dir must be unit vector
116 | float tmp[3];
117 | _cross(origin, dir, tmp);
118 | return _norm(tmp);
119 | }
120 |
121 | #define int_div2_ceil(x) ((((x) - 1) >> 1) + 1)
122 |
123 | __host__ __inline__ cudaError_t cuda_assert(
124 | const cudaError_t code, const char* const file,
125 | const int line, const bool abort) {
126 | if (code != cudaSuccess) {
127 | fprintf(stderr, "cuda_assert: %s %s %s %d\n", cudaGetErrorName(code) ,cudaGetErrorString(code),
128 | file, line);
129 |
130 | if (abort) {
131 | cudaDeviceReset();
132 | exit(code);
133 | }
134 | }
135 |
136 | return code;
137 | }
138 |
139 | #define cuda(...) cuda_assert((cuda##__VA_ARGS__), __FILE__, __LINE__, true);
140 |
141 |
--------------------------------------------------------------------------------
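
Two of the helpers above have simple numpy equivalents that serve as sanity checks (illustrative only): `lerp` is the fused-multiply-add form of (1 - w) * a + w * b, and `_dist_ray_to_origin` is ||origin x dir|| for a unit direction:

    import numpy as np

    # lerp(a, b, w) = fma(w, b - a, a) equals the textbook blend.
    a, b, w = 2.0, 5.0, 0.25
    assert np.isclose(w * (b - a) + a, (1 - w) * a + w * b)

    # Distance of a ray from the origin via the cross product.
    o = np.array([1.0, 2.0, 3.0])
    d = np.array([0.0, 0.0, 1.0])                  # unit direction
    dist = np.linalg.norm(np.cross(o, d))
    assert np.isclose(dist, np.hypot(1.0, 2.0))    # o's z-component is irrelevant
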
/lib/plenoxel/include/data_spec.hpp:
--------------------------------------------------------------------------------
1 | // Copyright 2021 Alex Yu
2 | #pragma once
3 | #include "util.hpp"
4 | #include <torch/extension.h>
5 |
6 | using torch::Tensor;
7 |
8 | enum BasisType {
9 | // For svox 1 compatibility
10 | // BASIS_TYPE_RGBA = 0
11 | BASIS_TYPE_SH = 1,
12 | // BASIS_TYPE_SG = 2
13 | // BASIS_TYPE_ASG = 3
14 | BASIS_TYPE_3D_TEXTURE = 4,
15 | BASIS_TYPE_MLP = 255,
16 | };
17 |
18 | struct SparseGridSpec {
19 | Tensor density_data;
20 | Tensor sh_data;
21 | Tensor links;
22 | Tensor _offset;
23 | Tensor _scaling;
24 |
25 | Tensor background_links;
26 | Tensor background_data;
27 |
28 | int basis_dim;
29 | uint8_t basis_type;
30 | Tensor basis_data;
31 |
32 | inline void check() {
33 | CHECK_INPUT(density_data);
34 | CHECK_INPUT(sh_data);
35 | CHECK_INPUT(links);
36 | if (background_links.defined()) {
37 | CHECK_INPUT(background_links);
38 | CHECK_INPUT(background_data);
39 | TORCH_CHECK(background_links.ndimension() ==
40 | 2); // (H, W) -> [N] \cup {-1}
41 | TORCH_CHECK(background_data.ndimension() == 3); // (N, D, C) -> R
42 | }
43 | if (basis_data.defined()) {
44 | CHECK_INPUT(basis_data);
45 | }
46 | CHECK_CPU_INPUT(_offset);
47 | CHECK_CPU_INPUT(_scaling);
48 | TORCH_CHECK(density_data.ndimension() == 2);
49 | TORCH_CHECK(sh_data.ndimension() == 2);
50 | TORCH_CHECK(links.ndimension() == 3);
51 | }
52 | };
53 |
54 | struct GridOutputGrads {
55 | torch::Tensor grad_density_out;
56 | torch::Tensor grad_sh_out;
57 | torch::Tensor grad_basis_out;
58 | torch::Tensor grad_background_out;
59 |
60 | torch::Tensor mask_out;
61 | torch::Tensor mask_background_out;
62 | inline void check() {
63 | if (grad_density_out.defined()) {
64 | CHECK_INPUT(grad_density_out);
65 | }
66 | if (grad_sh_out.defined()) {
67 | CHECK_INPUT(grad_sh_out);
68 | }
69 | if (grad_basis_out.defined()) {
70 | CHECK_INPUT(grad_basis_out);
71 | }
72 | if (grad_background_out.defined()) {
73 | CHECK_INPUT(grad_background_out);
74 | }
75 | if (mask_out.defined() && mask_out.size(0) > 0) {
76 | CHECK_INPUT(mask_out);
77 | }
78 | if (mask_background_out.defined() && mask_background_out.size(0) > 0) {
79 | CHECK_INPUT(mask_background_out);
80 | }
81 | }
82 | };
83 |
84 | struct CameraSpec {
85 | torch::Tensor c2w;
86 | float fx;
87 | float fy;
88 | float cx;
89 | float cy;
90 | int width;
91 | int height;
92 |
93 | float ndc_coeffx;
94 | float ndc_coeffy;
95 |
96 | inline void check() {
97 | CHECK_INPUT(c2w);
98 | TORCH_CHECK(c2w.is_floating_point());
99 | TORCH_CHECK(c2w.ndimension() == 2);
100 | TORCH_CHECK(c2w.size(1) == 4);
101 | }
102 | };
103 |
104 | struct RaysSpec {
105 | Tensor origins;
106 | Tensor dirs;
107 | inline void check() {
108 | CHECK_INPUT(origins);
109 | CHECK_INPUT(dirs);
110 | TORCH_CHECK(origins.is_floating_point());
111 | TORCH_CHECK(dirs.is_floating_point());
112 | }
113 | };
114 |
115 | struct RenderOptions {
116 | float background_brightness;
117 | // float step_epsilon;
118 | float step_size;
119 | float sigma_thresh;
120 | float stop_thresh;
121 |
122 | float near_clip;
123 | bool use_spheric_clip;
124 |
125 | bool last_sample_opaque;
126 | float mask_transmit_threshold;
127 | };
128 |
--------------------------------------------------------------------------------
/lib/plenoxel/include/data_spec_packed.cuh:
--------------------------------------------------------------------------------
1 | // Copyright 2021 Alex Yu
2 | #pragma once
3 | #include <torch/extension.h>
4 | #include "data_spec.hpp"
5 | #include "cuda_util.cuh"
6 | #include "random_util.cuh"
7 |
8 | namespace {
9 | namespace device {
10 |
11 | struct PackedSparseGridSpec {
12 | PackedSparseGridSpec(SparseGridSpec& spec)
13 | :
14 |         density_data(spec.density_data.data_ptr<float>()),
15 |         sh_data(spec.sh_data.data_ptr<float>()),
16 |         links(spec.links.data_ptr<int32_t>()),
17 |         basis_type(spec.basis_type),
18 |         basis_data(spec.basis_data.defined() ? spec.basis_data.data_ptr<float>() : nullptr),
19 |         background_links(spec.background_links.defined() ?
20 |                          spec.background_links.data_ptr<int32_t>() :
21 |                          nullptr),
22 |         background_data(spec.background_data.defined() ?
23 |                         spec.background_data.data_ptr<float>() :
24 |                         nullptr),
25 | size{(int)spec.links.size(0),
26 | (int)spec.links.size(1),
27 | (int)spec.links.size(2)},
28 | stride_x{(int)spec.links.stride(0)},
29 | background_reso{
30 | spec.background_links.defined() ? (int)spec.background_links.size(1) : 0,
31 | },
32 | background_nlayers{
33 | spec.background_data.defined() ? (int)spec.background_data.size(1) : 0
34 | },
35 | basis_dim(spec.basis_dim),
36 | sh_data_dim((int)spec.sh_data.size(1)),
37 | basis_reso(spec.basis_data.defined() ? spec.basis_data.size(0) : 0),
38 |         _offset{spec._offset.data_ptr<float>()[0],
39 |                 spec._offset.data_ptr<float>()[1],
40 |                 spec._offset.data_ptr<float>()[2]},
41 |         _scaling{spec._scaling.data_ptr<float>()[0],
42 |                  spec._scaling.data_ptr<float>()[1],
43 |                  spec._scaling.data_ptr<float>()[2]} {
44 | }
45 |
46 | float* __restrict__ density_data;
47 | float* __restrict__ sh_data;
48 | const int32_t* __restrict__ links;
49 |
50 | const uint8_t basis_type;
51 | float* __restrict__ basis_data;
52 |
53 | const int32_t* __restrict__ background_links;
54 | float* __restrict__ background_data;
55 |
56 | const int size[3], stride_x;
57 | const int background_reso, background_nlayers;
58 |
59 | const int basis_dim, sh_data_dim, basis_reso;
60 | const float _offset[3];
61 | const float _scaling[3];
62 | };
63 |
64 | struct PackedGridOutputGrads {
65 | PackedGridOutputGrads(GridOutputGrads& grads) :
66 | grad_density_out(grads.grad_density_out.defined() ? grads.grad_density_out.data_ptr() : nullptr),
67 | grad_sh_out(grads.grad_sh_out.defined() ? grads.grad_sh_out.data_ptr() : nullptr),
68 | grad_basis_out(grads.grad_basis_out.defined() ? grads.grad_basis_out.data_ptr() : nullptr),
69 | grad_background_out(grads.grad_background_out.defined() ? grads.grad_background_out.data_ptr() : nullptr),
70 | mask_out((grads.mask_out.defined() && grads.mask_out.size(0) > 0) ? grads.mask_out.data_ptr() : nullptr),
71 | mask_background_out((grads.mask_background_out.defined() && grads.mask_background_out.size(0) > 0) ? grads.mask_background_out.data_ptr() : nullptr)
72 | {}
73 | float* __restrict__ grad_density_out;
74 | float* __restrict__ grad_sh_out;
75 | float* __restrict__ grad_basis_out;
76 | float* __restrict__ grad_background_out;
77 |
78 | bool* __restrict__ mask_out;
79 | bool* __restrict__ mask_background_out;
80 | };
81 |
82 | struct PackedCameraSpec {
83 | PackedCameraSpec(CameraSpec& cam) :
84 |         c2w(cam.c2w.packed_accessor32<float, 2, torch::RestrictPtrTraits>()),
85 | fx(cam.fx), fy(cam.fy),
86 | cx(cam.cx), cy(cam.cy),
87 | width(cam.width), height(cam.height),
88 | ndc_coeffx(cam.ndc_coeffx), ndc_coeffy(cam.ndc_coeffy) {}
89 |     const torch::PackedTensorAccessor32<float, 2, torch::RestrictPtrTraits>
90 |         c2w;
91 | float fx;
92 | float fy;
93 | float cx;
94 | float cy;
95 | int width;
96 | int height;
97 |
98 | float ndc_coeffx;
99 | float ndc_coeffy;
100 | };
101 |
102 | struct PackedRaysSpec {
103 |     const torch::PackedTensorAccessor32<float, 2, torch::RestrictPtrTraits> origins;
104 |     const torch::PackedTensorAccessor32<float, 2, torch::RestrictPtrTraits> dirs;
105 |     PackedRaysSpec(RaysSpec& spec) :
106 |         origins(spec.origins.packed_accessor32<float, 2, torch::RestrictPtrTraits>()),
107 |         dirs(spec.dirs.packed_accessor32<float, 2, torch::RestrictPtrTraits>())
108 | { }
109 | };
110 |
111 | struct SingleRaySpec {
112 | SingleRaySpec() = default;
113 | __device__ SingleRaySpec(const float* __restrict__ origin, const float* __restrict__ dir)
114 | : origin{origin[0], origin[1], origin[2]},
115 | dir{dir[0], dir[1], dir[2]} {}
116 | __device__ void set(const float* __restrict__ origin, const float* __restrict__ dir) {
117 | #pragma unroll 3
118 | for (int i = 0; i < 3; ++i) {
119 | this->origin[i] = origin[i];
120 | this->dir[i] = dir[i];
121 | }
122 | }
123 |
124 | float origin[3];
125 | float dir[3];
126 | float tmin, tmax, world_step;
127 |
128 | float pos[3];
129 | int32_t l[3];
130 | RandomEngine32 rng;
131 | };
132 |
133 | } // namespace device
134 | } // namespace
135 |
--------------------------------------------------------------------------------
/lib/plenoxel/include/random_util.cuh:
--------------------------------------------------------------------------------
1 | // Copyright 2021 Alex Yu
2 | #pragma once
3 | #include <cmath>
4 | #include <cstdint>
5 |
6 | // A custom xorshift random generator
7 | // Maybe replace with some CUDA internal stuff?
8 | struct RandomEngine32 {
9 | uint32_t x, y, z;
10 |
11 | // Inclusive both
12 | __host__ __device__
13 | uint32_t randint(uint32_t lo, uint32_t hi) {
14 | if (hi <= lo) return lo;
15 | uint32_t z = (*this)();
16 | return z % (hi - lo + 1) + lo;
17 | }
18 |
19 | __host__ __device__
20 | void rand2(float* out1, float* out2) {
21 | const uint32_t z = (*this)();
22 | const uint32_t fmax = (1 << 16);
23 | const uint32_t z1 = z >> 16;
24 | const uint32_t z2 = z & (fmax - 1);
25 | const float ifmax = 1.f / fmax;
26 |
27 | *out1 = z1 * ifmax;
28 | *out2 = z2 * ifmax;
29 | }
30 |
31 | __host__ __device__
32 | float rand() {
33 | uint32_t z = (*this)();
34 | return float(z) / (1LL << 32);
35 | }
36 |
37 |
38 | __host__ __device__
39 | void randn2(float* out1, float* out2) {
40 | rand2(out1, out2);
41 | // Box-Muller transform
42 | const float srlog = sqrtf(-2 * logf(*out1 + 1e-32f));
43 | *out2 *= 2 * M_PI;
44 | *out1 = srlog * cosf(*out2);
45 | *out2 = srlog * sinf(*out2);
46 | }
47 |
48 | __host__ __device__
49 | float randn() {
50 | float x, y;
51 | rand2(&x, &y);
52 | // Box-Muller transform
53 | return sqrtf(-2 * logf(x + 1e-32f))* cosf(2 * M_PI * y);
54 | }
55 |
56 | __host__ __device__
57 | uint32_t operator()() {
58 | uint32_t t;
59 | x ^= x << 16;
60 | x ^= x >> 5;
61 | x ^= x << 1;
62 | t = x;
63 | x = y;
64 | y = z;
65 | z = t ^ x ^ y;
66 | return z;
67 | }
68 | };
69 |
--------------------------------------------------------------------------------
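
A line-for-line Python sketch of the xorshift step in `operator()` above, for inspection only; the masks emulate the 32-bit wraparound that the CUDA `uint32_t` arithmetic gets for free:

    # Python port of RandomEngine32's xorshift step (illustrative only).
    MASK32 = 0xFFFFFFFF

    def xorshift_step(state):
        x, y, z = state
        x ^= (x << 16) & MASK32   # x ^= x << 16 in 32-bit arithmetic
        x ^= x >> 5
        x ^= (x << 1) & MASK32
        x, y, z = y, z, x ^ y ^ z  # rotate state; new z mixes all three words
        return (x, y, z), z

    state = (123456789, 362436069, 521288629)  # arbitrary nonzero seed
    for _ in range(3):
        state, sample = xorshift_step(state)
        print(sample / 2**32)                  # uniform-ish float in [0, 1)
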
/lib/plenoxel/include/util.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 | // Changed from x.type().is_cuda() due to deprecation
3 | #define CHECK_CUDA(x) TORCH_CHECK(x.is_cuda(), #x " must be a CUDA tensor")
4 | #define CHECK_CPU(x) TORCH_CHECK(!x.is_cuda(), #x " must be a CPU tensor")
5 | #define CHECK_CONTIGUOUS(x) \
6 | TORCH_CHECK(x.is_contiguous(), #x " must be contiguous")
7 | #define CHECK_INPUT(x) \
8 | CHECK_CUDA(x); \
9 | CHECK_CONTIGUOUS(x)
10 | #define CHECK_CPU_INPUT(x) \
11 | CHECK_CPU(x); \
12 | CHECK_CONTIGUOUS(x)
13 |
14 | #if defined(__CUDACC__)
15 | // #define _EXP(x) expf(x) // SLOW EXP
16 | #define _EXP(x) __expf(x) // FAST EXP
17 | #define _SIGMOID(x) (1 / (1 + _EXP(-(x))))
18 |
19 | #else
20 |
21 | #define _EXP(x) expf(x)
22 | #define _SIGMOID(x) (1 / (1 + expf(-(x))))
23 | #endif
24 | #define _SQR(x) ((x) * (x))
25 |
--------------------------------------------------------------------------------
/lib/plenoxel/svox2.cpp:
--------------------------------------------------------------------------------
1 | // Copyright 2021 Alex Yu
2 |
3 | // This file contains only Python bindings
4 | #include "data_spec.hpp"
5 | #include <torch/extension.h>
6 | #include <cstdint>
7 | #include <tuple>
8 |
9 | using torch::Tensor;
10 |
11 | std::tuple<torch::Tensor, torch::Tensor> sample_grid(SparseGridSpec &, Tensor,
12 |                                                      bool);
13 | void sample_grid_backward(SparseGridSpec &, Tensor, Tensor, Tensor, Tensor,
14 | Tensor, bool);
15 |
16 | // ** NeRF rendering formula (trilerp)
17 | Tensor volume_render_cuvol(SparseGridSpec &, RaysSpec &, RenderOptions &);
18 | // Tensor volume_render_cuvol_image(SparseGridSpec &, CameraSpec &,
19 | // RenderOptions &);
20 | void volume_render_cuvol_backward(SparseGridSpec &, RaysSpec &, RenderOptions &,
21 | Tensor, Tensor, GridOutputGrads &);
22 | void volume_render_cuvol_fused(
23 | SparseGridSpec &, RaysSpec &, RenderOptions &,
24 | Tensor, float, float, bool, bool, Tensor, Tensor, GridOutputGrads &
25 | );
26 | // Expected termination (depth) rendering
27 | torch::Tensor volume_render_expected_term(SparseGridSpec &, RaysSpec &,
28 | RenderOptions &);
29 | // Depth rendering based on sigma-threshold as in Dex-NeRF
30 | torch::Tensor volume_render_sigma_thresh(SparseGridSpec &, RaysSpec &,
31 | RenderOptions &, float);
32 |
33 | // ** NV rendering formula (trilerp)
34 | Tensor volume_render_nvol(SparseGridSpec &, RaysSpec &, RenderOptions &);
35 | void volume_render_nvol_backward(SparseGridSpec &, RaysSpec &, RenderOptions &,
36 | Tensor, Tensor, GridOutputGrads &);
37 | void volume_render_nvol_fused(SparseGridSpec &, RaysSpec &, RenderOptions &,
38 | Tensor, float, float, Tensor, GridOutputGrads &);
39 |
40 | // ** NeRF rendering formula (nearest-neighbor, infinitely many steps)
41 | Tensor volume_render_svox1(SparseGridSpec &, RaysSpec &, RenderOptions &);
42 | void volume_render_svox1_backward(SparseGridSpec &, RaysSpec &, RenderOptions &,
43 | Tensor, Tensor, GridOutputGrads &);
44 | void volume_render_svox1_fused(SparseGridSpec &, RaysSpec &, RenderOptions &,
45 | Tensor, float, float, Tensor, GridOutputGrads &);
46 |
47 | // Tensor volume_render_cuvol_image(SparseGridSpec &, CameraSpec &,
48 | // RenderOptions &);
49 | //
50 | // void volume_render_cuvol_image_backward(SparseGridSpec &, CameraSpec &,
51 | // RenderOptions &, Tensor, Tensor,
52 | // GridOutputGrads &);
53 |
54 | // Misc
55 | Tensor dilate(Tensor);
56 | void accel_dist_prop(Tensor);
57 | void grid_weight_render(Tensor, CameraSpec &, float, float, bool, Tensor,
58 | Tensor, Tensor);
59 | // void sample_cubemap(Tensor, Tensor, bool, Tensor);
60 |
61 | // Loss
62 | Tensor tv(Tensor, Tensor, int, int, bool, float, bool, float, float);
63 | void tv_grad(Tensor, Tensor, int, int, float, bool, float, bool, float, float,
64 | Tensor);
65 | void tv_grad_sparse(Tensor, Tensor, Tensor, Tensor, int, int, float, bool,
66 | float, bool, bool, float, float, Tensor);
67 | void msi_tv_grad_sparse(Tensor, Tensor, Tensor, Tensor, float, float, Tensor);
68 | void lumisphere_tv_grad_sparse(SparseGridSpec &, Tensor, Tensor, Tensor, float,
69 | float, float, float, GridOutputGrads &);
70 |
71 | // Optim
72 | void rmsprop_step(Tensor, Tensor, Tensor, Tensor, float, float, float, float,
73 | float);
74 | void sgd_step(Tensor, Tensor, Tensor, float, float);
75 |
76 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
77 | #define _REG_FUNC(funname) m.def(#funname, &funname)
78 | _REG_FUNC(sample_grid);
79 | _REG_FUNC(sample_grid_backward);
80 | _REG_FUNC(volume_render_cuvol);
81 | // _REG_FUNC(volume_render_cuvol_image);
82 | _REG_FUNC(volume_render_cuvol_backward);
83 | _REG_FUNC(volume_render_cuvol_fused);
84 | _REG_FUNC(volume_render_expected_term);
85 | _REG_FUNC(volume_render_sigma_thresh);
86 |
87 | _REG_FUNC(volume_render_nvol);
88 | _REG_FUNC(volume_render_nvol_backward);
89 | _REG_FUNC(volume_render_nvol_fused);
90 |
91 | _REG_FUNC(volume_render_svox1);
92 | _REG_FUNC(volume_render_svox1_backward);
93 | _REG_FUNC(volume_render_svox1_fused);
94 |
95 | // _REG_FUNC(volume_render_cuvol_image);
96 | // _REG_FUNC(volume_render_cuvol_image_backward);
97 |
98 | // Loss
99 | _REG_FUNC(tv);
100 | _REG_FUNC(tv_grad);
101 | _REG_FUNC(tv_grad_sparse);
102 | _REG_FUNC(msi_tv_grad_sparse);
103 | _REG_FUNC(lumisphere_tv_grad_sparse);
104 |
105 | // Misc
106 | _REG_FUNC(dilate);
107 | _REG_FUNC(accel_dist_prop);
108 | _REG_FUNC(grid_weight_render);
109 | // _REG_FUNC(sample_cubemap);
110 |
111 | // Optimizer
112 | _REG_FUNC(rmsprop_step);
113 | _REG_FUNC(sgd_step);
114 | #undef _REG_FUNC
115 |
116 |   py::class_<SparseGridSpec>(m, "SparseGridSpec")
117 | .def(py::init<>())
118 | .def_readwrite("density_data", &SparseGridSpec::density_data)
119 | .def_readwrite("sh_data", &SparseGridSpec::sh_data)
120 | .def_readwrite("links", &SparseGridSpec::links)
121 | .def_readwrite("_offset", &SparseGridSpec::_offset)
122 | .def_readwrite("_scaling", &SparseGridSpec::_scaling)
123 | .def_readwrite("basis_dim", &SparseGridSpec::basis_dim)
124 | .def_readwrite("basis_type", &SparseGridSpec::basis_type)
125 | .def_readwrite("basis_data", &SparseGridSpec::basis_data)
126 | .def_readwrite("background_links", &SparseGridSpec::background_links)
127 | .def_readwrite("background_data", &SparseGridSpec::background_data);
128 |
129 | py::class_<CameraSpec>(m, "CameraSpec")
130 | .def(py::init<>())
131 | .def_readwrite("c2w", &CameraSpec::c2w)
132 | .def_readwrite("fx", &CameraSpec::fx)
133 | .def_readwrite("fy", &CameraSpec::fy)
134 | .def_readwrite("cx", &CameraSpec::cx)
135 | .def_readwrite("cy", &CameraSpec::cy)
136 | .def_readwrite("width", &CameraSpec::width)
137 | .def_readwrite("height", &CameraSpec::height)
138 | .def_readwrite("ndc_coeffx", &CameraSpec::ndc_coeffx)
139 | .def_readwrite("ndc_coeffy", &CameraSpec::ndc_coeffy);
140 |
141 | py::class_<RaysSpec>(m, "RaysSpec")
142 | .def(py::init<>())
143 | .def_readwrite("origins", &RaysSpec::origins)
144 | .def_readwrite("dirs", &RaysSpec::dirs);
145 |
146 | py::class_<RenderOptions>(m, "RenderOptions")
147 | .def(py::init<>())
148 | .def_readwrite("background_brightness",
149 | &RenderOptions::background_brightness)
150 | .def_readwrite("step_size", &RenderOptions::step_size)
151 | .def_readwrite("sigma_thresh", &RenderOptions::sigma_thresh)
152 | .def_readwrite("stop_thresh", &RenderOptions::stop_thresh)
153 | .def_readwrite("near_clip", &RenderOptions::near_clip)
154 | .def_readwrite("use_spheric_clip", &RenderOptions::use_spheric_clip)
155 | .def_readwrite("last_sample_opaque", &RenderOptions::last_sample_opaque)
156 | .def_readwrite("mask_transmit_threshold", &RenderOptions::mask_transmit_threshold);
157 |
158 | py::class_<GridOutputGrads>(m, "GridOutputGrads")
159 | .def(py::init<>())
160 | .def_readwrite("grad_density_out", &GridOutputGrads::grad_density_out)
161 | .def_readwrite("grad_sh_out", &GridOutputGrads::grad_sh_out)
162 | .def_readwrite("grad_basis_out", &GridOutputGrads::grad_basis_out)
163 | .def_readwrite("grad_background_out",
164 | &GridOutputGrads::grad_background_out)
165 | .def_readwrite("mask_out", &GridOutputGrads::mask_out)
166 | .def_readwrite("mask_background_out",
167 | &GridOutputGrads::mask_background_out);
168 | }
169 |
--------------------------------------------------------------------------------
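The bindings above expose the CUDA kernels as plain module functions plus a few spec structs. A minimal sketch of filling those structs straight from Python, assuming the extension was built as `lib.plenoxel` by setup.py; the ray batch is hypothetical, and real callers go through the `_to_cpp()` helpers in model/plenoxel_torch instead:

    import torch
    import lib.plenoxel as _C  # extension target name from setup.py

    # Populate the bound RenderOptions field by field, as RenderOptions._to_cpp() does.
    opt = _C.RenderOptions()
    opt.background_brightness = 1.0  # color composited behind the last sample
    opt.step_size = 0.5              # ray-marching step, in voxel units
    opt.sigma_thresh = 1e-10         # skip samples with density below this
    opt.stop_thresh = 1e-7           # stop once transmittance falls below this

    # A hypothetical batch of 1024 unit-length rays on the GPU.
    rays = _C.RaysSpec()
    rays.origins = torch.zeros(1024, 3, device="cuda")
    rays.dirs = torch.nn.functional.normalize(
        torch.randn(1024, 3, device="cuda"), dim=-1)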
/lib/plenoxel/version.py:
--------------------------------------------------------------------------------
1 | __version__ = '0.0.1.dev0+sphtexcub.lincolor.fast'
2 |
--------------------------------------------------------------------------------
/model/interface.py:
--------------------------------------------------------------------------------
1 | import pytorch_lightning as pl
2 |
3 | import numpy as np
4 | import torch
5 | import json
6 |
7 | from piqa.ssim import SSIM
8 | from piqa.lpips import LPIPS
9 |
10 | reshape_2d = lambda x: x.reshape((x.shape[0], -1))
11 | clip_0_1 = lambda x: torch.clip(x, 0, 1).detach()
12 |
13 | class LitModel(pl.LightningModule):
14 |
15 | # Utils to reorganize output values from evaluation steps,
16 | # i.e., the validation and test steps.
17 | def alter_gather_cat(self, outputs, key, image_sizes):
18 | each = torch.cat([output[key] for output in outputs])
19 | all = self.all_gather(each).detach()
20 | if all.dim() == 3:
21 | all = all.permute((1, 0, 2)).flatten(0, 1)
22 | ret, curr = [], 0
23 | for (h, w) in image_sizes:
24 | ret.append(all[curr:curr+h*w].reshape(h, w, -1))
25 | curr += h * w
26 | return ret
27 |
28 | @torch.no_grad()
29 | def psnr_each(self, preds, gts):
30 | psnr_list = []
31 | for (pred, gt) in zip(preds, gts):
32 | pred = torch.clip(pred, 0, 1)
33 | gt = torch.clip(gt, 0, 1)
34 | mse = torch.mean((pred - gt) ** 2)
35 | psnr = -10.0 * torch.log(mse) / np.log(10)
36 | psnr_list.append(psnr)
37 | return torch.stack(psnr_list)
38 |
39 | @torch.no_grad()
40 | def ssim_each(self, preds, gts):
41 | ssim_model = SSIM().to(device=self.device)
42 | ssim_list = []
43 | for (pred, gt) in zip(preds, gts):
44 | pred = torch.clip(
45 | pred.permute((2, 0, 1)).unsqueeze(0).float(),
46 | 0, 1
47 | )
48 | gt = torch.clip(
49 | gt.permute((2, 0, 1)).unsqueeze(0).float(),
50 | 0, 1
51 | )
52 | ssim = ssim_model(pred, gt)
53 | ssim_list.append(ssim)
54 | del ssim_model
55 | return torch.stack(ssim_list)
56 |
57 | @torch.no_grad()
58 | def lpips_each(self, preds, gts):
59 | lpips_model = LPIPS(network="vgg").to(device=self.device)
60 | lpips_list = []
61 | for (pred, gt) in zip(preds, gts):
62 | pred = torch.clip(
63 | pred.permute((2, 0, 1)).unsqueeze(0).float(),
64 | 0, 1
65 | )
66 | gt = torch.clip(
67 | gt.permute((2, 0, 1)).unsqueeze(0).float(),
68 | 0, 1
69 | )
70 | lpips = lpips_model(pred, gt)
71 | lpips_list.append(lpips)
72 | del lpips_model
73 | return torch.stack(lpips_list)
74 |
75 | @torch.no_grad()
76 | def psnr(self, preds, gts, i_train, i_val, i_test):
77 | ret = {}
78 | ret["name"] = "PSNR"
79 | psnr_list = self.psnr_each(preds, gts)
80 | ret["mean"] = psnr_list.mean().item()
81 | if self.trainer.datamodule.eval_test_only:
82 | ret["test"] = psnr_list.mean().item()
83 | else:
84 | ret["train"] = psnr_list[i_train].mean().item()
85 | ret["val"] = psnr_list[i_val].mean().item()
86 | ret["test"] = psnr_list[i_test].mean().item()
87 |
88 | return ret
89 |
90 | @torch.no_grad()
91 | def ssim(self, preds, gts, i_train, i_val, i_test):
92 | ret = {}
93 | ret["name"] = "SSIM"
94 | ssim_list = self.ssim_each(preds, gts)
95 | ret["mean"] = ssim_list.mean().item()
96 | if self.trainer.datamodule.eval_test_only:
97 | ret["test"] = ssim_list.mean().item()
98 | else:
99 | ret["train"] = ssim_list[i_train].mean().item()
100 | ret["val"] = ssim_list[i_val].mean().item()
101 | ret["test"] = ssim_list[i_test].mean().item()
102 |
103 | return ret
104 |
105 | @torch.no_grad()
106 | def lpips(self, preds, gts, i_train, i_val, i_test):
107 | ret = {}
108 | ret["name"] = "LPIPS"
109 | lpips_list = self.lpips_each(preds, gts)
110 | ret["mean"] = lpips_list.mean().item()
111 | if self.trainer.datamodule.eval_test_only:
112 | ret["test"] = lpips_list.mean().item()
113 | else:
114 | ret["train"] = lpips_list[i_train].mean().item()
115 | ret["val"] = lpips_list[i_val].mean().item()
116 | ret["test"] = lpips_list[i_test].mean().item()
117 |
118 | return ret
119 |
120 | def write_stats(self, fpath, *stats):
121 |
122 | d = {}
123 | for stat in stats:
124 | d[stat["name"]] = {k : float(w) for (k, w) in stat.items() if k != "name" and k != "scene_wise"}
125 |
126 | with open(fpath, 'w') as fp:
127 | json.dump(d, fp, indent=4, sort_keys=True)
128 |
--------------------------------------------------------------------------------
/model/plenoxel_torch/__global__.py:
--------------------------------------------------------------------------------
1 | BASIS_TYPE_SH = 1
2 | BASIS_TYPE_3D_TEXTURE = 4
3 | BASIS_TYPE_MLP = 255
4 |
5 |
6 | def _get_c_extension():
7 | from warnings import warn
8 |
9 | try:
10 | import lib.plenoxel as _C
11 |
12 | if not hasattr(_C, "sample_grid"):
13 | _C = None
14 | except Exception:  # extension missing or failed to load
15 | _C = None
16 |
17 | if _C is None:
18 | warn(
19 | "CUDA extension lib.plenoxel could not be loaded! "
20 | + "Operations will be slow.\n"
21 | + "Build it first with `python setup.py install`."
22 | )
23 | return _C
24 |
--------------------------------------------------------------------------------
/model/plenoxel_torch/autograd.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.autograd as autograd
3 | import model.plenoxel_torch.utils as utils
4 |
5 | from typing import Tuple
6 |
7 | from model.plenoxel_torch.__global__ import (
8 | BASIS_TYPE_SH,
9 | _get_c_extension,
10 | BASIS_TYPE_3D_TEXTURE,
11 | BASIS_TYPE_MLP,
12 | )
13 |
14 | _C = _get_c_extension()
15 |
16 | # BEGIN Differentiable CUDA functions with custom gradient
17 | class _SampleGridAutogradFunction(autograd.Function):
18 | @staticmethod
19 | def forward(
20 | ctx,
21 | data_density: torch.Tensor,
22 | data_sh: torch.Tensor,
23 | grid,
24 | points: torch.Tensor,
25 | want_colors: bool,
26 | ):
27 | assert not points.requires_grad, "Point gradient not supported"
28 | out_density, out_sh = _C.sample_grid(grid, points, want_colors)
29 | ctx.save_for_backward(points)
30 | ctx.grid = grid
31 | ctx.want_colors = want_colors
32 | return out_density, out_sh
33 |
34 | @staticmethod
35 | def backward(ctx, grad_out_density, grad_out_sh):
36 | (points,) = ctx.saved_tensors
37 | grad_density_grid = torch.zeros_like(ctx.grid.density_data.data)
38 | grad_sh_grid = torch.zeros_like(ctx.grid.sh_data.data)
39 | _C.sample_grid_backward(
40 | ctx.grid,
41 | points,
42 | grad_out_density.contiguous(),
43 | grad_out_sh.contiguous(),
44 | grad_density_grid,
45 | grad_sh_grid,
46 | ctx.want_colors,
47 | )
48 | if not ctx.needs_input_grad[0]:
49 | grad_density_grid = None
50 | if not ctx.needs_input_grad[1]:
51 | grad_sh_grid = None
52 |
53 | return grad_density_grid, grad_sh_grid, None, None, None
54 |
55 |
56 | class _VolumeRenderFunction(autograd.Function):
57 | @staticmethod
58 | def forward(
59 | ctx,
60 | data_density: torch.Tensor,
61 | data_sh: torch.Tensor,
62 | data_basis: torch.Tensor,
63 | data_background: torch.Tensor,
64 | grid,
65 | rays,
66 | opt,
67 | backend: str,
68 | ):
69 | cu_fn = _C.__dict__[f"volume_render_{backend}"]
70 | color, _ = cu_fn(
71 | grid, rays, opt
72 | )
73 | ctx.save_for_backward(color)
74 | ctx.grid = grid
75 | ctx.rays = rays
76 | ctx.opt = opt
77 | ctx.backend = backend
78 | ctx.basis_data = data_basis
79 | return color
80 |
81 | @staticmethod
82 | def backward(ctx, grad_out):
83 | (color_cache,) = ctx.saved_tensors
84 | cu_fn = _C.__dict__[f"volume_render_{ctx.backend}_backward"]
85 | grad_density_grid = torch.zeros_like(ctx.grid.density_data.data)
86 | grad_sh_grid = torch.zeros_like(ctx.grid.sh_data.data)
87 | if ctx.grid.basis_type == BASIS_TYPE_MLP:
88 | grad_basis = torch.zeros_like(ctx.basis_data)
89 | else:  # 3D-texture basis gets a grad buffer; plain SH leaves grad_basis as None
90 | grad_basis = torch.zeros_like(ctx.grid.basis_data.data) if ctx.grid.basis_type == BASIS_TYPE_3D_TEXTURE else None
91 | grad_background = (torch.zeros_like(ctx.grid.background_data.data)
92 | if ctx.grid.background_data is not None else None)
93 | grad_holder = _C.GridOutputGrads()
94 | grad_holder.grad_density_out = grad_density_grid
95 | grad_holder.grad_sh_out = grad_sh_grid
96 | if ctx.needs_input_grad[2]:
97 | grad_holder.grad_basis_out = grad_basis
98 | if ctx.grid.background_data is not None and ctx.needs_input_grad[3]:
99 | grad_holder.grad_background_out = grad_background
100 | cu_fn(
101 | ctx.grid, ctx.rays, ctx.opt, grad_out.contiguous(), color_cache, grad_holder
102 | )
103 | ctx.grid = ctx.rays = ctx.opt = None
104 | if not ctx.needs_input_grad[0]:
105 | grad_density_grid = None
106 | if not ctx.needs_input_grad[1]:
107 | grad_sh_grid = None
108 | if not ctx.needs_input_grad[2]:
109 | grad_basis = None
110 | if not ctx.needs_input_grad[3]:
111 | grad_background = None
112 | ctx.basis_data = None
113 |
114 | return (
115 | grad_density_grid,
116 | grad_sh_grid,
117 | grad_basis,
118 | grad_background,
119 | None,
120 | None,
121 | None,
122 | None,
123 | )
124 |
125 |
126 | class _TotalVariationFunction(autograd.Function):
127 | @staticmethod
128 | def forward(
129 | ctx,
130 | data: torch.Tensor,
131 | links: torch.Tensor,
132 | start_dim: int,
133 | end_dim: int,
134 | use_logalpha: bool,
135 | logalpha_delta: float,
136 | ignore_edge: bool,
137 | ndc_coeffs: Tuple[float, float],
138 | ):
139 | tv = _C.tv(
140 | links,
141 | data,
142 | start_dim,
143 | end_dim,
144 | use_logalpha,
145 | logalpha_delta,
146 | ignore_edge,
147 | ndc_coeffs[0],
148 | ndc_coeffs[1],
149 | )
150 | ctx.save_for_backward(links, data)
151 | ctx.start_dim = start_dim
152 | ctx.end_dim = end_dim
153 | ctx.use_logalpha = use_logalpha
154 | ctx.logalpha_delta = logalpha_delta
155 | ctx.ignore_edge = ignore_edge
156 | ctx.ndc_coeffs = ndc_coeffs
157 | return tv
158 |
159 | @staticmethod
160 | def backward(ctx, grad_out):
161 | links, data = ctx.saved_tensors
162 | grad_grid = torch.zeros_like(data)
163 | _C.tv_grad(
164 | links,
165 | data,
166 | ctx.start_dim,
167 | ctx.end_dim,
168 | 1.0,
169 | ctx.use_logalpha,
170 | ctx.logalpha_delta,
171 | ctx.ignore_edge,
172 | ctx.ndc_coeffs[0],
173 | ctx.ndc_coeffs[1],
174 | grad_grid,
175 | )
176 | ctx.start_dim = ctx.end_dim = None
177 | if not ctx.needs_input_grad[0]:
178 | grad_grid = None
179 | return grad_grid, None, None, None, None, None, None, None
180 |
--------------------------------------------------------------------------------
/model/plenoxel_torch/dataclass.py:
--------------------------------------------------------------------------------
1 | from dataclasses import dataclass
2 | from random import random
3 | from typing import List, Optional, Tuple, Union
4 |
5 | import torch
6 |
7 | import model.plenoxel_torch.utils as utils
8 | from model.plenoxel_torch.__global__ import _get_c_extension
9 |
10 | _C = _get_c_extension()
11 |
12 |
13 | @dataclass
14 | class RenderOptions:
15 | """
16 | Rendering options passed to the CUDA renderer.
17 | Available:
18 | :param backend: str, renderer backend
19 | :param background_brightness: float
20 | :param step_size: float, step size for rendering
21 | :param sigma_thresh: float
22 | :param stop_thresh: float
23 | """
24 |
25 | def __init__(
26 | self,
27 | backend: str = "cuvol",
28 | background_brightness: float = 1.0,
29 | step_size: float = 0.5,
30 | sigma_thresh: float = 1e-10,
31 | stop_thresh: float = 1e-7,
32 | last_sample_opaque: bool = False,
33 | near_clip: float = 0.0,
34 | use_spheric_clip: bool = False,
35 | mask_transmit_threshold = 0.9,
36 | ):
37 | self.backend = backend
38 | self.background_brightness = background_brightness
39 | self.step_size = step_size
40 | self.sigma_thresh = sigma_thresh
41 | self.stop_thresh = stop_thresh
42 | self.last_sample_opaque = last_sample_opaque
43 | self.near_clip = near_clip
44 | self.use_spheric_clip = use_spheric_clip
45 | self.mask_transmit_threshold = mask_transmit_threshold
46 |
47 | def _to_cpp(
48 | self, randomize: bool = False
49 | ):
50 | """
51 | Generate object to pass to C++
52 | """
53 | opt = _C.RenderOptions()
54 | opt.background_brightness = self.background_brightness
55 | opt.step_size = self.step_size
56 | opt.sigma_thresh = self.sigma_thresh
57 | opt.stop_thresh = self.stop_thresh
58 | opt.near_clip = self.near_clip
59 | opt.use_spheric_clip = self.use_spheric_clip
60 | opt.last_sample_opaque = self.last_sample_opaque
61 | opt.mask_transmit_threshold = self.mask_transmit_threshold
62 |
63 | return opt
64 |
65 |
66 | @dataclass
67 | class Rays:
68 | origins: torch.Tensor
69 | dirs: torch.Tensor
70 |
71 | def _to_cpp(self):
72 | """
73 | Generate object to pass to C++
74 | """
75 | spec = _C.RaysSpec()
76 | spec.origins = self.origins
77 | spec.dirs = self.dirs
78 | return spec
79 |
80 | def __getitem__(self, key):
81 | return Rays(self.origins[key], self.dirs[key])
82 |
83 | @property
84 | def is_cuda(self) -> bool:
85 | return self.origins.is_cuda and self.dirs.is_cuda
86 |
87 |
88 | @dataclass
89 | class Camera:
90 | c2w: torch.Tensor # OpenCV
91 | fx: float = 1111.11
92 | fy: Optional[float] = None
93 | cx: Optional[float] = None
94 | cy: Optional[float] = None
95 | width: int = 800
96 | height: int = 800
97 |
98 | ndc_coeffs: Union[Tuple[float, float], List[float]] = (-1.0, -1.0)
99 |
100 | @property
101 | def fx_val(self):
102 | return self.fx
103 |
104 | @property
105 | def fy_val(self):
106 | return self.fx if self.fy is None else self.fy
107 |
108 | @property
109 | def cx_val(self):
110 | return self.width * 0.5 if self.cx is None else self.cx
111 |
112 | @property
113 | def cy_val(self):
114 | return self.height * 0.5 if self.cy is None else self.cy
115 |
116 | @property
117 | def using_ndc(self):
118 | return self.ndc_coeffs[0] > 0.0
119 |
120 | def _to_cpp(self):
121 | """
122 | Generate object to pass to C++
123 | """
124 | spec = _C.CameraSpec()
125 | spec.c2w = self.c2w.float()
126 | spec.fx = float(self.fx_val)
127 | spec.fy = float(self.fy_val)
128 | spec.cx = float(self.cx_val)
129 | spec.cy = float(self.cy_val)
130 | spec.width = int(self.width)
131 | spec.height = int(self.height)
132 | spec.ndc_coeffx = float(self.ndc_coeffs[0])
133 | spec.ndc_coeffy = float(self.ndc_coeffs[1])
134 | return spec
135 |
136 | @property
137 | def is_cuda(self) -> bool:
138 | return self.c2w.is_cuda
139 |
--------------------------------------------------------------------------------
/run.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import logging
3 | import os
4 | import shutil
5 | from typing import *
6 |
7 | import gin
8 | import pytorch_lightning.loggers as pl_loggers
9 | import torch
10 | from pytorch_lightning import Trainer, seed_everything
11 | from pytorch_lightning.callbacks import (
12 | LearningRateMonitor,
13 | ModelCheckpoint,
14 | TQDMProgressBar,
15 | )
16 | from pytorch_lightning.plugins import DDPPlugin
17 |
18 | from utils.logger import RetryingWandbLogger
19 | from utils.select_option import select_callback, select_dataset, select_model
20 |
21 |
22 | def str2bool(v):
23 | if isinstance(v, bool):
24 | return v
25 | if v.lower() in ("yes", "true", "t", "y", "1"):
26 | return True
27 | elif v.lower() in ("no", "false", "f", "n", "0"):
28 | return False
29 | else:
30 | raise argparse.ArgumentTypeError("Boolean value expected.")
31 |
32 |
33 | @gin.configurable()
34 | def run(
35 | resume_training: bool = False,
36 | ckpt_path: Optional[str] = None,
37 | datadir: Optional[str] = None,
38 | logbase: Optional[str] = None,
39 | scene_name: Optional[str] = None,
40 | model_name: Optional[str] = None,
41 | proj_name: Optional[str] = None,
42 | dataset_name: Optional[str] = None,
43 | postfix: Optional[str] = None,
44 | entity: Optional[str] = None,
45 | # Optimization
46 | max_steps: int = 200000,
47 | precision: int = 32,
48 | # Logging
49 | log_every_n_steps: int = 1000,
50 | progressbar_refresh_rate: int = 5,
51 | # Run Mode
52 | run_train: bool = True,
53 | run_eval: bool = True,
54 | run_render: bool = False,
55 | accelerator: str = "gpu",
56 | num_gpus: Optional[int] = 1,
57 | num_tpus: Optional[int] = None,
58 | num_sanity_val_steps: int = 0,
59 | seed: int = 777,
60 | debug: bool = False,
61 | save_last_only: bool = False,
62 | check_val_every_n_epoch: int = 1,
63 | ):
64 |
65 | logging.getLogger("lightning").setLevel(logging.ERROR)
66 | datadir = datadir.rstrip("/")
67 |
68 | if scene_name is None and dataset_name == "co3d":
69 | scene_name = "349_36520_66801"
70 |
71 | if scene_name is None and dataset_name == "scannet":
72 | scene_name = "scene0000_00"
73 |
74 | exp_name = model_name + "_" + dataset_name + "_" + scene_name
75 | if postfix is not None:
76 | exp_name += "_" + str(postfix)
77 | if debug:
78 | exp_name += "_debug"
79 |
80 | if num_gpus is None:
81 | num_gpus = torch.cuda.device_count()
82 |
83 | os.makedirs(logbase, exist_ok=True)
84 | logdir = os.path.join(logbase, exp_name)
85 |
86 | os.makedirs(logdir, exist_ok=True)
87 |
88 | # WANDB fails when using TPUs
89 | wandb_logger = (
90 | RetryingWandbLogger(
91 | name=exp_name,
92 | entity=entity,
93 | project=model_name if proj_name is None else proj_name,
94 | )
95 | if accelerator == "gpu"
96 | else pl_loggers.TensorBoardLogger(save_dir=logdir, name=exp_name)
97 | )
98 |
99 | seed_everything(seed, workers=True)
100 |
101 | lr_monitor = LearningRateMonitor(logging_interval="step")
102 | model_checkpoint = ModelCheckpoint(
103 | monitor="val/psnr",
104 | dirpath=logdir,
105 | filename="best",
106 | save_top_k=1,
107 | mode="max",
108 | save_last=save_last_only,
109 | )
110 | tqdm_progress = TQDMProgressBar(refresh_rate=progressbar_refresh_rate)
111 |
112 | callbacks = [lr_monitor, model_checkpoint, tqdm_progress]
113 | callbacks += select_callback(model_name)
114 |
115 | trainer = Trainer(
116 | logger=wandb_logger if run_train or run_render else None,
117 | log_every_n_steps=log_every_n_steps,
118 | devices=num_gpus,
119 | max_steps=max_steps,
120 | replace_sampler_ddp=False,
121 | check_val_every_n_epoch=check_val_every_n_epoch,
122 | precision=precision,
123 | accelerator="gpu",
124 | num_sanity_val_steps=num_sanity_val_steps,
125 | callbacks=callbacks,
126 | )
127 |
128 | if resume_training:
129 | if ckpt_path is None:
130 | ckpt_path = f"{logdir}/last.ckpt"
131 |
132 | data_module = select_dataset(
133 | dataset_name=dataset_name,
134 | scene_name=scene_name,
135 | datadir=datadir,
136 | accelerator="gpu",
137 | num_gpus=num_gpus,
138 | num_tpus=num_tpus,
139 | )
140 | model = select_model(model_name=model_name)
141 | model.logdir = logdir
142 | if run_train:
143 | trainer.fit(model, data_module, ckpt_path=ckpt_path)
144 | if save_last_only:
145 | best_ckpt = os.path.join(logdir, "best.ckpt")
146 | if os.path.exists(best_ckpt):
147 | os.remove(best_ckpt)
148 | ckpt_path = f"{logdir}/best.ckpt" if not save_last_only else f"{logdir}/last.ckpt"
149 | if run_eval:
150 | trainer.test(model, data_module, ckpt_path=ckpt_path)
151 |
152 | if run_render:
153 | trainer.predict(model, data_module, ckpt_path=ckpt_path)
154 |
155 |
156 | if __name__ == "__main__":
157 | parser = argparse.ArgumentParser()
158 | parser.add_argument(
159 | "--ginc",
160 | action="append",
161 | help="gin config file",
162 | )
163 | parser.add_argument(
164 | "--ginb",
165 | action="append",
166 | help="gin bindings",
167 | )
168 | parser.add_argument(
169 | "--resume_training",
170 | type=str2bool,
171 | nargs="?",
172 | const=True,
173 | default=False,
174 | help="resume training from the last checkpoint",
175 | )
176 | parser.add_argument(
177 | "--ckpt_path",
178 | type=str,
179 | default=None,
180 | help="path to checkpoints",
181 | )
182 | parser.add_argument(
183 | "--scene_name",
184 | type=str,
185 | default=None,
186 | help="scene name",
187 | )
188 | parser.add_argument(
189 | "--entity",
190 | type=str,
191 | default=None,
192 | help="entity",
193 | )
194 | args = parser.parse_args()
195 |
196 | ginbs = []
197 | if args.ginb:
198 | ginbs.extend(args.ginb)
199 | logging.info(f"Gin configuration files: {args.ginc}")
200 | logging.info(f"Gin bindings: {ginbs}")
201 |
202 | gin.parse_config_files_and_bindings(args.ginc, ginbs)
203 | run(
204 | resume_training=args.resume_training,
205 | ckpt_path=args.ckpt_path,
206 | scene_name=args.scene_name,
207 | entity=args.entity,
208 | )
209 |
--------------------------------------------------------------------------------
/sbatch.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | #SBATCH -J icn # Job name
4 | #SBATCH -o sbatch_log/pytorch-1gpu.%j.out # Name of stdout output file (%j expands to %jobId)
5 | #SBATCH -p A100 # queue name or partition name titanxp/titanrtx/2080ti
6 | #SBATCH -t 3-00:00:00 # Run time (d-hh:mm:ss) - 3 days
7 | #SBATCH --gres=gpu:1 # number of gpus you want to use
8 |
9 | #SBATCH --nodes=1
10 | ##SBATCH --exclude=n13
11 | ##SBATCH --nodelist=n12
12 |
13 | ##SBATCH --ntasks=1
14 | ##SBATCH --tasks-per-node=1
15 | ##SBATCH --cpus-per-task=1
16 |
17 | cd $SLURM_SUBMIT_DIR
18 |
19 | echo "SLURM_SUBMIT_DIR=$SLURM_SUBMIT_DIR"
20 | echo "CUDA_HOME=$CUDA_HOME"
21 | echo "CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES"
22 | echo "CUDA_VERSION=$CUDA_VERSION"
23 |
24 | srun -l /bin/hostname
25 | srun -l /bin/pwd
26 | srun -l /bin/date
27 |
28 | module purge
29 |
30 | echo "Start"
31 | export NCCL_NSOCKS_PERTHREAD=4
32 | export NCCL_SOCKET_NTHREADS=2
33 | export WANDB_SPAWN_METHOD=fork
34 |
35 |
36 | nvidia-smi
37 | date
38 | squeue --job $SLURM_JOBID
39 |
40 | echo "##### END #####"
--------------------------------------------------------------------------------
/script/collage.sh:
--------------------------------------------------------------------------------
1 | python3 -m run --ginc configs/co3d.gin --scene_name 290_30780_59102 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True
2 | python3 -m run --ginc configs/co3d.gin --scene_name 349_36520_66801 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True
3 | python3 -m run --ginc configs/co3d.gin --scene_name 12_109_707 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True
4 | python3 -m run --ginc configs/co3d.gin --scene_name 423_58951_114401 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True
5 | python3 -m run --ginc configs/co3d.gin --scene_name 412_56309_109304 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True
6 | python3 -m run --ginc configs/co3d.gin --scene_name 236_24789_51101 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True
7 | python3 -m run --ginc configs/co3d.gin --scene_name 373_41487_82902 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True
8 | python3 -m run --ginc configs/co3d.gin --scene_name 386_46018_91908 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True
9 | python3 -m run --ginc configs/co3d.gin --scene_name 374_41919_83809 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True
10 | python3 -m run --ginc configs/co3d.gin --scene_name 402_52411_102607 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True
11 | python3 -m run --ginc configs/co3d.gin --scene_name 197_21206_41908 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True
12 | python3 -m run --ginc configs/co3d.gin --scene_name 47_2694_7604 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True
13 | python3 -m run --ginc configs/co3d.gin --scene_name 430_60724_119000 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True
14 | python3 -m run --ginc configs/co3d.gin --scene_name 399_51048_100100 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True
15 | python3 -m run --ginc configs/co3d.gin --scene_name 396_49638_97805 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True
16 | python3 -m run --ginc configs/co3d.gin --scene_name 399_51079_100203 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True
17 | python3 -m run --ginc configs/co3d.gin --scene_name 414_56915_110208 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True
18 | python3 -m run --ginc configs/co3d.gin --scene_name 395_49190_97202 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True
19 | python3 -m run --ginc configs/co3d.gin --scene_name 163_17880_33208 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True
20 | python3 -m run --ginc configs/co3d.gin --scene_name 397_50080_98510 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True
21 | python3 -m run --ginc configs/co3d.gin --scene_name 396_49564_97706 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True
22 | python3 -m run --ginc configs/co3d.gin --scene_name 416_57463_111200 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True
23 | python3 -m run --ginc configs/co3d.gin --scene_name 386_46212_92202 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True
24 | python3 -m run --ginc configs/co3d.gin --scene_name 117_13756_28310 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True
25 | python3 -m run --ginc configs/co3d.gin --scene_name 374_41996_84006 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True
26 | python3 -m run --ginc configs/co3d.gin --scene_name 411_56010_108204 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True
27 | python3 -m run --ginc configs/co3d.gin --scene_name 391_46910_93404 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True
28 | python3 -m run --ginc configs/co3d.gin --scene_name 399_50932_99902 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True
29 | python3 -m run --ginc configs/co3d.gin --scene_name 424_59119_114602 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True
30 | python3 -m run --ginc configs/co3d.gin --scene_name 58_3355_10203 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True
31 | python3 -m run --ginc configs/co3d.gin --scene_name 153_16983_31803 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True
32 | python3 -m run --ginc configs/co3d.gin --scene_name 262_28099_53708 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True
33 | python3 -m run --ginc configs/co3d.gin --scene_name 378_44212_88104 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True
34 | python3 -m run --ginc configs/co3d.gin --scene_name 395_49194_97208 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True
35 | python3 -m run --ginc configs/co3d.gin --scene_name 385_45782_91506 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True
36 | python3 -m run --ginc configs/co3d.gin --scene_name 386_45955_91804 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True
37 | python3 -m run --ginc configs/co3d.gin --scene_name 380_44942_90000 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True
38 | python3 -m run --ginc configs/co3d.gin --scene_name 31_1375_4208 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True
39 | python3 -m run --ginc configs/co3d.gin --scene_name 216_22866_49900 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True
40 | python3 -m run --ginc configs/co3d.gin --scene_name 385_45373_90905 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True
41 | python3 -m run --ginc configs/co3d.gin --scene_name 372_41093_82000 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True
42 | python3 -m run --ginc configs/co3d.gin --scene_name 70_5758_13307 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True
43 | python3 -m run --ginc configs/co3d.gin --scene_name 40_1899_5810 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True
44 | python3 -m run --ginc configs/co3d.gin --scene_name 366_39368_76707 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True
45 | python3 -m run --ginc configs/co3d.gin --scene_name 397_49988_98402 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True
46 | python3 -m run --ginc configs/co3d.gin --scene_name 106_12689_26708 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True
47 | python3 -m run --ginc configs/co3d.gin --scene_name 396_49566_97710 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True
48 | python3 -m run --ginc configs/co3d.gin --scene_name 351_37059_67904 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True
49 | python3 -m run --ginc configs/co3d.gin --scene_name 165_18081_34406 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True
50 | python3 -m run --ginc configs/co3d.gin --scene_name 28_967_2810 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True
51 |
--------------------------------------------------------------------------------
/script/v1.sh:
--------------------------------------------------------------------------------
1 | python3 -m run --ginc configs/co3d.gin --scene_name 131_15149_29891 --ginb run.postfix=\"v1\"
2 | python3 -m run --ginc configs/co3d.gin --scene_name 113_13363_23419 --ginb run.postfix=\"v1\"
3 | python3 -m run --ginc configs/co3d.gin --scene_name 255_27516_55384 --ginb run.postfix=\"v1\"
4 | python3 -m run --ginc configs/co3d.gin --scene_name 28_991_2996 --ginb run.postfix=\"v1\"
5 | python3 -m run --ginc configs/co3d.gin --scene_name 349_36520_66801 --ginb run.postfix=\"v1\"
--------------------------------------------------------------------------------
/script/v2.sh:
--------------------------------------------------------------------------------
1 |
2 | python3 -m run --ginc configs/co3d_v2.gin --scene_name 131_15149_29891
3 | python3 -m run --ginc configs/co3d_v2.gin --scene_name 113_13363_23419
4 | python3 -m run --ginc configs/co3d_v2.gin --scene_name 255_27516_55384
5 | python3 -m run --ginc configs/co3d_v2.gin --scene_name 28_991_2996
6 | python3 -m run --ginc configs/co3d_v2.gin --scene_name 349_36520_66801
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 | import os
3 | import os.path as osp
4 | import warnings
5 |
6 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension
7 |
8 | ROOT_DIR = osp.dirname(osp.abspath(__file__))
9 |
10 | __version__ = None
11 | exec(open('lib/plenoxel/version.py', 'r').read())
12 |
13 | CUDA_FLAGS = []
14 | INSTALL_REQUIREMENTS = []
15 | include_dirs = [osp.join(ROOT_DIR, "lib", "plenoxel", "include")]
16 |
17 | # From PyTorch3D
18 | cub_home = os.environ.get("CUB_HOME", None)
19 | if cub_home is None:
20 | prefix = os.environ.get("CONDA_PREFIX", None)
21 | if prefix is not None and os.path.isdir(prefix + "/include/cub"):
22 | cub_home = prefix + "/include"
23 |
24 | if cub_home is None:
25 | warnings.warn(
26 | "The environment variable `CUB_HOME` was not found. "
27 | "Installation will fail if your system CUDA toolkit version is less than 11. "
28 | "NVIDIA CUB can be downloaded "
29 | "from `https://github.com/NVIDIA/cub/releases`. You can unpack "
30 | "it to a location of your choice and set the environment variable "
31 | "`CUB_HOME` to the folder containing the `CMakeLists.txt` file."
32 | )
33 | else:
34 | include_dirs.append(os.path.realpath(cub_home).replace("\\ ", " "))
35 |
36 | try:
37 | ext_modules = [
38 | CUDAExtension('lib.plenoxel', [
39 | 'lib/plenoxel/svox2.cpp',
40 | 'lib/plenoxel/svox2_kernel.cu',
41 | 'lib/plenoxel/render_lerp_kernel_cuvol.cu',
42 | 'lib/plenoxel/render_lerp_kernel_nvol.cu',
43 | 'lib/plenoxel/render_svox1_kernel.cu',
44 | 'lib/plenoxel/misc_kernel.cu',
45 | 'lib/plenoxel/loss_kernel.cu',
46 | 'lib/plenoxel/optim_kernel.cu',
47 | ], include_dirs=include_dirs,
48 | optional=False),
49 | ]
50 | except Exception:
51 | import warnings
52 | warnings.warn("Failed to build CUDA extension")
53 | ext_modules = []
54 |
55 | setup(
56 | name='plenoxel',
57 | version=__version__,
58 | author='Alex Yu',
59 | author_email='alexyu99126@gmail.com',
60 | description='PyTorch sparse voxel volume extension, including custom CUDA kernels',
61 | long_description='PyTorch sparse voxel volume extension, including custom CUDA kernels',
62 | ext_modules=ext_modules,
63 | setup_requires=['pybind11>=2.5.0'],
64 | packages=['lib.plenoxel'],
65 | cmdclass={'build_ext': BuildExtension},
66 | zip_safe=False,
67 | )
68 |
--------------------------------------------------------------------------------
/utils/SensorData.py:
--------------------------------------------------------------------------------
1 | import os
2 | import struct
3 | import zlib
4 |
5 | import cv2
6 | import imageio
7 | import numpy as np
8 | import png
9 |
10 | COMPRESSION_TYPE_COLOR = {-1: "unknown", 0: "raw", 1: "png", 2: "jpeg"}
11 | COMPRESSION_TYPE_DEPTH = {
12 | -1: "unknown",
13 | 0: "raw_ushort",
14 | 1: "zlib_ushort",
15 | 2: "occi_ushort",
16 | }
17 |
18 |
19 | class RGBDFrame:
20 | def load(self, file_handle):
21 | self.camera_to_world = np.asarray(
22 | struct.unpack("f" * 16, file_handle.read(16 * 4)), dtype=np.float32
23 | ).reshape(4, 4)
24 | self.timestamp_color = struct.unpack("Q", file_handle.read(8))[0]
25 | self.timestamp_depth = struct.unpack("Q", file_handle.read(8))[0]
26 | self.color_size_bytes = struct.unpack("Q", file_handle.read(8))[0]
27 | self.depth_size_bytes = struct.unpack("Q", file_handle.read(8))[0]
28 | self.color_data = b"".join(
29 | struct.unpack(
30 | "c" * self.color_size_bytes, file_handle.read(self.color_size_bytes)
31 | )
32 | )
33 | self.depth_data = b"".join(
34 | struct.unpack(
35 | "c" * self.depth_size_bytes, file_handle.read(self.depth_size_bytes)
36 | )
37 | )
38 |
39 | def decompress_depth(self, compression_type):
40 | if compression_type == "zlib_ushort":
41 | return self.decompress_depth_zlib()
42 | else:
43 | raise ValueError(f"unsupported depth compression: {compression_type}")
44 |
45 | def decompress_depth_zlib(self):
46 | return zlib.decompress(self.depth_data)
47 |
48 | def decompress_color(self, compression_type):
49 | if compression_type == "jpeg":
50 | return self.decompress_color_jpeg()
51 | else:
52 | raise ValueError(f"unsupported color compression: {compression_type}")
53 |
54 | def decompress_color_jpeg(self):
55 | return imageio.imread(self.color_data)
56 |
57 |
58 | class SensorData:
59 | def __init__(self, filename):
60 | self.version = 4
61 | self.load(filename)
62 |
63 | def load(self, filename):
64 | with open(filename, "rb") as f:
65 | version = struct.unpack("I", f.read(4))[0]
66 | assert self.version == version
67 | strlen = struct.unpack("Q", f.read(8))[0]
68 | self.sensor_name = b"".join(struct.unpack("c" * strlen, f.read(strlen)))
69 | self.intrinsic_color = np.asarray(
70 | struct.unpack("f" * 16, f.read(16 * 4)), dtype=np.float32
71 | ).reshape(4, 4)
72 | self.extrinsic_color = np.asarray(
73 | struct.unpack("f" * 16, f.read(16 * 4)), dtype=np.float32
74 | ).reshape(4, 4)
75 | self.intrinsic_depth = np.asarray(
76 | struct.unpack("f" * 16, f.read(16 * 4)), dtype=np.float32
77 | ).reshape(4, 4)
78 | self.extrinsic_depth = np.asarray(
79 | struct.unpack("f" * 16, f.read(16 * 4)), dtype=np.float32
80 | ).reshape(4, 4)
81 | self.color_compression_type = COMPRESSION_TYPE_COLOR[
82 | struct.unpack("i", f.read(4))[0]
83 | ]
84 | self.depth_compression_type = COMPRESSION_TYPE_DEPTH[
85 | struct.unpack("i", f.read(4))[0]
86 | ]
87 | self.color_width = struct.unpack("I", f.read(4))[0]
88 | self.color_height = struct.unpack("I", f.read(4))[0]
89 | self.depth_width = struct.unpack("I", f.read(4))[0]
90 | self.depth_height = struct.unpack("I", f.read(4))[0]
91 | self.depth_shift = struct.unpack("f", f.read(4))[0]
92 | num_frames = struct.unpack("Q", f.read(8))[0]
93 | self.frames = []
94 | for i in range(num_frames):
95 | frame = RGBDFrame()
96 | frame.load(f)
97 | self.frames.append(frame)
98 |
99 | def export_depth_images(self, output_path, image_size=None, frame_skip=1):
100 | if not os.path.exists(output_path):
101 | os.makedirs(output_path)
102 | print(
103 | "exporting", len(self.frames) // frame_skip, "depth frames to", output_path
104 | )
105 | for f in range(0, len(self.frames), frame_skip):
106 | depth_data = self.frames[f].decompress_depth(self.depth_compression_type)
107 | depth = np.frombuffer(depth_data, dtype=np.uint16).reshape(
108 | self.depth_height, self.depth_width
109 | )
110 | if image_size is not None:
111 | depth = cv2.resize(
112 | depth,
113 | (image_size[1], image_size[0]),
114 | interpolation=cv2.INTER_NEAREST,
115 | )
116 | # imageio.imwrite(os.path.join(output_path, str(f) + '.png'), depth)
117 | with open(
118 | os.path.join(output_path, f"{f:04d}" + ".png"), "wb"
119 | ) as f: # write 16-bit
120 | writer = png.Writer(
121 | width=depth.shape[1], height=depth.shape[0], bitdepth=16
122 | )
123 | depth = depth.reshape(-1, depth.shape[1]).tolist()
124 | writer.write(f, depth)
125 |
126 | def export_color_images(self, output_path, image_size=None, frame_skip=1):
127 | if not os.path.exists(output_path):
128 | os.makedirs(output_path)
129 | print(
130 | "exporting", len(self.frames) // frame_skip, "color frames to", output_path
131 | )
132 | for f in range(0, len(self.frames), frame_skip):
133 | color = self.frames[f].decompress_color(self.color_compression_type)
134 | if image_size is not None:
135 | color = cv2.resize(
136 | color,
137 | (image_size[1], image_size[0]),
138 | interpolation=cv2.INTER_NEAREST,
139 | )
140 | imageio.imwrite(os.path.join(output_path, f"{f:04d}" + ".jpg"), color)
141 |
142 | def save_mat_to_file(self, matrix, filename):
143 | with open(filename, "w") as f:
144 | for line in matrix:
145 | np.savetxt(f, line[np.newaxis], fmt="%f")
146 |
147 | def export_poses(self, output_path, frame_skip=1):
148 | if not os.path.exists(output_path):
149 | os.makedirs(output_path)
150 | print(
151 | "exporting", len(self.frames) // frame_skip, "camera poses to", output_path
152 | )
153 | for f in range(0, len(self.frames), frame_skip):
154 | self.save_mat_to_file(
155 | self.frames[f].camera_to_world,
156 | os.path.join(output_path, f"{f:04d}" + ".txt"),
157 | )
158 |
159 | def export_intrinsics(self, output_path, image_size=None):
160 | if not os.path.exists(output_path):
161 | os.makedirs(output_path)
162 | intrinsic_color = self.intrinsic_color
163 | if image_size is not None:
164 | resize_scale = max(
165 | image_size[0] / self.color_height, image_size[1] / self.color_width
166 | )
167 | intrinsic_color *= resize_scale
168 | intrinsic_color[[2, 3], [2, 3]] = 1
169 | print("exporting camera intrinsics to", output_path)
170 | self.save_mat_to_file(
171 | intrinsic_color, os.path.join(output_path, "intrinsic_color.txt")
172 | )
173 | self.save_mat_to_file(
174 | self.extrinsic_color, os.path.join(output_path, "extrinsic_color.txt")
175 | )
176 | self.save_mat_to_file(
177 | self.intrinsic_depth, os.path.join(output_path, "intrinsic_depth.txt")
178 | )
179 | self.save_mat_to_file(
180 | self.extrinsic_depth, os.path.join(output_path, "extrinsic_depth.txt")
181 | )
182 |
--------------------------------------------------------------------------------
/utils/download_perf.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import sys
3 | import os
4 |
5 | curl_v1 = "https://storage.live.com/downloadfiles/V1/Zip?&authkey=AM3Xv7w16oqDDv0&application=1141147648"
6 | data_raw_format_v1 = "resIds=9C85B2C346F440CF%{}&canary=YcSqFbuNcf7ZJ8hEk3EWehB7amZnmUeDmCaKX9bO%2FeQ%3D2&authkey=AM3Xv7w16oqDDv0"
7 |
8 | #
9 |
10 | curl_v2 = "https://storage.live.com/downloadfiles/V1/Zip?authKey=%21ACaUbVBSIuDvCrI&application=1141147648"
11 | data_raw_format_v2 = "resIds=60A1A318FA7A3606%{}&canary=LerJFOBG2LJm%2FTP%2BoThDzUjrn%2BnHeGoiRiam4wV0IpA%3D8&authkey=%21ACaUbVBSIuDvCrI"
12 |
13 | def download(args):
14 |
15 | assert args.dataset in ["co3d", "scannet"]
16 |
17 | if args.dataset == "co3d":
18 | if args.chunks is None:
19 | chunks = [str(i).zfill(2) for i in range(100)]
20 | else:
21 | chunks = args.chunks.lstrip("[").rstrip("]").split(",")
22 |
23 | for chunk in chunks:
24 | chunk = chunk.zfill(2)
25 | chunk_int = int(chunk)
26 | outpath = os.path.join(args.outdir, chunk + ".zip")
27 | if chunk_int > 75:
28 | data_raw = data_raw_format_v2.format(str(211419 - 76 + chunk_int))
29 | curl = curl_v2
30 | else:
31 | data_raw = data_raw_format_v1.format(str(21111 - 00 + chunk_int))
32 | curl = curl_v1
33 |
34 | run_str = f"curl -L \"{curl}\" --data-raw \"{data_raw}\" --compressed --output {outpath}"
35 | print("Running:", run_str)
36 | os.system(run_str)
37 |
38 | if __name__ == "__main__":
39 |
40 | parser = argparse.ArgumentParser()
41 | parser.add_argument(
42 | "--dataset",
43 | type=str,
44 | choices = ["co3d", "scannet"]
45 | )
46 | parser.add_argument(
47 | "--chunks",
48 | type=str,
49 | default=None
50 | )
51 | parser.add_argument(
52 | "--outdir",
53 | type=str,
54 | default="."
55 | )
56 | args = parser.parse_args()
57 |
58 | download(args)
--------------------------------------------------------------------------------
/utils/extract_scannet.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import os
3 | import sys
4 |
5 | from utils.SensorData import SensorData
6 |
7 | # params
8 | parser = argparse.ArgumentParser()
9 | # data paths
10 | parser.add_argument("--input_path", required=True, help="path to sens file to read")
11 | parser.add_argument("--output_path", required=True, help="path to output folder")
12 | parser.add_argument(
13 | "--export_depth_images", dest="export_depth_images", action="store_true"
14 | )
15 | parser.add_argument(
16 | "--export_color_images", dest="export_color_images", action="store_true"
17 | )
18 | parser.add_argument("--export_poses", dest="export_poses", action="store_true")
19 | parser.add_argument(
20 | "--export_intrinsics", dest="export_intrinsics", action="store_true"
21 | )
22 | parser.set_defaults(
23 | export_depth_images=False,
24 | export_color_images=False,
25 | export_poses=False,
26 | export_intrinsics=False,
27 | )
28 |
29 | opt = parser.parse_args()
30 | print(opt)
31 |
32 |
33 | def main(scene_name):
34 | print(f"processing {scene_name}")
35 | sens_file = os.path.join(opt.input_path, scene_name, f"{scene_name}.sens")
36 | outpath = os.path.join(opt.output_path, scene_name)
37 | if not os.path.exists(outpath):
38 | os.makedirs(outpath)
39 |
40 | # load the data
41 | imsize = [480, 640]
42 | sys.stdout.write("loading %s..." % sens_file)
43 | sd = SensorData(sens_file)
44 | sys.stdout.write("loaded!\n")
45 | if opt.export_depth_images:
46 | sd.export_depth_images(os.path.join(outpath, "depth"), image_size=imsize)
47 | if opt.export_color_images:
48 | sd.export_color_images(os.path.join(outpath, "color"), image_size=imsize)
49 | if opt.export_poses:
50 | sd.export_poses(os.path.join(outpath, "pose"))
51 | if opt.export_intrinsics:
52 | sd.export_intrinsics(os.path.join(outpath, "intrinsic"), image_size=imsize)
53 |
54 |
55 | if __name__ == "__main__":
56 | from multiprocessing import Pool
57 |
58 | scene_names = os.listdir("/root/data/scannet/scans")
59 | scene_names = sorted(scene_names)
60 |
61 | pool = Pool(processes=16)
62 | pool.map(main, scene_names)
63 | pool.close()
64 | pool.join()
65 |
--------------------------------------------------------------------------------
/utils/extract_stats.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import json
3 | import os
4 |
5 | import numpy as np
6 |
7 | if __name__ == "__main__":
8 | parser = argparse.ArgumentParser()
9 | parser.add_argument(
10 | "--datadir", default=".", type=str, help="path to the directory"
11 | )
12 | args = parser.parse_args()
13 |
14 | scenes = os.listdir(args.datadir)
15 | print(f"Total {len(scenes)} scenes from {args.datadir}")
16 | psnr, ssim, lpips = [], [], []
17 |
18 | results, missing = [], []
19 | for scene in scenes:
20 | json_file = os.path.join(args.datadir, scene, "results.json")
21 | if not os.path.exists(json_file):
22 | print(f"Not exist: {json_file}")
23 | missing.append(scene)
24 | continue
25 | else:
26 | with open(json_file, "r") as f:
27 | data = json.load(f)
28 | results.append(
29 | dict(
30 | scene=scene,
31 | psnr=data["PSNR"]["mean"],
32 | ssim=data["SSIM"]["mean"],
33 | lpips=data["LPIPS"]["mean"],
34 | )
35 | )
36 |
37 | score_name = ("psnr", "ssim", "lpips")
38 |
39 | for name in score_name:
40 | # print(f"{name} : {np.array(eval(name)).mean()}")
41 | metrics = np.array([r[name] for r in results])
42 | metrics = metrics[~np.isnan(metrics)]
43 | if name == "psnr":
44 | print(
45 | f"{name:>5}: {np.mean(metrics):.3f}+-{np.std(metrics):.3f}, > 15: {np.mean(metrics > 15):.3f}, > 20: {np.mean(metrics > 20):.3f}, > 25: {np.mean(metrics > 25):.3f}, max: {np.max(metrics):.3f}, min: {np.min(metrics):.3f}, 25th percentile: {np.percentile(metrics, 25):.3f}, 50th percentile: {np.percentile(metrics, 50):.3f}, 75th percentile: {np.percentile(metrics, 75):.3f}, 90th: {np.percentile(metrics, 90):.3f}, 95th: {np.percentile(metrics, 95):.3f}"
46 | )
47 | else:
48 | print(
49 | f"avg {name:>5}: {np.mean(metrics):.3f}, max: {np.max(metrics):.3f}, min: {np.min(metrics):.3f}"
50 | )
51 |
--------------------------------------------------------------------------------
/utils/logger.py:
--------------------------------------------------------------------------------
1 | import os
2 | import time
3 |
4 | import wandb
5 | from pytorch_lightning.loggers import WandbLogger
6 | from pytorch_lightning.loggers.base import rank_zero_experiment
7 | from wandb.wandb_run import Run
8 |
9 | MAX_RETRY = 100
10 |
11 |
12 | class RetryingWandbLogger(WandbLogger):
13 | @property
14 | @rank_zero_experiment
15 | def experiment(self) -> Run:
16 | r"""
17 | Actual wandb object. To use wandb features in your
18 | :class:`~pytorch_lightning.core.lightning.LightningModule` do the following.
19 | Example::
20 | self.logger.experiment.some_wandb_function()
21 | """
22 | if self._experiment is None:
23 | if self._offline:
24 | os.environ["WANDB_MODE"] = "dryrun"
25 |
26 | print("Initializing wandb")
27 | for i in range(MAX_RETRY):
28 | try:
29 | self._experiment = wandb.init(
30 | **self._wandb_init,
31 | )
32 | break
33 | except (
34 | TimeoutError,
35 | ConnectionError,
36 | wandb.errors.UsageError,
37 | wandb.errors.CommError,
38 | ) as e:
39 | print(f"Error {e}. Retrying in 5 sec")
40 | time.sleep(5)
41 |
42 | if self._experiment is None:
43 | raise RuntimeError(f"wandb.init still failing after {MAX_RETRY} retries")
44 |
45 | # save checkpoints in wandb dir to upload on W&B servers
46 | if self._log_model:
47 | self._save_dir = self._experiment.dir
48 | return self._experiment
--------------------------------------------------------------------------------
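RetryingWandbLogger is a drop-in replacement for WandbLogger; run.py constructs it roughly like this (the name/project values here are placeholders):

    from utils.logger import RetryingWandbLogger

    logger = RetryingWandbLogger(name="example_run", entity=None, project="demo")
    # The first access of logger.experiment calls wandb.init, retrying up to
    # MAX_RETRY times on connection or usage errors before giving up.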
/utils/notebooks/co3d_lists.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Generating Co3D Data List"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "The code below scans the whole Co3D dataset directory to build the scene-to-category mapping used by the dataloader."
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 5,
20 | "metadata": {},
21 | "outputs": [],
22 | "source": [
23 | "path_to_co3d = \"/home/yoonwoo/data/perfception/co3d\""
24 | ]
25 | },
26 | {
27 | "cell_type": "code",
28 | "execution_count": 6,
29 | "metadata": {},
30 | "outputs": [],
31 | "source": [
32 | "import json\n",
33 | "import os \n",
34 | "project_path = \"../..\""
35 | ]
36 | },
37 | {
38 | "cell_type": "code",
39 | "execution_count": 7,
40 | "metadata": {},
41 | "outputs": [],
42 | "source": [
43 | "frame_num_mapper = {}\n",
44 | "\n",
45 | "cls_list = [cls_name for cls_name in sorted(os.listdir(path_to_co3d))]\n",
46 | "for cls_name in cls_list: \n",
47 | " cls_path = os.path.join(path_to_co3d, cls_name)\n",
48 | " for frame_num in sorted(os.listdir(cls_path)):\n",
49 | " frame_path = os.path.join(cls_path, frame_num)\n",
50 | " if not os.path.isdir(frame_path): \n",
51 | " continue\n",
52 | " frame_num_mapper[frame_num] = cls_name"
53 | ]
54 | },
55 | {
56 | "cell_type": "code",
57 | "execution_count": 8,
58 | "metadata": {},
59 | "outputs": [],
60 | "source": [
61 | "with open(\"../../dataloader/co3d_lists/co3d_list.json\", \"w\") as fp:\n",
62 | " json.dump(frame_num_mapper, fp, indent=4, sort_keys=True)"
63 | ]
64 | },
65 | {
66 | "cell_type": "code",
67 | "execution_count": 18,
68 | "metadata": {},
69 | "outputs": [],
70 | "source": [
71 | "import json\n",
72 | "import yaml\n",
73 | "\n",
74 | "with open(\"../../dataloader/co3d_lists/co3d_list.json\") as fp:\n",
75 | " co3d_lists = json.load(fp)\n",
76 | "\n",
77 | "co3d_scene_lists = sorted(co3d_lists.keys())\n",
78 | "\n",
79 | "def generate_sweep_co3d():\n",
80 | " params = dict()\n",
81 | " params[\"scene_name\"] = dict(values=co3d_scene_lists)\n",
82 | " params[\"entity\"] = dict(value=\"postech_cvlab\")\n",
83 | " config = dict()\n",
84 | " config[\"method\"] = \"grid\"\n",
85 | " config[\"program\"] = \"run.py\"\n",
86 | " config[\"parameters\"] = params\n",
87 | " with open(\"../../cache/co3d.yaml\", \"w\") as fp:\n",
88 | " yaml.dump(config, fp)\n",
89 | "\n",
90 | "generate_sweep_co3d()"
91 | ]
92 | },
93 | {
94 | "cell_type": "code",
95 | "execution_count": null,
96 | "metadata": {},
97 | "outputs": [],
98 | "source": []
99 | }
100 | ],
101 | "metadata": {
102 | "kernelspec": {
103 | "display_name": "Python 3.8.10 64-bit",
104 | "language": "python",
105 | "name": "python3"
106 | },
107 | "language_info": {
108 | "codemirror_mode": {
109 | "name": "ipython",
110 | "version": 3
111 | },
112 | "file_extension": ".py",
113 | "mimetype": "text/x-python",
114 | "name": "python",
115 | "nbconvert_exporter": "python",
116 | "pygments_lexer": "ipython3",
117 | "version": "3.8.10"
118 | },
119 | "orig_nbformat": 4,
120 | "vscode": {
121 | "interpreter": {
122 | "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
123 | }
124 | }
125 | },
126 | "nbformat": 4,
127 | "nbformat_minor": 2
128 | }
129 |
--------------------------------------------------------------------------------
/utils/notebooks/collect_results.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 26,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import os\n",
10 | "import json\n",
11 | "import numpy as np"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 27,
17 | "metadata": {},
18 | "outputs": [],
19 | "source": [
20 | "path_to_co3d_perf = \"/home/yoonwoo/data/perfception/perfception_co3d\""
21 | ]
22 | },
23 | {
24 | "cell_type": "code",
25 | "execution_count": 28,
26 | "metadata": {},
27 | "outputs": [],
28 | "source": [
29 | "psnr, ssim, lpips = [], [], []\n",
30 | "nan = []\n",
31 | "missing = []\n",
32 | "\n",
33 | "for scene in sorted(os.listdir(path_to_co3d_perf)):\n",
34 | " scene_path = os.path.join(path_to_co3d_perf, scene)\n",
35 | " result_path = os.path.join(scene_path, \"results.json\")\n",
36 | " if os.path.exists(result_path):\n",
37 | " with open(result_path, \"r\") as fp:\n",
38 | " json_file = json.load(fp)\n",
39 | " psnr.append(json_file[\"PSNR\"][\"test\"])\n",
40 | " ssim.append(json_file[\"SSIM\"][\"test\"])\n",
41 | " lpips_curr = json_file[\"LPIPS\"][\"test\"]\n",
42 | " if np.isnan(lpips_curr):\n",
43 | " nan.append(scene_path)\n",
44 | " else:\n",
45 | " lpips.append(json_file[\"LPIPS\"][\"test\"])\n",
46 | " else:\n",
47 | " missing.append(scene_path)"
48 | ]
49 | },
50 | {
51 | "cell_type": "code",
52 | "execution_count": 33,
53 | "metadata": {},
54 | "outputs": [
55 | {
56 | "name": "stdout",
57 | "output_type": "stream",
58 | "text": [
59 | "PSNR : 28.816426871881763\n",
60 | "SSIM : 0.8564840038082171\n",
61 | "LPIPS : 0.34507738965239027\n",
62 | "NaN LPIPS fraction: 0.00021483430903915354\n",
63 | "PSNR > 15 0.9980664912186477\n",
64 | "PSNR > 20 0.9824372952360492\n",
65 | "PSNR > 25 0.872603254739782\n"
66 | ]
67 | }
68 | ],
69 | "source": [
70 | "psnr = np.array(psnr)\n",
71 | "ssim = np.array(ssim)\n",
72 | "lpips = np.array(lpips)\n",
73 | "\n",
74 | "print(\"PSNR : \", psnr.mean())\n",
75 | "print(\"SSIM : \", ssim.mean())\n",
76 | "print(\"LPIPS : \", lpips.mean())\n",
77 | "print(len(nan) / len(psnr))\n",
78 | "print(\"PSNR > 20\", (psnr > 15).mean())\n",
79 | "print(\"PSNR > 20\", (psnr > 20).mean())\n",
80 | "print(\"PSNR > 20\", (psnr > 25).mean())"
81 | ]
82 | },
83 | {
84 | "cell_type": "code",
85 | "execution_count": 31,
86 | "metadata": {},
87 | "outputs": [
88 | {
89 | "data": {
90 | "text/plain": [
91 | "[]"
92 | ]
93 | },
94 | "execution_count": 31,
95 | "metadata": {},
96 | "output_type": "execute_result"
97 | }
98 | ],
99 | "source": [
100 | "missing"
101 | ]
102 | },
103 | {
104 | "cell_type": "code",
105 | "execution_count": null,
106 | "metadata": {},
107 | "outputs": [],
108 | "source": []
109 | }
110 | ],
111 | "metadata": {
112 | "interpreter": {
113 | "hash": "68327cb5290cd066fc32988c700987102dfc6b9931a40b8ea9c47728386b26ed"
114 | },
115 | "kernelspec": {
116 | "display_name": "Python 3.8.5 ('nerf_factory')",
117 | "language": "python",
118 | "name": "python3"
119 | },
120 | "language_info": {
121 | "codemirror_mode": {
122 | "name": "ipython",
123 | "version": 3
124 | },
125 | "file_extension": ".py",
126 | "mimetype": "text/x-python",
127 | "name": "python",
128 | "nbconvert_exporter": "python",
129 | "pygments_lexer": "ipython3",
130 | "version": "3.8.5"
131 | },
132 | "orig_nbformat": 4
133 | },
134 | "nbformat": 4,
135 | "nbformat_minor": 2
136 | }
137 |
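The aggregation above assumes one results.json per scene directory, holding at least the keys the cell reads. A minimal sketch of a conforming file, with illustrative values:

    import json

    # only the keys read above are required; LPIPS may be NaN,
    # which the collection loop skips
    results = {
        "PSNR": {"test": 28.8},
        "SSIM": {"test": 0.86},
        "LPIPS": {"test": 0.35},
    }
    with open("results.json", "w") as fp:
        json.dump(results, fp)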
--------------------------------------------------------------------------------
/utils/notebooks/find_missing_logs.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import os\n",
10 | "import shutil\n",
11 | "import json\n",
12 | "import yaml"
13 | ]
14 | },
15 | {
16 | "cell_type": "code",
17 | "execution_count": null,
18 | "metadata": {},
19 | "outputs": [],
20 | "source": [
21 | "path_to_perf_co3d = \"/home/yoonwoo/data/perfception/perfception_co3d\""
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": null,
27 | "metadata": {},
28 | "outputs": [],
29 | "source": [
30 | "cnt = 0\n",
31 | "co3d_lists = os.listdir(path_to_perf_co3d)\n",
32 | "for co3d_dir_name in co3d_lists:\n",
33 | " co3d_dir_path = os.path.join(path_to_perf_co3d, co3d_dir_name)\n",
34 | " results_path = os.path.join(co3d_dir_path, \"results.json\")\n",
35 | " if not os.path.exists(results_path):\n",
36 | " # shutil.rmtree(co3d_dir_path)\n",
37 | " cnt += 1\n",
38 | "print(cnt, len(co3d_lists))"
39 | ]
40 | },
41 | {
42 | "cell_type": "code",
43 | "execution_count": null,
44 | "metadata": {},
45 | "outputs": [],
46 | "source": [
47 | "with open(\"../../dataloader/co3d_lists/co3d_list.json\") as fp:\n",
48 | " co3d_all_lists = json.load(fp)\n",
49 | "\n",
50 | "co3d_lists = os.listdir(path_to_perf_co3d)\n",
51 | "print(co3d_lists[0], list(co3d_all_lists.keys())[0])\n",
52 | "co3d_missing_scene_list = []\n",
53 | "for co3d_dir_name in co3d_all_lists.keys():\n",
54 | " if \"plenoxel_co3d_\" + co3d_dir_name not in co3d_lists:\n",
55 | " co3d_missing_scene_list.append(co3d_dir_name)\n",
56 | "\n",
57 | "def generate_sweep_co3d():\n",
58 | " params = dict()\n",
59 | " params[\"scene_name\"] = dict(values=co3d_missing_scene_list)\n",
60 | " params[\"entity\"] = dict(value=\"postech_cvlab\")\n",
61 | " config = dict()\n",
62 | " config[\"method\"] = \"grid\"\n",
63 | " config[\"program\"] = \"run.py\"\n",
64 | " config[\"parameters\"] = params\n",
65 | " with open(\"../../cache/co3d_missing.yaml\", \"w\") as fp:\n",
66 | " yaml.dump(config, fp)\n",
67 | "\n",
68 | "generate_sweep_co3d()"
69 | ]
70 | },
71 | {
72 | "cell_type": "markdown",
73 | "metadata": {},
74 | "source": [
75 | "# Find missing logs for rendering"
76 | ]
77 | },
78 | {
79 | "cell_type": "code",
80 | "execution_count": 22,
81 | "metadata": {},
82 | "outputs": [],
83 | "source": [
84 | "import os\n",
85 | "import json\n",
86 | "import yaml\n",
87 | "import shutil"
88 | ]
89 | },
90 | {
91 | "cell_type": "code",
92 | "execution_count": 30,
93 | "metadata": {},
94 | "outputs": [],
95 | "source": [
96 | "rendered_data_path = \"/home/yoonwoo/data/render\"\n",
97 | "assert os.path.exists(rendered_data_path)\n",
98 | "with open(\"../../dataloader/co3d_lists/co3d_list.json\") as fp:\n",
99 | " co3d_all_lists = json.load(fp)\n",
100 | "\n",
101 | "co3d_missing_scene_list = []\n",
102 | "for (scene_number, class_name) in co3d_all_lists.items():\n",
103 | " dirpath = os.path.join(rendered_data_path, class_name, scene_number)\n",
104 | " bgpath = os.path.join(dirpath, \"bg\")\n",
105 | " if not os.path.exists(os.path.join(bgpath, \"image049.jpg\")): \n",
106 | " co3d_missing_scene_list.append(scene_number)\n",
107 | " # if os.path.exists(dirpath):\n",
108 | " # shutil.rmtree(dirpath)\n",
109 | " print(bgpath)"
110 | ]
111 | },
112 | {
113 | "cell_type": "code",
114 | "execution_count": 20,
115 | "metadata": {},
116 | "outputs": [],
117 | "source": [
118 | "def generate_sweep_co3d():\n",
119 | " params = dict()\n",
120 | " params[\"scene_name\"] = dict(values=co3d_missing_scene_list)\n",
121 | " params[\"entity\"] = dict(value=\"postech_cvlab\")\n",
122 | " config = dict()\n",
123 | " config[\"method\"] = \"grid\"\n",
124 | " config[\"program\"] = \"run.py\"\n",
125 | " config[\"parameters\"] = params\n",
126 | " with open(\"../../cache/co3d_missing.yaml\", \"w\") as fp:\n",
127 | " yaml.dump(config, fp)\n",
128 | "\n",
129 | "generate_sweep_co3d()"
130 | ]
131 | },
132 | {
133 | "cell_type": "code",
134 | "execution_count": 29,
135 | "metadata": {},
136 | "outputs": [],
137 | "source": [
138 | "# Move the rendered results back\n",
139 | "import os\n",
140 | "rendered_dir = \"../../render\"\n",
141 | "for cls_name in os.listdir(rendered_dir):\n",
142 | " cls_path = os.path.join(rendered_dir, cls_name) \n",
143 | " for scene_num in os.listdir(cls_path):\n",
144 | " scene_path = os.path.join(cls_path, scene_num)\n",
145 | " scene_mv_to = os.path.join(rendered_data_path, cls_name, scene_num)\n",
146 | " shutil.copytree(scene_path, scene_mv_to)"
147 | ]
148 | },
149 | {
150 | "cell_type": "code",
151 | "execution_count": 48,
152 | "metadata": {},
153 | "outputs": [],
154 | "source": [
155 | "# Merge the bg and fg/bg\n",
156 | "import os\n",
157 | "\n",
158 | "co3d_2d_orig = \"/home/yoonwoo/data/perfception/perfception_co3d_2d\"\n",
159 | "assert os.path.exists(co3d_2d_orig)\n",
160 | "\n",
161 | "for cls_name in os.listdir(co3d_2d_orig):\n",
162 | " cls_path = os.path.join(co3d_2d_orig, cls_name)\n",
163 | " for scene_num in os.listdir(cls_path):\n",
164 | " scene_path = os.path.join(cls_path, scene_num)\n",
165 | " bg_path = os.path.join(scene_path, \"bg\")\n",
166 | " # if os.path.exists(bg_path): \n",
167 | " # shutil.rmtree(bg_path)\n",
168 | "\n",
169 | "for cls_name in os.listdir(co3d_2d_orig):\n",
170 | " cls_path = os.path.join(co3d_2d_orig, cls_name)\n",
171 | " for scene_num in os.listdir(cls_path): \n",
172 | " scene_path = os.path.join(cls_path, scene_num)\n",
173 | " scene_path_src = os.path.join(\"/home/yoonwoo/data/render\", cls_name, scene_num)\n",
174 | " pose_path = os.path.join(scene_path_src, \"poses.npy\")\n",
175 | " intrinsics_path = os.path.join(scene_path_src, \"intrinsics.npy\")\n",
176 | " bg_path = os.path.join(scene_path_src, \"bg\")\n",
177 | " assert os.path.exists(pose_path)\n",
178 | " assert os.path.exists(intrinsics_path)\n",
179 | " assert os.path.exists(bg_path)\n",
180 | " assert os.path.exists(scene_path)\n",
181 | " shutil.move(pose_path, os.path.join(scene_path, \"poses.npy\"))\n",
182 | " shutil.move(intrinsics_path, os.path.join(scene_path, \"intrinsics.npy\"))\n",
183 | " shutil.move(bg_path, os.path.join(scene_path, \"bg\"))"
184 | ]
185 | },
186 | {
187 | "cell_type": "code",
188 | "execution_count": null,
189 | "metadata": {},
190 | "outputs": [],
191 | "source": []
192 | }
193 | ],
194 | "metadata": {
195 | "interpreter": {
196 | "hash": "68f3718ea42175570df1c98c46be3c56e287215895583cda485fe92d043b42e4"
197 | },
198 | "kernelspec": {
199 | "display_name": "Python 3.8.5 ('atp')",
200 | "language": "python",
201 | "name": "python3"
202 | },
203 | "language_info": {
204 | "codemirror_mode": {
205 | "name": "ipython",
206 | "version": 3
207 | },
208 | "file_extension": ".py",
209 | "mimetype": "text/x-python",
210 | "name": "python",
211 | "nbconvert_exporter": "python",
212 | "pygments_lexer": "ipython3",
213 | "version": "3.8.5"
214 | },
215 | "orig_nbformat": 4
216 | },
217 | "nbformat": 4,
218 | "nbformat_minor": 2
219 | }
220 |
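The missing-scene check above relies on the convention that each log directory is the scene id prefixed with "plenoxel_co3d_". A minimal set-based sketch of the same computation, using the notebook's paths:

    import json
    import os

    path_to_perf_co3d = "/home/yoonwoo/data/perfception/perfception_co3d"
    with open("dataloader/co3d_lists/co3d_list.json") as fp:
        co3d_all_lists = json.load(fp)

    existing = set(os.listdir(path_to_perf_co3d))
    missing = [s for s in co3d_all_lists if "plenoxel_co3d_" + s not in existing]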
--------------------------------------------------------------------------------
/utils/notebooks/make_collage_list.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 27,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import os\n",
10 | "import json\n",
11 | "import numpy as np"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 28,
17 | "metadata": {},
18 | "outputs": [],
19 | "source": [
20 | "logpath = \"../../logs\"\n",
21 | "loglist = os.listdir(logpath)\n",
22 | "loglistpath = [os.path.join(logpath, f) for f in loglist]"
23 | ]
24 | },
25 | {
26 | "cell_type": "code",
27 | "execution_count": 47,
28 | "metadata": {},
29 | "outputs": [],
30 | "source": [
31 | "cls_list, ssim_list = [], []\n",
32 | "\n",
33 | "for fpath in loglistpath:\n",
34 | " json_path = os.path.join(fpath, \"results.json\")\n",
35 | " txt_path = os.path.join(fpath, \"class_info.txt\")\n",
36 | " with open(json_path) as fp:\n",
37 | " json_file = json.load(fp)\n",
38 | " with open(txt_path) as fp:\n",
39 | " txt_path = fp.readline()\n",
40 | " ssim_list.append(json_file[\"SSIM\"][\"test\"])\n",
41 | " cls_list.append(txt_path)\n",
42 | "\n",
43 | "ssim_list = np.array(ssim_list)"
44 | ]
45 | },
46 | {
47 | "cell_type": "code",
48 | "execution_count": 48,
49 | "metadata": {},
50 | "outputs": [
51 | {
52 | "name": "stdout",
53 | "output_type": "stream",
54 | "text": [
55 | "0.9771251082420349\n"
56 | ]
57 | }
58 | ],
59 | "source": [
60 | "argsort = np.argsort(ssim_list)[::-1]\n",
61 | "print(ssim_list[argsort[0]])\n",
62 | "\n",
63 | "scene_list, cls_scene_list = [], []\n",
64 | "for idx in argsort:\n",
65 | " if len(scene_list) == 50:\n",
66 | " break\n",
67 | " cls_name = cls_list[idx]\n",
68 | " ssim_score = ssim_list[idx]\n",
69 | " if cls_name in cls_scene_list: continue\n",
70 | " cls_scene_list.append(cls_name)\n",
71 | " scene_list.append(loglist[idx])\n",
72 | "\n",
73 | "parsed_scene_name = [\"_\".join(scene_name.split(\"_\")[2:]) for scene_name in scene_list]"
74 | ]
75 | },
76 | {
77 | "cell_type": "code",
78 | "execution_count": 50,
79 | "metadata": {},
80 | "outputs": [],
81 | "source": [
82 | "with open(\"collage.sh\", \"w\") as fp:\n",
83 | " for scene in parsed_scene_name:\n",
84 | " fp.write(\n",
85 | " f\"python3 -m run --ginc configs/co3d.gin --scene_name {scene} --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True\\n\"\n",
86 | " )"
87 | ]
88 | },
89 | {
90 | "cell_type": "code",
91 | "execution_count": null,
92 | "metadata": {},
93 | "outputs": [],
94 | "source": []
95 | }
96 | ],
97 | "metadata": {
98 | "kernelspec": {
99 | "display_name": "Python 3.8.13 ('perfception')",
100 | "language": "python",
101 | "name": "python3"
102 | },
103 | "language_info": {
104 | "codemirror_mode": {
105 | "name": "ipython",
106 | "version": 3
107 | },
108 | "file_extension": ".py",
109 | "mimetype": "text/x-python",
110 | "name": "python",
111 | "nbconvert_exporter": "python",
112 | "pygments_lexer": "ipython3",
113 | "version": "3.8.13"
114 | },
115 | "orig_nbformat": 4,
116 | "vscode": {
117 | "interpreter": {
118 | "hash": "b471587bbe11c509fa4c273e3c27dad8d0f74f51357e50e66bf733daf1d8a5fb"
119 | }
120 | }
121 | },
122 | "nbformat": 4,
123 | "nbformat_minor": 2
124 | }
125 |
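Each line of the generated collage.sh has the following shape (scene id illustrative):

    python3 -m run --ginc configs/co3d.gin --scene_name 115_13559_29303 --ginb run.run_train=False --ginb run.run_eval=False --ginb run.run_render=True --ginb load_co3d_data.render_scene_interp=True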
--------------------------------------------------------------------------------
/utils/notebooks/make_video.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 5,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import os\n",
10 | "import imageio\n",
11 | "from PIL import Image\n",
12 | "import numpy as np"
13 | ]
14 | },
15 | {
16 | "cell_type": "code",
17 | "execution_count": 14,
18 | "metadata": {},
19 | "outputs": [
20 | {
21 | "name": "stderr",
22 | "output_type": "stream",
23 | "text": [
24 | "IMAGEIO FFMPEG_WRITER WARNING: input image is not divisible by macro_block_size=16, resizing from (168, 300) to (176, 304) to ensure video compatibility with most codecs and players. To prevent resizing, make your input image divisible by the macro_block_size or set the macro_block_size to 1 (risking incompatibility).\n",
25 | "[swscaler @ 0x6977780] Warning: data is not aligned! This can lead to a speed loss\n",
26 | "IMAGEIO FFMPEG_WRITER WARNING: input image is not divisible by macro_block_size=16, resizing from (168, 300) to (176, 304) to ensure video compatibility with most codecs and players. To prevent resizing, make your input image divisible by the macro_block_size or set the macro_block_size to 1 (risking incompatibility).\n",
27 | "[swscaler @ 0x6414a00] Warning: data is not aligned! This can lead to a speed loss\n",
28 | "IMAGEIO FFMPEG_WRITER WARNING: input image is not divisible by macro_block_size=16, resizing from (168, 300) to (176, 304) to ensure video compatibility with most codecs and players. To prevent resizing, make your input image divisible by the macro_block_size or set the macro_block_size to 1 (risking incompatibility).\n",
29 | "[swscaler @ 0x6e96a00] Warning: data is not aligned! This can lead to a speed loss\n"
30 | ]
31 | },
32 | {
33 | "name": "stdout",
34 | "output_type": "stream",
35 | "text": [
36 | "remote 117_13756_28310\n"
37 | ]
38 | }
39 | ],
40 | "source": [
41 | "render_path = \"../../render\"\n",
42 | "cls_list = os.listdir(render_path)\n",
43 | "for cls_name in cls_list: \n",
44 | " cls_path = os.path.join(render_path, cls_name)\n",
45 | " seq_list = os.listdir(cls_path)\n",
46 | " for seq in seq_list: \n",
47 | " seq_path = os.path.join(cls_path, seq)\n",
48 | " assets_list = [\"fg\", \"fgbg\", \"mask\"]\n",
49 | " for asset in assets_list:\n",
50 | " asset_path = os.path.join(seq_path, asset)\n",
51 | " img_path_list = os.listdir(asset_path)\n",
52 | " img_list = []\n",
53 | " for img in img_path_list:\n",
54 | " if img.endswith(\".mp4\"):\n",
55 | " continue\n",
56 | " imgpath = os.path.join(asset_path, img)\n",
57 | " img_list.append(np.asarray(Image.open(imgpath)))\n",
58 | " imageio.mimwrite(os.path.join(asset_path, f\"{seq}_{asset}.mp4\"), img_list)\n",
59 | " print(cls_name, seq)\n",
60 | " break\n",
61 | " break\n"
62 | ]
63 | },
64 | {
65 | "cell_type": "code",
66 | "execution_count": null,
67 | "metadata": {},
68 | "outputs": [],
69 | "source": []
70 | }
71 | ],
72 | "metadata": {
73 | "kernelspec": {
74 | "display_name": "Python 3.8.13 ('perfception')",
75 | "language": "python",
76 | "name": "python3"
77 | },
78 | "language_info": {
79 | "codemirror_mode": {
80 | "name": "ipython",
81 | "version": 3
82 | },
83 | "file_extension": ".py",
84 | "mimetype": "text/x-python",
85 | "name": "python",
86 | "nbconvert_exporter": "python",
87 | "pygments_lexer": "ipython3",
88 | "version": "3.8.13"
89 | },
90 | "orig_nbformat": 4,
91 | "vscode": {
92 | "interpreter": {
93 | "hash": "b471587bbe11c509fa4c273e3c27dad8d0f74f51357e50e66bf733daf1d8a5fb"
94 | }
95 | }
96 | },
97 | "nbformat": 4,
98 | "nbformat_minor": 2
99 | }
100 |
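The ffmpeg warnings in the recorded output come from frame sizes that are not multiples of 16. A minimal sketch of the workaround the warning itself suggests, passing macro_block_size=1 through imageio's ffmpeg writer (frames illustrative):

    import imageio
    import numpy as np

    # 168x300 is not divisible by 16; macro_block_size=1 skips the resize
    frames = [np.zeros((168, 300, 3), dtype=np.uint8) for _ in range(10)]
    imageio.mimwrite("out.mp4", frames, fps=30, macro_block_size=1)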
--------------------------------------------------------------------------------
/utils/notebooks/masked_out.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 10,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import os\n",
10 | "from PIL import Image \n",
11 | "import numpy as np\n",
12 | "import tqdm\n",
13 | "import json\n",
14 | "import gzip"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 19,
20 | "metadata": {},
21 | "outputs": [],
22 | "source": [
23 | "with open(\"../../dataloader/co3d_lists/co3d_list.json\") as fp:\n",
24 | " co3d_lists = json.load(fp)\n",
25 | "\n",
26 | "ours_path = os.path.join(\"/home/yoonwoo/data/perfception/perfception_co3d\")\n",
27 | "co3d_path = os.path.join(\"/home/yoonwoo/data/perfception/co3d\")\n",
28 | "\n",
29 | "\n",
30 | "for (k, v) in co3d_lists.items():\n",
31 | " basedir = os.path.join(co3d_path, v)\n",
32 | " co3d_curr = os.path.join(co3d_path, v)\n",
33 | " json_path = os.path.join(basedir, \"frame_annotations.jgz\")\n",
34 | " with gzip.open(json_path, \"r\") as fp:\n",
35 | " all_frames_data = json.load(fp)\n",
36 | "\n",
37 | " frame_data, images, intrinsics, extrinsics, image_sizes = [], [], [], [], []\n",
38 | "\n",
39 | " for temporal_data in all_frames_data:\n",
40 | " if temporal_data[\"sequence_name\"] == k:\n",
41 | " frame_data.append(temporal_data)\n",
42 | "\n",
43 | " test_frame_data = frame_data[::10]\n",
44 | " test_fname = [os.path.join(co3d_path, frame[\"image\"][\"path\"]) for frame in test_frame_data]\n",
45 | " mask_fname = [os.path.join(co3d_path, frame[\"mask\"][\"path\"]) for frame in test_frame_data]\n",
46 | "\n",
47 | " test_img = [np.asarray(Image.open(fname)) for fname in test_fname]\n",
48 | " mask_img = [np.asarray(Image.open(fname)) for fname in mask_fname]\n",
49 | "\n",
50 | " ours_curr = os.path.join(ours_path, \"render_model\")\n",
51 | " break"
52 | ]
53 | },
54 | {
55 | "cell_type": "code",
56 | "execution_count": 21,
57 | "metadata": {},
58 | "outputs": [
59 | {
60 | "data": {
61 | "text/plain": [
62 | "(2000, 1086)"
63 | ]
64 | },
65 | "execution_count": 21,
66 | "metadata": {},
67 | "output_type": "execute_result"
68 | }
69 | ],
70 | "source": [
71 | "mask_img[0].shape"
72 | ]
73 | },
74 | {
75 | "cell_type": "code",
76 | "execution_count": null,
77 | "metadata": {},
78 | "outputs": [],
79 | "source": []
80 | }
81 | ],
82 | "metadata": {
83 | "interpreter": {
84 | "hash": "68f3718ea42175570df1c98c46be3c56e287215895583cda485fe92d043b42e4"
85 | },
86 | "kernelspec": {
87 | "display_name": "Python 3.8.5 ('atp')",
88 | "language": "python",
89 | "name": "python3"
90 | },
91 | "language_info": {
92 | "codemirror_mode": {
93 | "name": "ipython",
94 | "version": 3
95 | },
96 | "file_extension": ".py",
97 | "mimetype": "text/x-python",
98 | "name": "python",
99 | "nbconvert_exporter": "python",
100 | "pygments_lexer": "ipython3",
101 | "version": "3.8.5"
102 | },
103 | "orig_nbformat": 4
104 | },
105 | "nbformat": 4,
106 | "nbformat_minor": 2
107 | }
108 |
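One plausible use of the masks loaded above, sketched under the assumption that they are single-channel uint8 images in [0, 255] (paths hypothetical):

    import numpy as np
    from PIL import Image

    img = np.asarray(Image.open("image.jpg"))   # (H, W, 3) uint8
    mask = np.asarray(Image.open("mask.png"))   # (H, W) uint8

    fg_only = img * (mask[..., None] > 127)     # zero out the background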
--------------------------------------------------------------------------------
/utils/notebooks/v1_v2_score.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 9,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import os\n",
10 | "import json\n",
11 | "import numpy as np"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 5,
17 | "metadata": {},
18 | "outputs": [],
19 | "source": [
20 | "v1_v2_path = \"/home/yoonwoo/data/perfception/single_v2_log/\"\n",
21 | "scene_list = os.listdir(v1_v2_path)\n",
22 | "v1_list, v2_list = [], []\n",
23 | "for scene in scene_list:\n",
24 | " scene_path = os.path.join(v1_v2_path, scene)\n",
25 | " if \"v1\" in scene:\n",
26 | " v1_list.append(scene_path)\n",
27 | " assert os.path.exists(os.path.join(scene_path, \"results.json\"))\n",
28 | " if \"v2\" in scene:\n",
29 | " v2_list.append(scene_path)\n",
30 | " assert os.path.exists(os.path.join(scene_path, \"results.json\"))"
31 | ]
32 | },
33 | {
34 | "cell_type": "code",
35 | "execution_count": 10,
36 | "metadata": {},
37 | "outputs": [
38 | {
39 | "name": "stdout",
40 | "output_type": "stream",
41 | "text": [
42 | "28.818891472286648\n",
43 | "0.8507727715704176\n",
44 | "0.3533864857421981\n",
45 | "29.86061403486464\n",
46 | "0.8563342110978233\n",
47 | "0.3178743819395701\n"
48 | ]
49 | }
50 | ],
51 | "source": [
52 | "select_list = [v1_list, v2_list]\n",
53 | "for v_list in select_list:\n",
54 | " psnr, ssim, lpips = [], [], []\n",
55 | " for scene in v_list:\n",
56 | " with open(os.path.join(scene, \"results.json\")) as fp:\n",
57 | " json_file = json.load(fp)\n",
58 | " psnr.append(json_file[\"PSNR\"][\"test\"])\n",
59 | " ssim.append(json_file[\"SSIM\"][\"test\"])\n",
60 | " lpips.append(json_file[\"LPIPS\"][\"test\"])\n",
61 | " print(np.mean(np.array(psnr)))\n",
62 | " print(np.mean(np.array(ssim)))\n",
63 | " print(np.mean(np.array(lpips)))"
64 | ]
65 | },
66 | {
67 | "cell_type": "code",
68 | "execution_count": null,
69 | "metadata": {},
70 | "outputs": [],
71 | "source": []
72 | },
73 | {
74 | "cell_type": "code",
75 | "execution_count": null,
76 | "metadata": {},
77 | "outputs": [],
78 | "source": []
79 | }
80 | ],
81 | "metadata": {
82 | "kernelspec": {
83 | "display_name": "Python 3.8.13 ('perfception')",
84 | "language": "python",
85 | "name": "python3"
86 | },
87 | "language_info": {
88 | "codemirror_mode": {
89 | "name": "ipython",
90 | "version": 3
91 | },
92 | "file_extension": ".py",
93 | "mimetype": "text/x-python",
94 | "name": "python",
95 | "nbconvert_exporter": "python",
96 | "pygments_lexer": "ipython3",
97 | "version": "3.8.13"
98 | },
99 | "orig_nbformat": 4,
100 | "vscode": {
101 | "interpreter": {
102 | "hash": "b471587bbe11c509fa4c273e3c27dad8d0f74f51357e50e66bf733daf1d8a5fb"
103 | }
104 | }
105 | },
106 | "nbformat": 4,
107 | "nbformat_minor": 2
108 | }
109 |
--------------------------------------------------------------------------------
/utils/notebooks/visualize_voxel.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Code for visualizing Plenoxel voxel"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 1,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "import os\n",
17 | "import json\n",
18 | "# os.environ['DISPLAY']=\":1\"\n",
19 | "\n",
20 | "import numpy as np\n",
21 | "import open3d as o3d\n",
22 | "import torch"
23 | ]
24 | },
25 | {
26 | "cell_type": "code",
27 | "execution_count": 2,
28 | "metadata": {},
29 | "outputs": [],
30 | "source": [
31 | "def voxel2pcd(ckpt_dir):\n",
32 | "\n",
33 | " path_candidate = os.path.join(ckpt_dir, \"last.ckpt\")\n",
34 | " if not os.path.exists(path_candidate):\n",
35 | " path_candidate = os.path.join(ckpt_dir, \"data.ckpt\")\n",
36 | " ckpt = torch.load(path_candidate)\n",
37 | "\n",
38 | " density = ckpt[\"state_dict\"][\"model.density_data\"].detach().cpu()\n",
39 | " links_idx = ckpt[\"state_dict\"][\"model.links_idx\"].detach().cpu()\n",
40 | " valid = torch.where(density > 0.0)[0].long()\n",
41 | " density, links_idx = density[valid], links_idx[valid].long()\n",
42 | "\n",
43 | " resolution = (\n",
44 | " ckpt[\"reso_list\"][\"reso_idx\"] \n",
45 | " if \"reso_list\" in ckpt.keys() else \n",
46 | " [256, 256, 256]\n",
47 | " )\n",
48 | " \n",
49 | " links_idx = torch.stack(\n",
50 | " [\n",
51 | " links_idx // (resolution[1] * resolution[2]),\n",
52 | " links_idx % (resolution[1] * resolution[2]) // resolution[2],\n",
53 | " -links_idx % resolution[2],\n",
54 | " ],\n",
55 | " -1,\n",
56 | " )\n",
57 | " pts = links_idx.numpy().astype(np.float64)\n",
58 | " pts_color = (density - density.min()) / (density.max() - density.min())\n",
59 | " pts_color = pts_color.numpy().astype(np.float64).repeat(3, axis=-1)\n",
60 | "\n",
61 | " pts = np.concatenate([pts], axis=0)\n",
62 | " pts_color = np.concatenate([pts_color], axis=0)\n",
63 | "\n",
64 | " pcd = o3d.geometry.PointCloud()\n",
65 | " pcd.points = o3d.utility.Vector3dVector(pts)\n",
66 | " pcd.colors = o3d.utility.Vector3dVector(pts_color)\n",
67 | " o3d.visualization.draw_geometries([pcd])"
68 | ]
69 | },
70 | {
71 | "cell_type": "code",
72 | "execution_count": 3,
73 | "metadata": {},
74 | "outputs": [],
75 | "source": [
76 | "voxel2pcd(\"/home/yoonwoo/Workspace/NeRF-Factory/logs/plenoxel_co3d_106_12648_23157\")"
77 | ]
78 | },
79 | {
80 | "cell_type": "code",
81 | "execution_count": 72,
82 | "metadata": {},
83 | "outputs": [],
84 | "source": [
85 | "path_to_ckpt_dir = \"\"\n",
86 | "ckpt_dirs = []\n",
87 | "# ckpt_dirs = [\"plenoxel_torch_110_13051_23361\"]\n",
88 | "path_to_ckpt_dir = \"/home/yoonwoo/Workspace/NeRF-Factory/logs\"\n",
89 | "path_to_v1 = \"/home/yoonwoo/data/perfception/perfception_co3d\"\n",
90 | "ckpt_dirs = [\"plenoxel_co3d_115_13559_29303\"]\n",
91 | "ckpt_dirs = [f for f in sorted(os.listdir(\"../../logs/\")) if \"plenoxel\" in f]"
92 | ]
93 | },
94 | {
95 | "cell_type": "code",
96 | "execution_count": 80,
97 | "metadata": {},
98 | "outputs": [],
99 | "source": [
100 | "v2_psnr, v1_psnr = [], []\n",
101 | "v2_ssim, v1_ssim = [], []\n",
102 | "v2_lpips, v1_lpips = [], []\n",
103 | "\n",
104 | "for ckpt_dir in ckpt_dirs: \n",
105 | " path_ckpt_dir = os.path.join(path_to_ckpt_dir, ckpt_dir)\n",
106 | " path_v1_dir = os.path.join(path_to_v1, ckpt_dir.replace(\"co3d\", \"torch\"))\n",
107 | " if not os.path.exists(path_v1_dir):\n",
108 | " continue\n",
109 | " # voxel2pcd(path_ckpt_dir)\n",
110 | " # voxel2pcd(path_v1_dir)\n",
111 | " result_v2 = os.path.join(path_to_ckpt_dir,ckpt_dir, \"results.json\")\n",
112 | " result_v1 = os.path.join(path_to_v1, ckpt_dir.replace(\"co3d\", \"torch\"), \"results.json\")\n",
113 | " with open(result_v2) as fp:\n",
114 | " json_file = json.load(fp)\n",
115 | " v2_psnr.append(json_file[\"PSNR\"][\"test\"])\n",
116 | " v2_ssim.append(json_file[\"SSIM\"][\"test\"])\n",
117 | " v2_lpips.append(json_file[\"LPIPS\"][\"test\"])\n",
118 | " with open(result_v1) as fp:\n",
119 | " json_file = json.load(fp)\n",
120 | " v1_psnr.append(json_file[\"PSNR\"][\"test_mean\"])\n",
121 | " v1_ssim.append(json_file[\"SSIM\"][\"test_mean\"])\n",
122 | " v1_lpips.append(json_file[\"LPIPS-VGG\"][\"test_mean\"])"
123 | ]
124 | },
125 | {
126 | "cell_type": "code",
127 | "execution_count": 83,
128 | "metadata": {},
129 | "outputs": [
130 | {
131 | "name": "stdout",
132 | "output_type": "stream",
133 | "text": [
134 | "v2, v1 PSNR: 27.934632050363643 26.406888197799503\n",
135 | "v2, v1 SSIM: 0.8345804088994077 0.7940645335536254\n",
136 | "v2, v1 LPIPS: 0.36206666929157155 0.4027399397210071\n"
137 | ]
138 | }
139 | ],
140 | "source": [
141 | "print(\"v2, v1 PSNR: \", np.array(v2_psnr).mean(), \" \", np.array(v1_psnr).mean())\n",
142 | "print(\"v2, v1 SSIM: \", np.array(v2_ssim).mean(), \" \", np.array(v1_ssim).mean())\n",
143 | "print(\"v2, v1 LPIPS: \", np.array(v2_lpips).mean(), \" \", np.array(v1_lpips).mean())"
144 | ]
145 | },
146 | {
147 | "cell_type": "code",
148 | "execution_count": null,
149 | "metadata": {},
150 | "outputs": [],
151 | "source": []
152 | }
153 | ],
154 | "metadata": {
155 | "interpreter": {
156 | "hash": "68327cb5290cd066fc32988c700987102dfc6b9931a40b8ea9c47728386b26ed"
157 | },
158 | "kernelspec": {
159 | "display_name": "Python 3.8.5 ('nerf_factory')",
160 | "language": "python",
161 | "name": "python3"
162 | },
163 | "language_info": {
164 | "codemirror_mode": {
165 | "name": "ipython",
166 | "version": 3
167 | },
168 | "file_extension": ".py",
169 | "mimetype": "text/x-python",
170 | "name": "python",
171 | "nbconvert_exporter": "python",
172 | "pygments_lexer": "ipython3",
173 | "version": "3.8.5"
174 | },
175 | "orig_nbformat": 4
176 | },
177 | "nbformat": 4,
178 | "nbformat_minor": 2
179 | }
180 |
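The links_idx manipulation in voxel2pcd is standard row-major index decomposition; a worked sketch with an assumed 256^3 grid:

    # row-major flattening: flat = x * R1 * R2 + y * R2 + z
    R0, R1, R2 = 256, 256, 256
    flat = 5 * R1 * R2 + 7 * R2 + 9
    x = flat // (R1 * R2)         # 5
    y = flat % (R1 * R2) // R2    # 7
    z = flat % R2                 # 9
    assert (x, y, z) == (5, 7, 9)
    # the notebook uses -flat % R2 for the last coordinate, which
    # mirrors the z axis (here 256 - 9 = 247), presumably for display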
--------------------------------------------------------------------------------
/utils/ray.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 |
4 | def convert_to_ndc(
5 | origins,
6 | directions,
7 | ndc_coeffs,
8 | near: float = 1.0
9 | ):
10 | """Convert a set of rays to NDC coordinates."""
11 | t = (near - origins[Ellipsis, 2]) / directions[Ellipsis, 2]
12 | origins = origins + t[Ellipsis, None] * directions
13 |
14 | dx, dy, dz = directions[:, 0], directions[:, 1], directions[:, 2]
15 | ox, oy, oz = origins[:, 0], origins[:, 1], origins[:, 2]
16 | o0 = ndc_coeffs[0] * (ox / oz)
17 | o1 = ndc_coeffs[1] * (oy / oz)
18 | o2 = 1 - 2 * near / oz
19 | d0 = ndc_coeffs[0] * (dx / dz - ox / oz)
20 | d1 = ndc_coeffs[1] * (dy / dz - oy / oz)
21 | d2 = 2 * near / oz
22 |
23 | origins = torch.stack([o0, o1, o2], -1)
24 | directions = torch.stack([d0, d1, d2], -1)
25 |
26 | return origins, directions
27 |
28 |
29 | def batchified_get_rays(
30 | intrinsics,
31 | extrinsics,
32 | image_sizes,
33 | use_pixel_centers
34 | ):
35 | center = 0.5 if use_pixel_centers else 0.
36 | mesh_grids = [np.meshgrid(
37 | np.arange(w, dtype=np.float32) + center,
38 | np.arange(h, dtype=np.float32) + center,
39 | indexing="xy"
40 | ) for (h, w) in image_sizes]
41 |
42 | i_coords = [mesh_grid[0] for mesh_grid in mesh_grids]
43 | j_coords = [mesh_grid[1] for mesh_grid in mesh_grids]
44 |
45 | dirs = [
46 | np.stack(
47 | [
48 | (i - intrinsic[0][2]) / intrinsic[0][0],
49 | (j - intrinsic[1][2]) / intrinsic[1][1],
50 | np.ones_like(i)
51 | ], -1
52 | ) for (intrinsic, i, j) in zip(intrinsics, i_coords, j_coords)
53 | ]
54 |
55 | rays_o = np.concatenate([
56 | np.tile(
57 | extrinsic[np.newaxis, :3, 3], (1, h * w, 1)
58 | ).reshape(-1, 3) for (extrinsic, (h, w)) in zip(extrinsics, image_sizes)
59 | ]).astype(np.float32)
60 |
61 | rays_d = np.concatenate([
62 | np.einsum(
63 | "hwc, rc -> hwr", dir, extrinsic[:3, :3]
64 | ).reshape(-1, 3) for (dir, extrinsic) in zip(dirs, extrinsics)
65 | ]).astype(np.float32)
66 |
67 | rays_d /= np.linalg.norm(rays_d, axis=-1, keepdims=True)
68 |
69 | return rays_o, rays_d
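A minimal usage sketch of batchified_get_rays with a single toy camera (run from the repository root; values illustrative):

    import numpy as np
    from utils.ray import batchified_get_rays

    intrinsics = np.array([[[100.0, 0.0, 1.0],
                            [0.0, 100.0, 1.0],
                            [0.0, 0.0, 1.0]]], dtype=np.float32)
    extrinsics = np.eye(4, dtype=np.float32)[None]  # camera-to-world
    image_sizes = [(2, 2)]                          # (h, w) per camera

    rays_o, rays_d = batchified_get_rays(intrinsics, extrinsics, image_sizes, True)
    print(rays_o.shape, rays_d.shape)               # (4, 3) (4, 3)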
--------------------------------------------------------------------------------
/utils/select_option.py:
--------------------------------------------------------------------------------
1 | import os
2 | from typing import *
3 |
4 | import gdown
5 |
6 | from dataloader.litdata import LitDataCo3D, LitDataScannet
7 | from model.plenoxel_torch.model import LitPlenoxel, ResampleCallBack
8 |
9 | url_co3d_list = "https://drive.google.com/uc?id=1jCDaA41ZddkgPl4Yw2h-XI7mt9o56kb7"
10 |
11 | def select_model(
12 | model_name: str,
13 | ):
14 | return LitPlenoxel()
15 |
16 |
17 | def select_dataset(
18 | dataset_name: str,
19 | datadir: str,
20 | scene_name: str,
21 | accelerator: str,
22 | num_gpus: int,
23 | num_tpus: int,
24 | ):
25 | if dataset_name == "co3d":
26 | data_fun = LitDataCo3D
27 | co3d_list_json_path = os.path.join("dataloader/co3d_lists/co3d_list.json")
28 | if not os.path.exists(co3d_list_json_path):
29 | gdown.download(url_co3d_list, co3d_list_json_path)
30 | elif dataset_name == "scannet":
31 | data_fun = LitDataScannet
32 |
33 | return data_fun(
34 | datadir=datadir,
35 | scene_name=scene_name,
36 | accelerator=accelerator,
37 | num_gpus=num_gpus,
38 | num_tpus=num_tpus,
39 | )
40 |
41 | def select_callback(model_name):
42 |
43 | return [ResampleCallBack()]
44 |
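A minimal usage sketch, assuming the repository root is on sys.path and the data directory exists (datadir hypothetical):

    from utils.select_option import select_callback, select_dataset, select_model

    model = select_model("plenoxel")
    callbacks = select_callback("plenoxel")
    datamodule = select_dataset(
        dataset_name="co3d",
        datadir="data/co3d",           # hypothetical dataset location
        scene_name="115_13559_29303",  # example scene id
        accelerator="gpu",
        num_gpus=1,
        num_tpus=0,
    )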
--------------------------------------------------------------------------------
/utils/store_util.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import imageio
4 | import numpy as np
5 | import torch
6 | from PIL import Image
7 |
8 |
9 | def to8b(x):
10 | return (255 * np.clip(x, 0, 1)).astype(np.uint8)
11 |
12 |
13 | def binary(x):
14 | x = np.round(x)
15 | return (255 * np.clip(x, 0, 1)).astype(np.uint8)
16 |
17 |
18 | def norm8b(x):
19 | x = (x - x.min()) / (x.max() - x.min())
20 | return to8b(x)
21 |
22 |
23 | def store_image(dirpath, rgbs):
24 | for (i, rgb) in enumerate(rgbs):
25 | imgname = f"image{str(i).zfill(3)}.jpg"
26 | rgbimg = Image.fromarray(to8b(rgb.detach().cpu().numpy()))
27 | imgpath = os.path.join(dirpath, imgname)
28 | rgbimg.save(imgpath)
29 |
30 |
31 | def store_depth(dirpath, depths):
32 | for (i, depth) in enumerate(depths):
33 | depthname = f"depth{str(i).zfill(3)}.jpg"
34 | disparity = torch.zeros_like(depth)
35 | disparity[torch.where(depth != 0)] = torch.log(
36 | (1 / (depth[torch.where(depth != 0)] + 1e-6))
37 | )
38 | img = norm8b(disparity.detach().cpu().numpy().repeat(3, axis=-1))
39 | depthimg = Image.fromarray(img)
40 | depthpath = os.path.join(dirpath, depthname)
41 | depthimg.save(depthpath)
42 |
43 |
44 | def store_video(dirpath, rgbs):
45 | rgbimgs = [to8b(rgb.detach().cpu().numpy()) for rgb in rgbs]
46 | video_dir = os.path.join(dirpath, "videos")
47 | os.makedirs(video_dir, exist_ok=True)
48 | imageio.mimwrite(os.path.join(video_dir, "images.mp4"), rgbimgs, fps=30, quality=8)
49 |
50 |
51 | def store_mask(dirpath, masks):
52 | for (i, mask) in enumerate(masks):
53 | maskname = f"mask{str(i).zfill(3)}.jpg"
54 | maskimg = Image.fromarray(binary(mask.detach().cpu().numpy()))
55 | maskpath = os.path.join(dirpath, maskname)
56 | maskimg.save(maskpath)
57 |
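A minimal usage sketch with random float images in [0, 1] (output directory illustrative):

    import os

    import torch

    from utils.store_util import store_image, store_video

    rgbs = [torch.rand(64, 64, 3) for _ in range(5)]
    os.makedirs("render_demo", exist_ok=True)
    store_image("render_demo", rgbs)  # image000.jpg, image001.jpg, ...
    store_video("render_demo", rgbs)  # render_demo/videos/images.mp4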
--------------------------------------------------------------------------------
/utils/tsdf.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import os
3 |
4 | import cv2
5 | import numpy as np
6 | import open3d as o3d
7 | import tqdm
8 |
9 | from dataloader.data_util.common import connected_component_filter, find_files
10 | from dataloader.data_util.scannet import detect_blur_fft
11 |
12 |
13 | def integrate(
14 | scene_name,
15 | outdir,
16 | max_frame,
17 | skip_frame,
18 | blur_thresh,
19 | max_image_dim,
20 | voxel_size,
21 | max_depth=4.5,
22 | debug=False,
23 | ):
24 | print(f"processing {scene_name}")
25 | # setup dir
26 | scenedir = os.path.join(outdir, scene_name)
27 | if not os.path.exists(scenedir):
28 | os.makedirs(scenedir, exist_ok=True)
29 |
30 |     if os.path.exists(os.path.join(scenedir, f"tsdf_pcd_{voxel_size}.npz")):
31 | print(f"skip exist {scene_name}")
32 | return
33 |
34 | files = find_files(os.path.join(scenedir, "color"), exts=["*.jpg"])
35 | assert len(files) > 0, f"{scenedir} does not contain color images."
36 |     frame_ids = sorted([os.path.splitext(os.path.basename(f))[0] for f in files])
37 | frame_ids = np.array(frame_ids)
38 |
39 | # filter invalid poses
40 | poses = np.stack(
41 | [np.loadtxt(os.path.join(scenedir, "pose", f"{f}.txt")) for f in frame_ids],
42 | axis=0,
43 | )
44 | poses = poses.astype(np.float32)
45 | numerics = np.all(
46 | (~np.isinf(poses) * ~np.isnan(poses) * ~np.isneginf(poses)).reshape(-1, 16),
47 | axis=1,
48 | )
49 |
50 | # load images
51 | print(f"loading images - {len(frame_ids)}")
52 | colors = np.stack(
53 | [cv2.imread(os.path.join(scenedir, "color", f"{f}.jpg")) for f in frame_ids],
54 | axis=0,
55 | )
56 | colors = colors.astype(np.float32) / 255.0
57 |
58 | # load depths
59 | print(f"loading depths - {len(frame_ids)}")
60 | depth_shift = 1000.0
61 | depths = np.stack(
62 | [
63 | cv2.imread(
64 | os.path.join(scenedir, "depth", f"{f}.png"), cv2.IMREAD_UNCHANGED
65 | )
66 | for f in frame_ids
67 | ],
68 | axis=0,
69 | )
70 | depths = depths.astype(np.float32) / depth_shift
71 |
72 | # load intrinsics
73 | print(f"loading intrinsic")
74 | _intrinsic = np.loadtxt(os.path.join(scenedir, "intrinsic", "intrinsic_color.txt"))
75 | _intrinsic = _intrinsic.astype(np.float32)
76 |
77 | # filter blurry images
78 | print(f"filter blurry images")
79 | if not os.path.exists(os.path.join(scenedir, "blur.npy")):
80 | blurness = np.stack(
81 | [detect_blur_fft(c, thresh=blur_thresh)[0] for c in colors], axis=0
82 | ).reshape(-1)
83 | np.save(os.path.join(scenedir, "blur.npy"), blurness)
84 | else:
85 | blurness = np.load(os.path.join(scenedir, "blur.npy"))
86 | num_valid = min(150, int(0.2 * len(frame_ids)))
87 | ths = np.sort(blurness)[num_valid]
88 | is_valid = np.logical_and(blurness > ths, numerics)
89 | print(f"filtered {is_valid.sum()} out of {len(is_valid)} images")
90 |
91 | colors, depths, poses = (
92 | colors[is_valid][::skip_frame],
93 | depths[is_valid][::skip_frame],
94 | poses[is_valid][::skip_frame],
95 | )
96 | frame_ids = frame_ids[is_valid][::skip_frame]
97 |
98 | # setup TSDF volume
99 | intrinsic = o3d.camera.PinholeCameraIntrinsic()
100 | intrinsic.set_intrinsics(
101 | colors.shape[2],
102 | colors.shape[1],
103 | _intrinsic[0, 0],
104 | _intrinsic[1, 1],
105 | _intrinsic[0, 2],
106 | _intrinsic[1, 2],
107 | )
108 | volume = o3d.pipelines.integration.ScalableTSDFVolume(
109 | voxel_length=voxel_size,
110 | sdf_trunc=0.04,
111 | color_type=o3d.pipelines.integration.TSDFVolumeColorType.RGB8,
112 | )
113 |
114 | # integration
115 | for image, pose, depth in tqdm.tqdm(zip(colors, poses, depths)):
116 | image *= 255.0
117 | image = image.astype(np.uint8)
118 | image_o3d = o3d.geometry.Image(image)
119 | depth_o3d = o3d.geometry.Image(depth)
120 |
121 | rgbd = o3d.geometry.RGBDImage.create_from_color_and_depth(
122 | image_o3d,
123 | depth_o3d,
124 | depth_scale=1.0,
125 | depth_trunc=max_depth,
126 | convert_rgb_to_intensity=False,
127 | )
128 | volume.integrate(rgbd, intrinsic, np.linalg.inv(pose))
129 |
130 |     # extract geometry
131 | pcd = volume.extract_point_cloud()
132 | xyz = np.asarray(pcd.points)
133 | sel = connected_component_filter(xyz, 0.05)
134 |
135 | points = np.asarray(pcd.points)[sel].astype(np.float32)
136 | colors = np.asarray(pcd.colors)[sel].astype(np.float32)
137 |
138 | np.savez(
139 | os.path.join(scenedir, f"tsdf_pcd_{voxel_size}.npz"),
140 | xyz=points,
141 | color=colors,
142 | )
143 | print(f">> processed {scene_name}")
144 |
145 |
146 | if __name__ == "__main__":
147 | from functools import partial
148 | from multiprocessing import Pool
149 |
150 | parser = argparse.ArgumentParser()
151 | parser.add_argument("--scene_name", type=str, required=True)
152 | parser.add_argument("--max_frame", type=int, default=1000)
153 | parser.add_argument("--skip_frame", type=int, default=1)
154 | parser.add_argument("--blur_thresh", type=float, default=10)
155 | parser.add_argument("--max_depth", type=float, default=4.5)
156 | parser.add_argument("--max_image_dim", type=int, default=640)
157 | parser.add_argument("--voxel_size", type=float, default=0.025)
158 | parser.add_argument("--basedir", type=str, default="./tsdf_results")
159 | parser.add_argument("--outdir", type=str, default="/root/data/scannet/scans")
160 | parser.add_argument("--num_workers", type=int, default=8)
161 | parser.add_argument("--offset", type=int, default=0)
162 |
163 | args = parser.parse_args()
164 |
165 | if args.scene_name == "all":
166 | scene_list = sorted(os.listdir(args.outdir))
167 | else:
168 | scene_list = [args.scene_name]
169 |
170 | if args.scene_name == "all":
171 | integrate_partial = partial(
172 | integrate,
173 | outdir=args.outdir,
174 | max_frame=args.max_frame,
175 | skip_frame=args.skip_frame,
176 | blur_thresh=args.blur_thresh,
177 | max_image_dim=args.max_image_dim,
178 | voxel_size=args.voxel_size,
179 | max_depth=args.max_depth,
180 | )
181 | scene_list_cur = scene_list[args.offset :: args.num_workers]
182 | for scene in scene_list_cur:
183 | integrate_partial(scene)
184 | else:
185 | for scene in scene_list:
186 | integrate(
187 | scene,
188 | args.outdir,
189 | args.max_frame,
190 | args.skip_frame,
191 | args.blur_thresh,
192 | args.max_image_dim,
193 | args.voxel_size,
194 | args.max_depth,
195 | )
196 |
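A typical single-scene invocation, assuming the scans were extracted under --outdir with the color/, depth/, pose/, and intrinsic/ layout the script reads (scene name illustrative):

    python3 utils/tsdf.py --scene_name scene0000_00 --outdir /root/data/scannet/scans --voxel_size 0.025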
--------------------------------------------------------------------------------
/utils/tsdf_cuda.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import os
3 |
4 | import numpy as np
5 | import open3d as o3d
6 | import open3d.core as o3c
7 | import tqdm
8 |
9 | from dataloader.data_util.common import connected_component_filter, find_files
10 |
11 |
12 | def integrate(scene_name, outdir, reso=512, max_depth=4.5):
13 | device = o3c.Device(o3c.Device.CUDA, 0)
14 | print(f"processing {scene_name}")
15 |
16 | # setup dir
17 | scenedir = os.path.join(outdir, scene_name)
18 | if not os.path.exists(scenedir):
19 | os.makedirs(scenedir, exist_ok=True)
20 |
21 | if os.path.exists(os.path.join(scenedir, f"tsdf_pcd_{reso}.npy")):
22 | print(f"skip exist {scene_name}")
23 | return
24 |
25 | files = find_files(os.path.join(scenedir, "color"), exts=["*.jpg"])
26 | if len(files) == 0:
27 | print(f"{scenedir} does not contain color images. skip.")
28 | return
29 |     frame_ids = sorted([os.path.splitext(os.path.basename(f))[0] for f in files])
30 | frame_ids = np.array(frame_ids)
31 |
32 | # filter invalid poses
33 | poses = np.stack(
34 | [np.loadtxt(os.path.join(scenedir, "pose", f"{f}.txt")) for f in frame_ids],
35 | axis=0,
36 | )
37 | poses = poses.astype(np.float32)
38 | numerics = np.all(
39 | (
40 | ~np.isinf(poses)
41 | * ~np.isnan(poses)
42 | * ~np.isneginf(poses)
43 | * (np.abs(poses) < 30)
44 | ).reshape(-1, 16),
45 | axis=1,
46 | )
47 | poses = poses[numerics]
48 | frame_ids = frame_ids[numerics]
49 |
50 | skip_frame = 1
51 | if len(frame_ids) > 3000:
52 | skip_frame = 2
53 | if len(frame_ids) > 5000:
54 | skip_frame = 3
55 |
56 | depth_shift = 1000.0
57 |
58 | # load intrinsics
59 | print(f"loading intrinsic")
60 | _intrinsic = np.loadtxt(os.path.join(scenedir, "intrinsic", "intrinsic_color.txt"))
61 | _intrinsic = _intrinsic.astype(np.float32)
62 |
63 | poses = poses[::skip_frame]
64 | frame_ids = frame_ids[::skip_frame]
65 |
66 | # setup voxel block grid
67 | vbg = o3d.t.geometry.VoxelBlockGrid(
68 | attr_names=("tsdf", "weight"),
69 | attr_dtypes=(o3c.float32, o3c.float32),
70 | attr_channels=((1), (1)),
71 | voxel_size=3.0 / reso,
72 | block_resolution=16,
73 | block_count=100000,
74 | device=device,
75 | )
76 | intrinsic = o3d.camera.PinholeCameraIntrinsic()
77 | intrinsic.set_intrinsics(
78 | 640,
79 | 480,
80 | _intrinsic[0, 0],
81 | _intrinsic[1, 1],
82 | _intrinsic[0, 2],
83 | _intrinsic[1, 2],
84 | )
85 | intrinsic_tensor = o3c.Tensor(intrinsic.intrinsic_matrix, o3c.Dtype.Float64)
86 |
87 | for i, (fid, E) in tqdm.tqdm(
88 | enumerate(zip(frame_ids, poses)), total=len(frame_ids)
89 | ):
90 | # print(f"integraing frame {i+1}/{len(frame_ids)} for scene {scene_name}")
91 | depth = o3d.t.io.read_image(os.path.join(scenedir, "depth", f"{fid}.png")).to(
92 | device
93 | )
94 | extrinsic = o3c.Tensor(E, o3c.Dtype.Float64)
95 | extrinsic = o3c.inv(extrinsic).contiguous()
96 | frustum_block_coords = vbg.compute_unique_block_coordinates(
97 | depth, intrinsic_tensor, extrinsic, depth_shift, max_depth
98 | )
99 |
100 | vbg.integrate(
101 | frustum_block_coords,
102 | depth,
103 | intrinsic_tensor,
104 | extrinsic,
105 | depth_shift,
106 | max_depth,
107 | )
108 |
109 |     # extract geometry
110 | pcd_tensor = vbg.extract_point_cloud()
111 | pcd = pcd_tensor.to_legacy()
112 | xyz = np.asarray(pcd.points)
113 | sel = connected_component_filter(xyz, 0.05)
114 |
115 | points = np.asarray(pcd.points)[sel].astype(np.float32)
116 | colors = None
117 | if pcd.has_colors():
118 | colors = np.asarray(pcd.colors)[sel].astype(np.float32)
119 |
120 | np.save(os.path.join(scenedir, f"tsdf_pcd_{reso}.npy"), points)
121 | print(f">> processed {scene_name}")
122 |
123 |
124 | if __name__ == "__main__":
125 | parser = argparse.ArgumentParser()
126 | parser.add_argument("--scene_name", type=str, required=True)
127 | parser.add_argument("--max_depth", type=float, default=4.5)
128 | parser.add_argument("--reso", type=int, default=1024)
129 | parser.add_argument("--outdir", type=str, default="/root/data/scannet/scans")
130 | parser.add_argument("--offset", type=int, default=0)
131 |
132 | args = parser.parse_args()
133 |
134 | if args.scene_name == "all":
135 | scene_list = sorted(os.listdir(args.outdir))
136 | else:
137 | scene_list = [args.scene_name]
138 |
139 | for scene in scene_list:
140 | integrate(
141 | scene,
142 | outdir=args.outdir,
143 | reso=args.reso,
144 | max_depth=args.max_depth,
145 | )
146 |
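Same directory layout as utils/tsdf.py, but depth-only integration on GPU. Note that the voxel size is derived as 3.0 / reso, so --reso 512 corresponds to roughly 5.9 mm voxels. An illustrative invocation:

    python3 utils/tsdf_cuda.py --scene_name all --reso 512 --outdir /root/data/scannet/scans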
--------------------------------------------------------------------------------