├── .gitignore
├── .gitmodules
├── .pre-commit-config.yaml
├── DATA_PROCESSING.md
├── LICENSE.md
├── README.md
├── assets
│   └── teaser.gif
├── docker
│   ├── Dockerfile-colmap
│   └── Dockerfile-neuralangelo
├── imaginaire
│   ├── config.py
│   ├── config_base.yaml
│   ├── datasets
│   │   ├── base.py
│   │   └── utils
│   │       ├── dataloader.py
│   │       ├── get_dataloader.py
│   │       └── sampler.py
│   ├── models
│   │   ├── base.py
│   │   └── utils
│   │       ├── init_weight.py
│   │       └── model_average.py
│   ├── trainers
│   │   ├── base.py
│   │   └── utils
│   │       ├── get_trainer.py
│   │       ├── logging.py
│   │       └── meters.py
│   └── utils
│       ├── cudnn.py
│       ├── distributed.py
│       ├── gpu_affinity.py
│       ├── misc.py
│       ├── set_random_seed.py
│       ├── termcolor.py
│       └── visualization.py
├── neuralangelo.yaml
├── projects
│   ├── nerf
│   │   ├── configs
│   │   │   ├── ingp_blender.yaml
│   │   │   ├── nerf_blender.yaml
│   │   │   └── nerf_llff.yaml
│   │   ├── datasets
│   │   │   ├── base.py
│   │   │   ├── nerf_blender.py
│   │   │   └── nerf_llff.py
│   │   ├── models
│   │   │   ├── ingp.py
│   │   │   └── nerf.py
│   │   ├── trainers
│   │   │   ├── base.py
│   │   │   └── nerf.py
│   │   └── utils
│   │       ├── camera.py
│   │       ├── misc.py
│   │       ├── nerf_util.py
│   │       ├── render.py
│   │       └── visualize.py
│   └── neuralangelo
│       ├── configs
│       │   ├── base.yaml
│       │   ├── custom
│       │   │   └── template.yaml
│       │   ├── dtu.yaml
│       │   └── tnt.yaml
│       ├── data.py
│       ├── model.py
│       ├── scripts
│       │   ├── convert_data_to_json.py
│       │   ├── convert_dtu_to_json.py
│       │   ├── convert_tnt_to_json.py
│       │   ├── extract_mesh.py
│       │   ├── generate_config.py
│       │   ├── preprocess.sh
│       │   ├── preprocess_dtu.sh
│       │   ├── preprocess_tnt.sh
│       │   ├── run_colmap.sh
│       │   ├── run_ffmpeg.sh
│       │   ├── visualize_colmap.ipynb
│       │   ├── visualize_mesh.ipynb
│       │   └── visualize_transforms.ipynb
│       ├── trainer.py
│       └── utils
│           ├── mesh.py
│           ├── misc.py
│           ├── mlp.py
│           ├── modules.py
│           └── spherical_harmonics.py
├── requirements.txt
└── train.py
/.gitignore:
--------------------------------------------------------------------------------
1 | checkpoints
2 |
3 | # Other uncheckable file types
4 | *.zip
5 | *.exe
6 | *.dll
7 | *.swp
8 | *.vscode
9 | *.ipynb
10 | *.DS_Store
11 | *.pyc
12 |
13 | # Credential information that should never be checked in
14 | *.secret
15 |
16 | # Data types
17 | *.png
18 | *.hdr
19 | *.jpg
20 | *.jpeg
21 | *.pgm
22 | *.tiff
23 | *.tif
24 | *.mp4
25 | *.MOV
26 | *.tar
27 | *.tar.gz
28 | *.pkl
29 | *.pt
30 | *.bin
31 | *.ply
32 |
33 | # log folder
34 | logs/
35 |
36 | # dataset folder
37 | datasets/
38 | /datasets/
39 |
40 | # config folder
41 | projects/neuralangelo/configs/custom/*
42 | !projects/neuralangelo/configs/custom/template.yaml
43 |
44 | # ------------------------ BELOW IS AUTO-GENERATED FOR PYTHON REPOS ------------------------
45 |
46 | # Byte-compiled / optimized / DLL files
47 | __pycache__/
48 | *.py[cod]
49 | *$py.class
50 |
51 | # C extensions
52 | *.so
53 |
54 | # Distribution / packaging
55 | .Python
56 | build/
57 | develop-eggs/
58 | dist/
59 | downloads/
60 | eggs/
61 | .eggs/
62 | lib/
63 | lib64/
64 | parts/
65 | sdist/
66 | var/
67 | wheels/
68 | share/python-wheels/
69 | *.egg-info/
70 | .installed.cfg
71 | *.egg
72 | MANIFEST
73 |
74 | # PyInstaller
75 | # Usually these files are written by a python script from a template
76 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
77 | *.manifest
78 | *.spec
79 |
80 | # Installer logs
81 | pip-log.txt
82 | pip-delete-this-directory.txt
83 |
84 | # Unit test / coverage reports
85 | htmlcov/
86 | .tox/
87 | .nox/
88 | .coverage
89 | .coverage.*
90 | .cache
91 | nosetests.xml
92 | coverage.xml
93 | *.cover
94 | *.py,cover
95 | .hypothesis/
96 | .pytest_cache/
97 | cover/
98 |
99 | # Translations
100 | *.mo
101 | *.pot
102 |
103 | # Django stuff:
104 | *.log
105 | local_settings.py
106 | db.sqlite3
107 | db.sqlite3-journal
108 |
109 | # Flask stuff:
110 | instance/
111 | .webassets-cache
112 |
113 | # Scrapy stuff:
114 | .scrapy
115 |
116 | # Sphinx documentation
117 | docs/_build/
118 |
119 | # PyBuilder
120 | .pybuilder/
121 | target/
122 |
123 | # Jupyter Notebook
124 | .ipynb_checkpoints
125 |
126 | # IPython
127 | profile_default/
128 | ipython_config.py
129 |
130 | # pyenv
131 | # For a library or package, you might want to ignore these files since the code is
132 | # intended to run in multiple environments; otherwise, check them in:
133 | # .python-version
134 |
135 | # pipenv
136 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
137 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
138 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
139 | # install all needed dependencies.
140 | #Pipfile.lock
141 |
142 | # poetry
143 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
144 | # This is especially recommended for binary packages to ensure reproducibility, and is more
145 | # commonly ignored for libraries.
146 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
147 | #poetry.lock
148 |
149 | # pdm
150 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
151 | #pdm.lock
152 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
153 | # in version control.
154 | # https://pdm.fming.dev/#use-with-ide
155 | .pdm.toml
156 |
157 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
158 | __pypackages__/
159 |
160 | # Celery stuff
161 | celerybeat-schedule
162 | celerybeat.pid
163 |
164 | # SageMath parsed files
165 | *.sage.py
166 |
167 | # Environments
168 | .env
169 | .venv
170 | env/
171 | venv/
172 | ENV/
173 | env.bak/
174 | venv.bak/
175 |
176 | # Spyder project settings
177 | .spyderproject
178 | .spyproject
179 |
180 | # Rope project settings
181 | .ropeproject
182 |
183 | # mkdocs documentation
184 | /site
185 |
186 | # mypy
187 | .mypy_cache/
188 | .dmypy.json
189 | dmypy.json
190 |
191 | # Pyre type checker
192 | .pyre/
193 |
194 | # pytype static type analyzer
195 | .pytype/
196 |
197 | # Cython debug symbols
198 | cython_debug/
199 |
200 | # PyCharm
201 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
202 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
203 | # and can be added to the global gitignore or merged into this file. For a more nuclear
204 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
205 | #.idea/
206 | CLIP
207 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "third_party/colmap"]
2 | path = third_party/colmap
3 | url = https://github.com/colmap/colmap.git
4 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 | - repo: https://github.com/pycqa/flake8
3 | rev: 4.0.0
4 | hooks:
5 | - id: flake8
6 | args: [--max-line-length=120]
7 | exclude: third_party
8 |
--------------------------------------------------------------------------------
/DATA_PROCESSING.md:
--------------------------------------------------------------------------------
1 | # Data Preparation
2 |
3 | *Note: please use the datasets in accordance with their respective license terms. Each user is responsible for checking the content of the datasets and the applicable licenses, and for determining whether they are suitable for the intended use.*
4 |
5 | The following sections provide a guide on how to preprocess input videos for Neuralangelo.
6 |
7 | ## Prerequisites
8 | Initialize the COLMAP submodule:
9 | ```bash
10 | git submodule update --init --recursive
11 | ```
12 |
13 | ## Self-captured video sequence
14 | To capture your own data, we recommend using a high shutter speed to avoid motion blur (which is very common when using a phone camera). We provide a synthetic [Lego sequence](https://drive.google.com/file/d/1yWoZ4Hk3FgmV3pd34ZbW7jEqgqyJgzHy/view?usp=drive_link) (from the original [NeRF](https://github.com/bmild/nerf)) as a toy example video for testing the workflow. There are two steps:
15 | 1. [preprocessing](#preprocessing) the data and running COLMAP,
16 | 2. [inspecting](#inspect-and-adjust-colmap-results) and refining the bounding sphere of interest for running Neuralangelo.
17 |
18 | ### Preprocessing
19 | First, set some environment variables:
20 | ```bash
21 | SEQUENCE=lego
22 | PATH_TO_VIDEO=lego.mp4
23 | DOWNSAMPLE_RATE=2
24 | SCENE_TYPE=object
25 | ```
26 | where
27 | - `SEQUENCE`: your custom name for the video sequence.
28 | - `PATH_TO_VIDEO`: absolute/relative path to your video.
29 | - `DOWNSAMPLE_RATE`: temporal downsampling rate of video sequence (for extracting video frames).
30 | - `SCENE_TYPE`: can be one of `{outdoor,indoor,object}`.
31 |
32 | To preprocess your data, you can choose to either
33 |
34 | - Run the following end-to-end script:
35 | ```bash
36 | bash projects/neuralangelo/scripts/preprocess.sh ${SEQUENCE} ${PATH_TO_VIDEO} ${DOWNSAMPLE_RATE} ${SCENE_TYPE}
37 | ```
38 |
39 | - Or you can follow the steps below if you want more fine-grained control:
40 |
41 | 1. Extract images from the input video
42 |
43 | ```bash
44 | bash projects/neuralangelo/scripts/run_ffmpeg.sh ${SEQUENCE} ${PATH_TO_VIDEO} ${DOWNSAMPLE_RATE}
45 | ```
46 | This will create a directory `datasets/{SEQUENCE}_ds{DOWNSAMPLE_RATE}` (set as `DATA_PATH` onwards), which stores all the processed data.
47 | The extracted images will be stored in `{DATA_PATH}/images_raw`.
48 |
49 | 2. Run COLMAP
50 |
51 | ```bash
52 | DATA_PATH=datasets/${SEQUENCE}_ds${DOWNSAMPLE_RATE}
53 | bash projects/neuralangelo/scripts/run_colmap.sh ${DATA_PATH}
54 | ```
55 | `DATA_PATH`: path to processed data.
56 |
57 | After COLMAP finishes, the folder structure will look like the following:
58 | ```
59 | DATA_PATH
60 | ├─ database.db (COLMAP database)
61 | ├─ images (undistorted input images)
62 | ├─ images_raw (raw input images)
63 | ├─ sparse (COLMAP data from SfM)
64 | │ ├─ cameras.bin (camera parameters)
65 | │ ├─ images.bin (images and camera poses)
66 | │ ├─ points3D.bin (sparse point clouds)
67 | │ ├─ 0 (a directory containing individual SfM models. There could also be 1, 2... etc.)
68 | │ ...
69 | ├─ stereo (COLMAP data for MVS, not used here)
70 | ...
71 | ```
72 | `{DATA_PATH}/images` will be the input image observations for surface reconstruction.
73 |
74 | 3. Generate JSON file for data loading
75 |
76 | In this step, we define the bounding region for reconstruction and convert the COLMAP data to JSON format following Instant NGP.
77 | It is strongly recommended to [inspect](#inspect-and-adjust-colmap-results) the results to verify and adjust the bounding region for improved performance.
78 | ```bash
79 | python3 projects/neuralangelo/scripts/convert_data_to_json.py --data_dir ${DATA_PATH} --scene_type ${SCENE_TYPE}
80 | ```
81 | The JSON file will be generated in `{DATA_PATH}/transforms.json`.
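
For a quick sanity check of the generated file, the sketch below (not part of the official scripts) simply prints its top-level keys; the exact key names (e.g. the bounding-sphere fields) are assumptions and depend on your version of `convert_data_to_json.py`.
```bash
# Illustrative sanity check only: list the top-level keys of the generated JSON.
python3 -c "import json; print(list(json.load(open('${DATA_PATH}/transforms.json')).keys()))"
```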
82 |
83 | 4. Config files
84 |
85 | Use the following to configure and generate your config files:
86 | ```bash
87 | python3 projects/neuralangelo/scripts/generate_config.py --sequence_name ${SEQUENCE} --data_dir ${DATA_PATH} --scene_type ${SCENE_TYPE}
88 | ```
89 | The config file will be generated as `projects/neuralangelo/configs/custom/{SEQUENCE}.yaml`.
90 | You can add the `--help` flag to list all arguments; for example, consider adding `--auto_exposure_wb` for modeling varying lighting/appearances in the video.
91 | Alternatively, you can directly modify the hyperparameters in the generated config file.
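
For example, to list all available arguments of the config generator (the `--help` flag mentioned above):
```bash
python3 projects/neuralangelo/scripts/generate_config.py --help
```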
92 |
93 | ### Inspect and adjust COLMAP results
94 |
95 | In some cases, the camera poses estimated by COLMAP can be erroneous. In addition, the automatically estimated bounding sphere (which should ideally enclose the scene/object of interest) can be inaccurate, so it is highly recommended to adjust it.
96 | We offer some tools to inspect and adjust the preprocessing results. Below are some options:
97 |
98 | - Blender: Download [Blender](https://www.blender.org/download/) and follow the instructions in our [add-on repo](https://github.com/mli0603/BlenderNeuralangelo). The add-on will save your adjustment of the bounding sphere.
99 | - This [Jupyter notebook](projects/neuralangelo/scripts/visualize_colmap.ipynb) (using K3D) can be helpful for visualizing the COLMAP results. You can adjust the bounding sphere by manually specifying the refined sphere center and size in the `data.readjust` config (see the sketch below).
100 |
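
A minimal sketch of passing such an adjustment to training without editing the YAML is shown below; `data.readjust.scale` is an assumed config key, so verify it against your generated config before relying on it (the sphere center can likewise be edited directly in the YAML).
```bash
# Hypothetical example: shrink the bounding sphere slightly via a command-line override.
# data.readjust.scale is an assumed key name; the value is illustrative.
torchrun --nproc_per_node=1 train.py \
    --logdir=logs/example_group/example_name \
    --config=projects/neuralangelo/configs/custom/${SEQUENCE}.yaml \
    --show_pbar \
    --data.readjust.scale=0.8
```
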
101 | In some cases, an exhaustive feature matcher may be able to estimate more accurate camera poses.
102 | This could be done by changing `sequential_matcher` to `exhaustive_matcher` in [run_colmap.sh](https://github.com/NVlabs/neuralangelo/blob/main/projects/neuralangelo/scripts/run_colmap.sh#L24).
103 | However, this would take more time to process and could sometimes result in "broken trajectories" (from COLMAP failing due to ambiguous matches).
104 | For more details, please refer to the COLMAP [documentation](https://colmap.github.io/).
105 |
106 | ## DTU dataset
107 | You can run the following command to download [the DTU dataset](https://roboimagedata.compute.dtu.dk/?page_id=36) (preprocessed by the NeuS authors) and generate the JSON files:
108 | ```bash
109 | PATH_TO_DTU=datasets/dtu # Modify this to be the DTU dataset root directory.
110 | bash projects/neuralangelo/scripts/preprocess_dtu.sh ${PATH_TO_DTU}
111 | ```
112 |
113 | ## Tanks and Temples dataset
114 | Download the data from the [Tanks and Temples](https://tanksandtemples.org/download/) website.
115 | You will also need to download the additional [COLMAP/camera/alignment](https://drive.google.com/file/d/1jAr3IDvhVmmYeDWi0D_JfgiHcl70rzVE/view?resourcekey=) files and the images of each scene.
116 | The file structure should look like the following (you need to move the downloaded images to the folder `images_raw`):
117 | ```
118 | tanks_and_temples
119 | ├─ Barn
120 | │ ├─ Barn_COLMAP_SfM.log (camera poses)
121 | │ ├─ Barn.json (cropfiles)
122 | │ ├─ Barn.ply (ground-truth point cloud)
123 | │ ├─ Barn_trans.txt (colmap-to-ground-truth transformation)
124 | │ └─ images_raw (raw input images downloaded from Tanks and Temples website)
125 | │ ├─ 000001.png
126 | │ ├─ 000002.png
127 | │ ...
128 | ├─ Caterpillar
129 | │ ├─ ...
130 | ...
131 | ```
132 | Run the following command to generate the JSON files:
133 | ```bash
134 | PATH_TO_TNT=datasets/tanks_and_temples # Modify this to be the Tanks and Temples root directory.
135 | bash projects/neuralangelo/scripts/preprocess_tnt.sh ${PATH_TO_TNT}
136 | ```
137 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | # NVIDIA Source Code License for Neuralangelo
2 |
3 | ## 1. Definitions
4 |
5 | - “Licensor” means any person or entity that distributes its Work.
6 |
7 | - “Software” means the original work of authorship made available under this License.
8 |
9 | - “Work” means the Software and any additions to or derivative works of the Software that are made available under this License.
10 |
11 | - “NVIDIA Processors” means any central processing unit (CPU), graphics processing unit (GPU), field-programmable gate array (FPGA), application-specific integrated circuit (ASIC) or any combination thereof designed, made, sold, or provided by NVIDIA or its affiliates.
12 |
13 | - The terms “reproduce,” “reproduction,” “derivative works,” and “distribution” have the meaning as provided under U.S. copyright law; provided, however, that for the purposes of this License, derivative works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work.
14 |
15 | - Works, including the Software, are “made available” under this License by including in or with the Work either (a) a copyright notice referencing the applicability of this License to the Work, or (b) a copy of this License.
16 |
17 | ## 2. License Grant
18 |
19 | ### 2.1 Copyright Grant.
20 |
21 | Subject to the terms and conditions of this License, each Licensor grants to you a perpetual, worldwide, non-exclusive, royalty-free, copyright license to reproduce, prepare derivative works of, publicly display, publicly perform, sublicense and distribute its Work and any resulting derivative works in any form.
22 |
23 | ## 3. Limitations
24 |
25 | ### 3.1 Redistribution.
26 |
27 | You may reproduce or distribute the Work only if (a) you do so under this License, (b) you include a complete copy of this License with your distribution, and (c) you retain without modification any copyright, patent, trademark, or attribution notices that are present in the Work.
28 |
29 | ### 3.2 Derivative Works.
30 |
31 | You may specify that additional or different terms apply to the use, reproduction, and distribution of your derivative works of the Work (“Your Terms”) only if (a) Your Terms provide that the use limitation in Section 3.3 applies to your derivative works, and (b) you identify the specific derivative works that are subject to Your Terms. Notwithstanding Your Terms, this License (including the redistribution requirements in Section 3.1) will continue to apply to the Work itself.
32 |
33 | ### 3.3 Use Limitation.
34 |
35 | The Work and any derivative works thereof only may be used or intended for use non-commercially and with NVIDIA Processors. Notwithstanding the foregoing, NVIDIA and its affiliates may use the Work and any derivative works commercially. As used herein, “non-commercially” means for research or evaluation purposes only.
36 |
37 | ### 3.4 Patent Claims.
38 |
39 | If you bring or threaten to bring a patent claim against any Licensor (including any claim, cross-claim or counterclaim in a lawsuit) to enforce any patents that you allege are infringed by any Work, then your rights under this License from such Licensor (including the grant in Section 2.1) will terminate immediately.
40 |
41 | ### 3.5 Trademarks.
42 |
43 | This License does not grant any rights to use any Licensor’s or its affiliates’ names, logos, or trademarks, except as necessary to reproduce the notices described in this License.
44 |
45 | ### 3.6 Termination.
46 |
47 | If you violate any term of this License, then your rights under this License (including the grant in Section 2.1) will terminate immediately.
48 |
49 | ## 4. Disclaimer of Warranty.
50 |
51 | THE WORK IS PROVIDED “AS IS” WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER THIS LICENSE.
52 |
53 | ## 5. Limitation of Liability.
54 |
55 | EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK (INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION, LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
56 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Neuralangelo
2 | This is the official implementation of **Neuralangelo: High-Fidelity Neural Surface Reconstruction**.
3 |
4 | [Zhaoshuo Li](https://mli0603.github.io/),
5 | [Thomas Müller](https://tom94.net/),
6 | [Alex Evans](https://research.nvidia.com/person/alex-evans),
7 | [Russell H. Taylor](https://www.cs.jhu.edu/~rht/),
8 | [Mathias Unberath](https://mathiasunberath.github.io/),
9 | [Ming-Yu Liu](https://mingyuliu.net/),
10 | [Chen-Hsuan Lin](https://chenhsuanlin.bitbucket.io/)
11 | IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2023
12 |
13 | ### [Project page](https://research.nvidia.com/labs/dir/neuralangelo/) | [Paper](https://arxiv.org/abs/2306.03092/) | [Colab notebook](https://colab.research.google.com/drive/13u8DX9BNzQwiyPPCB7_4DbSxiQ5-_nGF)
14 |
15 |
16 |
17 | The code is built upon the Imaginaire library from the Deep Imagination Research Group at NVIDIA.
18 | For business inquiries, please submit the [NVIDIA research licensing form](https://www.nvidia.com/en-us/research/inquiries/).
19 |
20 | --------------------------------------
21 |
22 | ## Installation
23 | We offer two ways to set up the environment:
24 | 1. We provide prebuilt Docker images, where
25 | - `docker.io/chenhsuanlin/colmap:3.8` is for running COLMAP and the data preprocessing scripts. This includes the prebuilt COLMAP library (CUDA-supported).
26 | - `docker.io/chenhsuanlin/neuralangelo:23.04-py3` is for running the main Neuralangelo pipeline.
27 |
28 | The corresponding Dockerfiles can be found in the `docker` directory.
29 | 2. A conda environment for Neuralangelo. Install the dependencies and activate the environment `neuralangelo` with
30 | ```bash
31 | conda env create --file neuralangelo.yaml
32 | conda activate neuralangelo
33 | ```
34 | For COLMAP, alternative installation options are also available on the [COLMAP website](https://colmap.github.io/).
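
If you go with the prebuilt Docker images from option 1, a minimal (unofficial) sketch for launching the Neuralangelo container with GPU access could look like the following; the mount path is illustrative.
```bash
# Hypothetical usage of the prebuilt image; adjust paths and flags to your setup.
docker run --gpus all -it --rm \
    -v $(pwd):/workspace/neuralangelo -w /workspace/neuralangelo \
    chenhsuanlin/neuralangelo:23.04-py3 bash
```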
35 |
36 | --------------------------------------
37 |
38 | ## Data preparation
39 | Please refer to [Data Preparation](DATA_PROCESSING.md) for step-by-step instructions.
40 | We assume known camera poses for each extracted frame from the video.
41 | The code uses the same json format as [Instant NGP](https://github.com/NVlabs/instant-ngp).
42 |
43 | --------------------------------------
44 |
45 | ## Run Neuralangelo!
46 | ```bash
47 | EXPERIMENT=toy_example
48 | GROUP=example_group
49 | NAME=example_name
50 | CONFIG=projects/neuralangelo/configs/custom/${EXPERIMENT}.yaml
51 | GPUS=1 # use >1 for multi-GPU training!
52 | torchrun --nproc_per_node=${GPUS} train.py \
53 | --logdir=logs/${GROUP}/${NAME} \
54 | --config=${CONFIG} \
55 | --show_pbar
56 | ```
57 | Some useful notes:
58 | - This codebase supports logging with [Weights & Biases](https://wandb.ai/site). You should have a W&B account for this.
59 | - Add `--wandb` to the command line argument to enable W&B logging.
60 | - Add `--wandb_name` to specify the W&B project name.
61 | - More detailed control can be found in the `init_wandb()` function in `imaginaire/trainers/base.py`.
62 | - Configs can be overridden through the command line (e.g. `--optim.params.lr=1e-2`).
63 | - Set `--checkpoint={CHECKPOINT_PATH}` to initialize with a certain checkpoint; set `--resume` to resume training.
64 | - If appearance embeddings are enabled, make sure `data.num_images` is set to the number of training images.
65 |
66 | --------------------------------------
67 |
68 | ## Isosurface extraction
69 | Use the following command to run isosurface mesh extraction:
70 | ```bash
71 | CHECKPOINT=logs/${GROUP}/${NAME}/xxx.pt
72 | OUTPUT_MESH=xxx.ply
73 | CONFIG=logs/${GROUP}/${NAME}/config.yaml
74 | RESOLUTION=2048
75 | BLOCK_RES=128
76 | GPUS=1 # use >1 for multi-GPU mesh extraction
77 | torchrun --nproc_per_node=${GPUS} projects/neuralangelo/scripts/extract_mesh.py \
78 | --config=${CONFIG} \
79 | --checkpoint=${CHECKPOINT} \
80 | --output_file=${OUTPUT_MESH} \
81 | --resolution=${RESOLUTION} \
82 | --block_res=${BLOCK_RES}
83 | ```
84 | Some useful notes:
85 | - Add `--textured` to extract meshes with textures.
86 | - Add `--keep_lcc` to keep only the largest connected component, removing noise. This may also remove thin structures.
87 | - Lower `BLOCK_RES` to reduce GPU memory usage.
88 | - Lower `RESOLUTION` to reduce mesh size.
89 |
90 | --------------------------------------
91 |
92 | ## Frequently asked questions (FAQ)
93 | 1. **Q:** CUDA out of memory. How do I decrease the memory footprint?
94 | **A:** Neuralangelo requires at least 24GB GPU memory with our default configuration. If you run out of memory, consider adjusting the following hyperparameters under `model.object.sdf.encoding.hashgrid` (with suggested values):
95 |
96 | | GPU VRAM | Hyperparameter |
97 | | :-----------: | :---------------------: |
98 | | 8GB | `dict_size=20`, `dim=4` |
99 | | 12GB | `dict_size=21`, `dim=4` |
100 | | 16GB | `dict_size=21`, `dim=8` |
101 |
102 | Please note that the above hyperparameter adjustment may sacrifice the reconstruction quality.
103 |
104 | If Neuralangelo runs fine during training but runs out of CUDA memory during evaluation, consider adjusting the evaluation parameters under `data.val`, including setting a smaller `image_size` (e.g., a maximum resolution of 200x200) and setting `batch_size=1`, `subset=1`.
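
For example, assuming the default config layout, the 16GB row of the table could be applied without editing the YAML by overriding the keys on the command line (a sketch, not an official recipe):
```bash
# Sketch: apply the 16GB setting from the table above via command-line overrides.
torchrun --nproc_per_node=${GPUS} train.py \
    --logdir=logs/${GROUP}/${NAME} \
    --config=${CONFIG} \
    --show_pbar \
    --model.object.sdf.encoding.hashgrid.dict_size=21 \
    --model.object.sdf.encoding.hashgrid.dim=8
```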
105 |
106 | 2. **Q:** The reconstruction of my custom dataset is bad. What can I do?
107 | **A:** It is worth looking into the following:
108 | - The camera poses recovered by COLMAP may be off. We have implemented tools (using [Blender](https://github.com/mli0603/BlenderNeuralangelo) or [Jupyter notebook](projects/neuralangelo/scripts/visualize_colmap.ipynb)) to inspect the COLMAP results.
109 | - The computed bounding regions may be off and/or too small/large. Please refer to [data preprocessing](DATA_PROCESSING.md) on how to adjust the bounding regions manually.
110 | - The video capture sequence may contain significant motion blur or out-of-focus frames. Higher shutter speed (reducing motion blur) and smaller aperture (increasing focus range) are very helpful.
111 |
112 | --------------------------------------
113 |
114 | ## Citation
115 | If you find our code useful for your research, please cite
116 | ```
117 | @inproceedings{li2023neuralangelo,
118 | title={Neuralangelo: High-Fidelity Neural Surface Reconstruction},
119 | author={Li, Zhaoshuo and M\"uller, Thomas and Evans, Alex and Taylor, Russell H and Unberath, Mathias and Liu, Ming-Yu and Lin, Chen-Hsuan},
120 | booktitle={IEEE Conference on Computer Vision and Pattern Recognition ({CVPR})},
121 | year={2023}
122 | }
123 | ```
124 |
--------------------------------------------------------------------------------
/assets/teaser.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVlabs/neuralangelo/94390b64683c067c620d9e075224ccfe582647d0/assets/teaser.gif
--------------------------------------------------------------------------------
/docker/Dockerfile-colmap:
--------------------------------------------------------------------------------
1 | # docker build -f docker/Dockerfile-colmap -t chenhsuanlin/colmap:3.8 .
2 | # docker push chenhsuanlin/colmap:3.8
3 |
4 | FROM nvcr.io/nvidia/cuda:11.8.0-devel-ubuntu20.04
5 | ARG DEBIAN_FRONTEND=noninteractive
6 |
7 | # colmap dependencies
8 | RUN apt-get update && apt-get install -y \
9 | git \
10 | cmake \
11 | ninja-build \
12 | build-essential \
13 | libboost-program-options-dev \
14 | libboost-filesystem-dev \
15 | libboost-graph-dev \
16 | libboost-system-dev \
17 | libboost-test-dev \
18 | libeigen3-dev \
19 | libflann-dev \
20 | libfreeimage-dev \
21 | libmetis-dev \
22 | libgoogle-glog-dev \
23 | libgflags-dev \
24 | libsqlite3-dev \
25 | libglew-dev \
26 | qtbase5-dev \
27 | libqt5opengl5-dev \
28 | libcgal-dev \
29 | libceres-dev
30 | # headless servers
31 | RUN apt-get update && apt-get install -y \
32 | xvfb
33 | # Colmap
34 | RUN git clone https://github.com/colmap/colmap.git && cd colmap && git checkout 3.8
35 | RUN cd colmap && mkdir build && cd build && cmake .. -DCUDA_ENABLED=ON -DCMAKE_CUDA_ARCHITECTURES="70;72;75;80;86" -GNinja
36 | RUN cd colmap/build && ninja && ninja install
37 |
38 | # additional python packages
39 | RUN apt-get update && apt-get install -y \
40 | pip \
41 | ffmpeg
42 | RUN pip install \
43 | addict \
44 | k3d \
45 | opencv-python-headless \
46 | pillow \
47 | plotly \
48 | pyyaml \
49 | trimesh
50 |
--------------------------------------------------------------------------------
/docker/Dockerfile-neuralangelo:
--------------------------------------------------------------------------------
1 | # docker build -f docker/Dockerfile-neuralangelo -t chenhsuanlin/neuralangelo:23.04-py3 .
2 | # docker push chenhsuanlin/neuralangelo:23.04-py3
3 |
4 | FROM nvcr.io/nvidia/pytorch:23.04-py3
5 | ARG DEBIAN_FRONTEND=noninteractive
6 |
7 | # Install basics
8 | RUN apt-get update && apt-get install -y --no-install-recommends \
9 | build-essential \
10 | bzip2 \
11 | ca-certificates \
12 | cmake \
13 | curl \
14 | ffmpeg \
15 | g++ \
16 | git \
17 | libx264-dev \
18 | tmux \
19 | wget
20 |
21 | # Update pip
22 | RUN pip install --upgrade pip
23 |
24 | # Code formatting
25 | RUN pip install --upgrade \
26 | flake8 \
27 | pre-commit
28 |
29 | # Install base Python libraries for Imaginaire
30 | COPY requirements.txt requirements.txt
31 | ARG FORCE_CUDA=1
32 | ARG TCNN_CUDA_ARCHITECTURES=70,72,75,80,86
33 | RUN pip install --upgrade -r requirements.txt
34 |
--------------------------------------------------------------------------------
/imaginaire/config_base.yaml:
--------------------------------------------------------------------------------
1 | # -----------------------------------------------------------------------------
2 | # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
3 | #
4 | # NVIDIA CORPORATION and its licensors retain all intellectual property
5 | # and proprietary rights in and to this software, related documentation
6 | # and any modifications thereto. Any use, reproduction, disclosure or
7 | # distribution of this software and related documentation without an express
8 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
9 | # -----------------------------------------------------------------------------
10 |
11 | # This is the base configuration file.
12 |
13 | # We often dump images to understand what's going on in the training.
14 | # image_save_iter specifies how often we dump images.
15 | image_save_iter: 9999999999
16 | # metrics_iter and metrics_epoch specify how often we compute the performance metrics
17 | # If these two numbers are not set, they are copied from checkpoint.save_iter and checkpoint.save_epoch respectively.
18 | metrics_iter:
19 | metrics_epoch:
20 | # max_epoch and max_iter specify the maximum number of epochs and iterations that we will train our model for.
21 | # min(max_epoch * dataset_size / batch_size, max_iter) will be the total number of iterations that the model will be trained for.
22 | max_epoch: 9999999999
23 | max_iter: 9999999999
24 | # logging_iter controls how often we log the training stats.
25 | logging_iter: 100
26 | # If speed_benchmark is True, we will print out time required for forward, backward, and gradient update.
27 | speed_benchmark: False
28 | # Kill the process if `timeout_period` seconds have passed since the last iteration. This usually means the process gets stuck.
29 | timeout_period: 9999999
30 |
31 | # Default local rank
32 | local_rank: 0
33 | # Toggle NVTX profiler
34 | nvtx_profile: False
35 |
36 | # Checkpointer
37 | checkpoint:
38 | # If save_iter is set to M, then we save the checkpoint every M iterations.
39 | # If save_latest_iter is set to M, then we save the checkpoint every M iterations using the name
40 | # 'latest_checkpoint.pt', so that the new checkpoint will overwrite previous ones.
41 | # If save_epoch is set to N, then we save the checkpoint every N epochs.
42 | # Both can be set at the same time.
43 | save_iter: 9999999999
44 | save_latest_iter: 9999999999
45 | save_epoch: 9999999999
46 | save_period: 9999999999
47 | # If True, load state_dict to the models in strict mode
48 | strict_resume: True
49 |
50 | # Trainer
51 | trainer:
52 | ema_config:
53 | enabled: False
54 | beta: 0.9999
55 | start_iteration: 0
56 |
57 | image_to_tensorboard: False
58 | ddp_config:
59 | find_unused_parameters: False
60 | static_graph: True
61 | init:
62 | type: none
63 | gain:
64 | amp_config:
65 | init_scale: 65536.0
66 | growth_factor: 2.0
67 | backoff_factor: 0.5
68 | growth_interval: 2000
69 | enabled: False
70 | grad_accum_iter: 1
71 |
72 | # Networks
73 | model:
74 | type: dummy
75 |
76 | # Optimizers
77 | optim:
78 | type: Adam
79 | params:
80 | # This defines the parameters for the specified PyTorch optimizer class (e.g. betas, eps).
81 | lr: 0.0001
82 | fused_opt: False
83 | # Default learning rate policy is step with iteration_mode=False (epoch mode), step_size=10^10, and gamma=1.
84 | # This means a constant learning rate
85 | sched:
86 | iteration_mode: False
87 | type: step
88 | step_size: 9999999999
89 | gamma: 1
90 |
91 | # Data
92 | data:
93 | name: dummy
94 | type: imaginaire.datasets.images
95 | use_multi_epoch_loader: False
96 | num_workers: 0
97 | test_data:
98 | name: dummy
99 | type: imaginaire.datasets.images
100 | num_workers: 0
101 | test:
102 | is_lmdb: False
103 | roots:
104 | batch_size: 1
105 |
106 | # cuDNN
107 | # Set deterministic to True for better reproducibility of the results. When deterministic is True, only deterministic cuDNN functions will be used.
108 | # If benchmark is set to True, cuDNN will benchmark several algorithms and pick the one it finds to be fastest at the first iteration.
109 | cudnn:
110 | deterministic: False
111 | benchmark: True
112 |
113 | # Others
114 | pretrained_weight:
115 | inference_args: {}
116 |
--------------------------------------------------------------------------------
/imaginaire/datasets/utils/dataloader.py:
--------------------------------------------------------------------------------
1 | '''
2 | -----------------------------------------------------------------------------
3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
4 |
5 | NVIDIA CORPORATION and its licensors retain all intellectual property
6 | and proprietary rights in and to this software, related documentation
7 | and any modifications thereto. Any use, reproduction, disclosure or
8 | distribution of this software and related documentation without an express
9 | license agreement from NVIDIA CORPORATION is strictly prohibited.
10 | -----------------------------------------------------------------------------
11 | '''
12 |
13 | import torch
14 |
15 |
16 | class MultiEpochsDataLoader(torch.utils.data.DataLoader):
17 | """
18 | Relentlessly sample from the dataset.
19 | This eliminates the overhead of prefetching data before each epoch.
20 | https://github.com/rwightman/pytorch-image-models/blob/master/timm/data/loader.py
21 | """
22 |
23 | def __init__(self, *args, **kwargs):
24 | super().__init__(*args, **kwargs)
25 | self._DataLoader__initialized = False
26 | self.batch_sampler = _RepeatSampler(self.batch_sampler)
27 | self._DataLoader__initialized = True
28 | self.iterator = super().__iter__()
29 |
30 | def __len__(self):
31 | return len(self.batch_sampler.sampler)
32 |
33 | def __iter__(self):
34 | for i in range(len(self)):
35 | yield next(self.iterator)
36 |
37 |
38 | class _RepeatSampler(object):
39 | """ Sampler that repeats forever.
40 | Args:
41 | sampler (Sampler)
42 | """
43 |
44 | def __init__(self, sampler):
45 | self.sampler = sampler
46 |
47 | def __iter__(self):
48 | while True:
49 | yield from iter(self.sampler)
50 |
--------------------------------------------------------------------------------
/imaginaire/datasets/utils/get_dataloader.py:
--------------------------------------------------------------------------------
1 | '''
2 | -----------------------------------------------------------------------------
3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
4 |
5 | NVIDIA CORPORATION and its licensors retain all intellectual property
6 | and proprietary rights in and to this software, related documentation
7 | and any modifications thereto. Any use, reproduction, disclosure or
8 | distribution of this software and related documentation without an express
9 | license agreement from NVIDIA CORPORATION is strictly prohibited.
10 | -----------------------------------------------------------------------------
11 | '''
12 |
13 | import importlib
14 |
15 | import torch
16 | import torch.distributed as dist
17 |
18 | from imaginaire.utils.distributed import master_only_print as print
19 |
20 | from imaginaire.datasets.utils.sampler import DistributedSamplerPreemptable
21 | from imaginaire.datasets.utils.dataloader import MultiEpochsDataLoader
22 |
23 |
24 | def _get_train_dataset_objects(cfg, subset_indices=None):
25 | r"""Return dataset objects for the training set.
26 | Args:
27 | cfg (obj): Global configuration file.
28 | subset_indices (sequence): Indices of the subset to use.
29 |
30 | Returns:
31 | train_dataset (obj): PyTorch training dataset object.
32 | """
33 | dataset_module = importlib.import_module(cfg.data.type)
34 | train_dataset = dataset_module.Dataset(cfg, is_inference=False)
35 | if subset_indices is not None:
36 | train_dataset = torch.utils.data.Subset(train_dataset, subset_indices)
37 | print('Train dataset length:', len(train_dataset))
38 | return train_dataset
39 |
40 |
41 | def _get_val_dataset_objects(cfg, subset_indices=None):
42 | r"""Return dataset objects for the validation set.
43 | Args:
44 | cfg (obj): Global configuration file.
45 | subset_indices (sequence): Indices of the subset to use.
46 | Returns:
47 | val_dataset (obj): PyTorch validation dataset object.
48 | """
49 | dataset_module = importlib.import_module(cfg.data.type)
50 | if hasattr(cfg.data.val, 'type'):
51 | for key in ['type', 'input_types', 'input_image']:
52 | setattr(cfg.data, key, getattr(cfg.data.val, key))
53 | dataset_module = importlib.import_module(cfg.data.type)
54 | val_dataset = dataset_module.Dataset(cfg, is_inference=True)
55 |
56 | if subset_indices is not None:
57 | val_dataset = torch.utils.data.Subset(val_dataset, subset_indices)
58 | print('Val dataset length:', len(val_dataset))
59 | return val_dataset
60 |
61 |
62 | def _get_test_dataset_object(cfg, subset_indices=None):
63 | r"""Return dataset object for the test set
64 |
65 | Args:
66 | cfg (obj): Global configuration file.
67 | subset_indices (sequence): Indices of the subset to use.
68 | Returns:
69 | (obj): PyTorch dataset object.
70 | """
71 | dataset_module = importlib.import_module(cfg.test_data.type)
72 | test_dataset = dataset_module.Dataset(cfg, is_inference=True, is_test=True)
73 | if subset_indices is not None:
74 | test_dataset = torch.utils.data.Subset(test_dataset, subset_indices)
75 | return test_dataset
76 |
77 |
78 | def _get_data_loader(cfg, dataset, batch_size, not_distributed=False,
79 | shuffle=True, drop_last=True, seed=0, use_multi_epoch_loader=False,
80 | preemptable=False):
81 | r"""Return data loader .
82 |
83 | Args:
84 | cfg (obj): Global configuration file.
85 | dataset (obj): PyTorch dataset object.
86 | batch_size (int): Batch size.
87 | not_distributed (bool): Do not use distributed samplers.
88 | shuffle (bool): Whether to shuffle the data
89 | drop_last (bool): Whether to drop the last batch if the number of samples is smaller than the batch size.
90 | seed (int): random seed.
91 | preemptable (bool): Whether to handle preemptions.
92 | Return:
93 | (obj): Data loader.
94 | """
95 | not_distributed = not_distributed or not dist.is_initialized()
96 | if not_distributed:
97 | sampler = None
98 | else:
99 | if preemptable:
100 | sampler = DistributedSamplerPreemptable(dataset, shuffle=shuffle, seed=seed)
101 | else:
102 | sampler = torch.utils.data.distributed.DistributedSampler(dataset, shuffle=shuffle, seed=seed)
103 | num_workers = getattr(cfg.data, 'num_workers', 8)
104 | persistent_workers = getattr(cfg.data, 'persistent_workers', False)
105 | data_loader = (MultiEpochsDataLoader if use_multi_epoch_loader else torch.utils.data.DataLoader)(
106 | dataset,
107 | batch_size=batch_size,
108 | shuffle=shuffle and (sampler is None),
109 | sampler=sampler,
110 | pin_memory=True,
111 | num_workers=num_workers,
112 | drop_last=drop_last,
113 | persistent_workers=persistent_workers if num_workers > 0 else False
114 | )
115 | return data_loader
116 |
117 |
118 | def get_train_dataloader(
119 | cfg, shuffle=True, drop_last=True, subset_indices=None, seed=0, preemptable=False):
120 | r"""Return dataset objects for the training and validation sets.
121 | Args:
122 | cfg (obj): Global configuration file.
123 | shuffle (bool): Whether to shuffle the data
124 | drop_last (bool): Whether to drop the last batch if the number of samples is smaller than the batch size.
125 | subset_indices (sequence): Indices of the subset to use.
126 | seed (int): random seed.
127 | preemptable (bool): Flag for preemption handling
128 | Returns:
129 | train_data_loader (obj): Train data loader.
130 | """
131 | train_dataset = _get_train_dataset_objects(cfg, subset_indices=subset_indices)
132 | train_data_loader = _get_data_loader(
133 | cfg, train_dataset, cfg.data.train.batch_size, not_distributed=False,
134 | shuffle=shuffle, drop_last=drop_last, seed=seed,
135 | use_multi_epoch_loader=cfg.data.use_multi_epoch_loader,
136 | preemptable=preemptable
137 | )
138 | return train_data_loader
139 |
140 |
141 | def get_val_dataloader(cfg, subset_indices=None, seed=0):
142 | r"""Return dataset objects for the training and validation sets.
143 | Args:
144 | cfg (obj): Global configuration file.
145 | subset_indices (sequence): Indices of the subset to use.
146 | seed (int): random seed.
147 | Returns:
148 | val_data_loader (obj): Val data loader.
149 | """
150 | val_dataset = _get_val_dataset_objects(cfg, subset_indices=subset_indices)
151 | not_distributed = getattr(cfg.data, 'val_data_loader_not_distributed', False)
152 | # We often use a folder of images to represent a video. When doing evaluation, we want the images to preserve their
153 | # original order. As a result, we do not want to distribute images from the same video to different GPUs.
154 | not_distributed = 'video' in cfg.data.type or not_distributed
155 | drop_last = getattr(cfg.data.val, 'drop_last', False)
156 | # Validation loader need not have preemption handling.
157 | val_data_loader = _get_data_loader(
158 | cfg, val_dataset, cfg.data.val.batch_size, not_distributed=not_distributed,
159 | shuffle=False, drop_last=drop_last, seed=seed,
160 | preemptable=False
161 | )
162 | return val_data_loader
163 |
164 |
165 | def get_test_dataloader(cfg, subset_indices=None):
166 | r"""Return dataset objects for testing
167 |
168 | Args:
169 | cfg (obj): Global configuration file.
170 | subset_indices (sequence): Indices of the subset to use.
171 | Returns:
172 | (obj): Test data loader. It may not contain the ground truth.
173 | """
174 | test_dataset = _get_test_dataset_object(cfg, subset_indices=subset_indices)
175 | not_distributed = getattr(
176 | cfg.test_data, 'val_data_loader_not_distributed', False)
177 | not_distributed = 'video' in cfg.test_data.type or not_distributed
178 | test_data_loader = _get_data_loader(
179 | cfg, test_dataset, cfg.test_data.test.batch_size, not_distributed=not_distributed,
180 | shuffle=False)
181 | return test_data_loader
182 |
--------------------------------------------------------------------------------
/imaginaire/datasets/utils/sampler.py:
--------------------------------------------------------------------------------
1 | '''
2 | -----------------------------------------------------------------------------
3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
4 |
5 | NVIDIA CORPORATION and its licensors retain all intellectual property
6 | and proprietary rights in and to this software, related documentation
7 | and any modifications thereto. Any use, reproduction, disclosure or
8 | distribution of this software and related documentation without an express
9 | license agreement from NVIDIA CORPORATION is strictly prohibited.
10 | -----------------------------------------------------------------------------
11 | '''
12 |
13 | import math
14 | import torch.distributed as dist
15 | import torch
16 |
17 | from torch.utils.data import Sampler
18 | from typing import TypeVar
19 |
20 | T_co = TypeVar('T_co', covariant=True)
21 |
22 |
23 | class DistributedSamplerPreemptable(Sampler[T_co]):
24 | r"""Sampler that supports loading from an iteration.
25 | This is very useful for preemptable jobs.
26 |
27 | Args:
28 | dataset (torch.utils.data.Dataset): Dataset object
29 | num_replicas (int): Number of replicas to distribute the dataloader over.
30 | This is typically the world size in DDP jobs.
31 | rank (int): Rank of the current process.
32 | shuffle (bool): Whether to shuffle the dataloader in each epoch.
33 | seed (int): Random seed used for shuffling the dataloader.
34 | drop_last (bool): Whether to drop the last batch.
35 | """
36 |
37 | def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True,
38 | seed=0, drop_last=False):
39 |
40 | if num_replicas is None:
41 | if not dist.is_available():
42 | raise RuntimeError("Requires distributed package to be available")
43 | num_replicas = dist.get_world_size()
44 | if rank is None:
45 | if not dist.is_available():
46 | raise RuntimeError("Requires distributed package to be available")
47 | rank = dist.get_rank()
48 | if rank >= num_replicas or rank < 0:
49 | raise ValueError(
50 | "Invalid rank {}, rank should be in the interval"
51 | " [0, {}]".format(rank, num_replicas - 1))
52 | self.dataset = dataset
53 | self.num_replicas = num_replicas
54 | self.rank = rank
55 | self.epoch = 0
56 |
57 | # start_index is the index to begin the dataloader from.
58 | self.start_index = 0
59 |
60 | self.drop_last = drop_last
61 | # If the dataset length is evenly divisible by # of replicas, then there
62 | # is no need to drop any data, since the dataset will be split equally.
63 | if self.drop_last and len(self.dataset) % self.num_replicas != 0: # type: ignore[arg-type]
64 | # Split to nearest available length that is evenly divisible.
65 | # This is to ensure each rank receives the same amount of data when
66 | # using this Sampler.
67 | self.num_samples = math.ceil(
68 | (len(self.dataset) - self.num_replicas) / self.num_replicas # type: ignore[arg-type]
69 | )
70 | else:
71 | self.num_samples = math.ceil(len(self.dataset) / self.num_replicas) # type: ignore[arg-type]
72 | self.total_size = self.num_samples * self.num_replicas
73 | self.shuffle = shuffle
74 | self.seed = seed
75 |
76 | def __iter__(self):
77 | if self.shuffle:
78 | # deterministically shuffle based on epoch and seed
79 | g = torch.Generator()
80 | g.manual_seed(self.seed + self.epoch)
81 | indices = torch.randperm(len(self.dataset), generator=g).tolist() # type: ignore[arg-type]
82 | else:
83 | indices = list(range(len(self.dataset))) # type: ignore[arg-type]
84 |
85 | if not self.drop_last:
86 | # add extra samples to make it evenly divisible
87 | padding_size = self.total_size - len(indices)
88 | if padding_size <= len(indices):
89 | indices += indices[:padding_size]
90 | else:
91 | indices += (indices * math.ceil(padding_size / len(indices)))[:padding_size]
92 | else:
93 | # remove tail of data to make it evenly divisible.
94 | indices = indices[:self.total_size]
95 | assert len(indices) == self.total_size
96 |
97 | # subsample
98 | indices = indices[self.rank:self.total_size:self.num_replicas]
99 | assert len(indices) == self.num_samples
100 |
101 | # assert self.start_index < len(indices)
102 | if self.start_index >= len(indices):
103 | print('(Warning): Start index exceeds the length of the dataloader. Going to the last batch of the dataset instead.')
104 | # This is hardcoded to go one batch before.
105 | self.start_index = len(indices) - 64
106 | indices = indices[self.start_index:]
107 |
108 | return iter(indices)
109 |
110 | def __len__(self):
111 | return self.num_samples
112 |
113 | def set_epoch(self, epoch):
114 | self.epoch = epoch
115 |
116 | def set_iteration(self, start_index):
117 | self.start_index = start_index
118 |
--------------------------------------------------------------------------------
/imaginaire/models/base.py:
--------------------------------------------------------------------------------
1 | '''
2 | -----------------------------------------------------------------------------
3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
4 |
5 | NVIDIA CORPORATION and its licensors retain all intellectual property
6 | and proprietary rights in and to this software, related documentation
7 | and any modifications thereto. Any use, reproduction, disclosure or
8 | distribution of this software and related documentation without an express
9 | license agreement from NVIDIA CORPORATION is strictly prohibited.
10 | -----------------------------------------------------------------------------
11 | '''
12 |
13 | import torch
14 |
15 |
16 | class Model(torch.nn.Module):
17 |
18 | def __init__(self, cfg_model, cfg_data):
19 | super().__init__()
20 |
21 | def get_param_groups(self, cfg_optim):
22 | """Allow the network to use different hyperparameters (e.g., learning rate) for different parameters.
23 | Returns:
24 | PyTorch parameter group (list or generator). See the PyTorch documentation for details.
25 | """
26 | return self.parameters()
27 |
28 | def device(self):
29 | """Return device on which model resides."""
30 | return next(self.parameters()).device
31 |
--------------------------------------------------------------------------------
/imaginaire/models/utils/init_weight.py:
--------------------------------------------------------------------------------
1 | '''
2 | -----------------------------------------------------------------------------
3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
4 |
5 | NVIDIA CORPORATION and its licensors retain all intellectual property
6 | and proprietary rights in and to this software, related documentation
7 | and any modifications thereto. Any use, reproduction, disclosure or
8 | distribution of this software and related documentation without an express
9 | license agreement from NVIDIA CORPORATION is strictly prohibited.
10 | -----------------------------------------------------------------------------
11 | '''
12 |
13 | import torch
14 | from torch.nn import init
15 |
16 |
17 | def weights_init(init_type, gain, bias=None):
18 | r"""Initialize weights in the network.
19 |
20 | Args:
21 | init_type (str): The name of the initialization scheme.
22 | gain (float): The parameter that is required for the initialization
23 | scheme.
24 | bias (object): If not ``None``, specifies the initialization parameter
25 | for bias.
26 |
27 | Returns:
28 | (obj): init function to be applied.
29 | """
30 |
31 | def init_func(m):
32 | r"""Init function
33 |
34 | Args:
35 | m: module to be weight initialized.
36 | """
37 | class_name = m.__class__.__name__
38 | if hasattr(m, 'weight') and (
39 | class_name.find('Conv') != -1 or
40 | class_name.find('Linear') != -1 or
41 | class_name.find('Embedding') != -1):
42 | lr_mul = getattr(m, 'lr_mul', 1.)
43 | gain_final = gain / lr_mul
44 | if init_type == 'normal':
45 | init.normal_(m.weight.data, 0.0, gain_final)
46 | elif init_type == 'xavier':
47 | init.xavier_normal_(m.weight.data, gain=gain_final)
48 | elif init_type == 'xavier_uniform':
49 | init.xavier_uniform_(m.weight.data, gain=gain_final)
50 | elif init_type == 'kaiming':
51 | init.kaiming_normal_(m.weight.data, a=0, mode='fan_in')
52 | with torch.no_grad():
53 | m.weight.data *= gain_final
54 | elif init_type == 'kaiming_linear':
55 | init.kaiming_normal_(
56 | m.weight.data, a=0, mode='fan_in', nonlinearity='linear'
57 | )
58 | with torch.no_grad():
59 | m.weight.data *= gain_final
60 | elif init_type == 'orthogonal':
61 | init.orthogonal_(m.weight.data, gain=gain_final)
62 | elif init_type == 'none':
63 | pass
64 | else:
65 | raise NotImplementedError(
66 | 'initialization method [%s] is '
67 | 'not implemented' % init_type)
68 | if hasattr(m, 'bias') and m.bias is not None:
69 | if init_type == 'none':
70 | pass
71 | elif bias is not None:
72 | bias_type = getattr(bias, 'type', 'normal')
73 | if bias_type == 'normal':
74 | bias_gain = getattr(bias, 'gain', 0.5)
75 | init.normal_(m.bias.data, 0.0, bias_gain)
76 | else:
77 | raise NotImplementedError(
78 | 'initialization method [%s] is '
79 | 'not implemented' % bias_type)
80 | else:
81 | init.constant_(m.bias.data, 0.0)
82 | return init_func
83 |
84 |
85 | def weights_rescale():
86 | def init_func(m):
87 | if hasattr(m, 'init_gain'):
88 | for name, p in m.named_parameters():
89 | if 'output_scale' not in name:
90 | p.data.mul_(m.init_gain)
91 | return init_func
92 |
--------------------------------------------------------------------------------
/imaginaire/models/utils/model_average.py:
--------------------------------------------------------------------------------
1 | '''
2 | -----------------------------------------------------------------------------
3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
4 |
5 | NVIDIA CORPORATION and its licensors retain all intellectual property
6 | and proprietary rights in and to this software, related documentation
7 | and any modifications thereto. Any use, reproduction, disclosure or
8 | distribution of this software and related documentation without an express
9 | license agreement from NVIDIA CORPORATION is strictly prohibited.
10 | -----------------------------------------------------------------------------
11 | '''
12 |
13 | import copy
14 |
15 | import torch
16 | from torch import nn
17 | from imaginaire.utils.misc import requires_grad
18 |
19 |
20 | def reset_batch_norm(m):
21 | r"""Reset batch norm statistics
22 |
23 | Args:
24 | m: Pytorch module
25 | """
26 | if hasattr(m, 'reset_running_stats'):
27 | m.reset_running_stats()
28 |
29 |
30 | def calibrate_batch_norm_momentum(m):
31 | r"""Calibrate batch norm momentum
32 |
33 | Args:
34 | m: Pytorch module
35 | """
36 | if hasattr(m, 'reset_running_stats'):
37 | # if m._get_name() == 'SyncBatchNorm':
38 | if 'BatchNorm' in m._get_name():
39 | m.momentum = 1.0 / float(m.num_batches_tracked + 1)
40 |
41 |
42 | class ModelAverage(nn.Module):
43 | r"""In this model average implementation, the spectral layers are
44 | absorbed in the model parameter by default. If such options are
45 | turned on, be careful with how you do the training. Remember to
46 | re-estimate the batch norm parameters before using the model.
47 |
48 | Args:
49 | module (torch nn module): Torch network.
50 | beta (float): Moving average weights. How much we weight the past.
51 | start_iteration (int): From which iteration, we start the update.
52 | """
53 | def __init__(self, module, beta=0.9999, start_iteration=0):
54 | super(ModelAverage, self).__init__()
55 |
56 | self.module = module
57 | # A shallow copy creates a new object which stores the reference of
58 | # the original elements.
59 | # A deep copy creates a new object and recursively adds the copies of
60 | # nested objects present in the original elements.
61 | self._averaged_model = copy.deepcopy(self.module).to('cuda')
62 | self.stream = torch.cuda.Stream()
63 |
64 | self.beta = beta
65 |
66 | self.start_iteration = start_iteration
67 | # This buffer is to track how many iterations has the model been
68 | # trained for. We will ignore the first $(start_iterations) and start
69 | # the averaging after.
70 | self.register_buffer('num_updates_tracked',
71 | torch.tensor(0, dtype=torch.long))
72 | self.num_updates_tracked = self.num_updates_tracked.to('cuda')
73 | self.averaged_model.eval()
74 |
75 | # Averaged model does not require grad.
76 | requires_grad(self.averaged_model, False)
77 |
78 | @property
79 | def averaged_model(self):
80 | self.stream.synchronize()
81 | return self._averaged_model
82 |
83 | def forward(self, *inputs, **kwargs):
84 | r"""PyTorch module forward function overload."""
85 | return self.module(*inputs, **kwargs)
86 |
87 | @torch.no_grad()
88 | def update_average(self):
89 | r"""Update the moving average."""
90 | self.stream.wait_stream(torch.cuda.current_stream())
91 | with torch.cuda.stream(self.stream):
92 | self.num_updates_tracked += 1
93 | if self.num_updates_tracked <= self.start_iteration:
94 | beta = 0.
95 | else:
96 | beta = self.beta
97 | source_dict = self.module.state_dict()
98 | target_dict = self._averaged_model.state_dict()
99 | source_list = []
100 | target_list = []
101 | for key in target_dict:
102 | if 'num_batches_tracked' in key:
103 | continue
104 | source_list.append(source_dict[key].data)
105 | target_list.append(target_dict[key].data.float())
106 |
107 | torch._foreach_mul_(target_list, beta)
108 | torch._foreach_add_(target_list, source_list, alpha=1 - beta)
109 |
110 | def __repr__(self):
111 | r"""Returns a string that holds a printable representation of an
112 | object"""
113 | return self.module.__repr__()
114 |
--------------------------------------------------------------------------------
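
A minimal usage sketch (not part of the repository) of how ModelAverage might be driven in a training loop, assuming a CUDA device is available; the toy network, optimizer, and loss below are illustrative placeholders:

import torch
from torch import nn
from imaginaire.models.utils.model_average import ModelAverage

# Wrap a toy network; the EMA copy lives on the GPU and is updated on a side stream.
net = nn.Sequential(nn.Linear(8, 32), nn.ReLU(), nn.Linear(32, 1)).to('cuda')
ema = ModelAverage(net, beta=0.9999, start_iteration=100)
opt = torch.optim.Adam(ema.module.parameters(), lr=1e-3)

for _ in range(10):
    x = torch.randn(4, 8, device='cuda')
    loss = ema(x).pow(2).mean()   # forward() dispatches to the raw (non-averaged) module
    opt.zero_grad()
    loss.backward()
    opt.step()
    ema.update_average()          # refresh the exponential moving average

# The property synchronizes the CUDA stream before exposing the averaged weights.
ema.averaged_model.eval()
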
/imaginaire/trainers/utils/get_trainer.py:
--------------------------------------------------------------------------------
1 | '''
2 | -----------------------------------------------------------------------------
3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
4 |
5 | NVIDIA CORPORATION and its licensors retain all intellectual property
6 | and proprietary rights in and to this software, related documentation
7 | and any modifications thereto. Any use, reproduction, disclosure or
8 | distribution of this software and related documentation without an express
9 | license agreement from NVIDIA CORPORATION is strictly prohibited.
10 | -----------------------------------------------------------------------------
11 | '''
12 |
13 | import importlib
14 | import torch
15 | import torch.distributed as dist
16 | import torch.nn as nn
17 | from torch.optim import lr_scheduler
18 | from imaginaire.models.utils.model_average import ModelAverage
19 |
20 |
21 | def get_trainer(cfg, is_inference=True, seed=0):
22 | """Return the trainer object.
23 |
24 | Args:
25 | cfg (Config): Loaded config object.
26 | is_inference (bool): Inference mode.
27 | seed (int): Random seed.
28 | Returns:
29 | (obj): Trainer object.
30 | """
31 | trainer_lib = importlib.import_module(cfg.trainer.type)
32 | trainer = trainer_lib.Trainer(cfg, is_inference=is_inference, seed=seed)
33 | return trainer
34 |
35 |
36 | def wrap_model(cfg, model):
37 | r"""Wrap the networks with AMP DDP and (optionally) model average.
38 |
39 | Args:
40 | cfg (obj): Global configuration.
41 | model (obj): Model object.
42 |
43 | Returns:
44 | (obj): Model object, optionally wrapped with ModelAverage and then
45 | with DDP (or a dummy WrappedModel wrapper).
46 | """
47 | # Apply model average wrapper.
48 | if cfg.trainer.ema_config.enabled:
49 | model = ModelAverage(model,
50 | cfg.trainer.ema_config.beta,
51 | cfg.trainer.ema_config.start_iteration,
52 | )
53 | model = _wrap_model(cfg, model)
54 | return model
55 |
56 |
57 | class WrappedModel(nn.Module):
58 | r"""Dummy wrapping the module.
59 | """
60 |
61 | def __init__(self, module):
62 | super(WrappedModel, self).__init__()
63 | self.module = module
64 |
65 | def forward(self, *args, **kwargs):
66 | r"""PyTorch module forward function overload."""
67 | return self.module(*args, **kwargs)
68 |
69 |
70 | def _wrap_model(cfg, model):
71 | r"""Wrap a model for distributed data parallel training.
72 |
73 | Args:
74 | cfg (obj): Global configuration.
75 | model (obj): PyTorch network model.
76 | Returns:
77 | (obj): Wrapped PyTorch network model.
78 | """
79 | # Apply DDP wrapper.
80 | if dist.is_available() and dist.is_initialized():
81 | model = torch.nn.parallel.DistributedDataParallel(
82 | model,
83 | device_ids=[cfg.local_rank],
84 | output_device=cfg.local_rank,
85 | find_unused_parameters=cfg.trainer.ddp_config.find_unused_parameters,
86 | static_graph=cfg.trainer.ddp_config.static_graph,
87 | broadcast_buffers=False,
88 | )
89 | else:
90 | model = WrappedModel(model)
91 | return model
92 |
93 |
94 | def _calculate_model_size(model):
95 | r"""Calculate number of parameters in a PyTorch network.
96 |
97 | Args:
98 | model (obj): PyTorch network.
99 |
100 | Returns:
101 | (int): Number of parameters.
102 | """
103 | return sum(p.numel() for p in model.parameters() if p.requires_grad)
104 |
105 |
106 | def get_optimizer(cfg_optim, model):
107 | r"""Return the optimizer object.
108 |
109 | Args:
110 | cfg_optim (obj): Config for the specific optimization module (gen/dis).
111 | model (obj): PyTorch network object.
112 |
113 | Returns:
114 | (obj): Pytorch optimizer
115 | """
116 | if hasattr(model, 'get_param_groups'):
117 | # Allow the network to use different hyperparameters (e.g., learning rate) for different parameters.
118 | params = model.get_param_groups(cfg_optim)
119 | else:
120 | params = model.parameters()
121 |
122 | try:
123 | # Try the PyTorch optimizer class first.
124 | optimizer_class = getattr(torch.optim, cfg_optim.type)
125 | except AttributeError:
126 | raise NotImplementedError(f"Optimizer {cfg_optim.type} is not yet implemented.")
127 | optimizer_kwargs = cfg_optim.params
128 |
129 | # We will try to use fused optimizers by default.
130 | try:
131 | from apex.optimizers import FusedAdam, FusedSGD
132 | fused_opt = cfg_optim.fused_opt
133 | except (ImportError, ModuleNotFoundError):
134 | fused_opt = False
135 |
136 | if fused_opt:
137 | if cfg_optim.type == 'Adam':
138 | optimizer_class = FusedAdam
139 | optimizer_kwargs['adam_w_mode'] = False
140 | elif cfg_optim.type == 'AdamW':
141 | optimizer_class = FusedAdam
142 | optimizer_kwargs['adam_w_mode'] = True
143 | elif cfg_optim.type == 'SGD':
144 | optimizer_class = FusedSGD
145 | if cfg_optim.type in ["RAdam", "RMSprop"]:
146 | optimizer_kwargs["foreach"] = fused_opt
147 |
148 | optim = optimizer_class(params, **optimizer_kwargs)
149 |
150 | return optim
151 |
152 |
153 | def get_scheduler(cfg_optim, optim):
154 | """Return the scheduler object.
155 |
156 | Args:
157 | cfg_optim (obj): Config for the specific optimization module (gen/dis).
158 | optim (obj): PyTorch optimizer object.
159 |
160 | Returns:
161 | (obj): Scheduler
162 | """
163 | if cfg_optim.sched.type == 'step':
164 | scheduler = lr_scheduler.StepLR(optim,
165 | step_size=cfg_optim.sched.step_size,
166 | gamma=cfg_optim.sched.gamma)
167 | elif cfg_optim.sched.type == 'constant':
168 | scheduler = lr_scheduler.LambdaLR(optim, lambda x: 1)
169 | elif cfg_optim.sched.type == 'linear_warmup':
170 | scheduler = lr_scheduler.LambdaLR(
171 | optim, lambda x: x * 1.0 / cfg_optim.sched.warmup if x < cfg_optim.sched.warmup else 1.0)
172 | elif cfg_optim.sched.type == 'cosine_warmup':
173 |
174 | warmup_scheduler = lr_scheduler.LinearLR(
175 | optim,
176 | start_factor=1.0 / cfg_optim.sched.warmup,
177 | end_factor=1.0,
178 | total_iters=cfg_optim.sched.warmup
179 | )
180 | T_max_val = cfg_optim.sched.decay_steps - cfg_optim.sched.warmup
181 | cosine_lr_scheduler = lr_scheduler.CosineAnnealingLR(
182 | optim,
183 | T_max=T_max_val,
184 | eta_min=getattr(cfg_optim.sched, 'eta_min', 0),
185 | )
186 | scheduler = lr_scheduler.SequentialLR(
187 | optim,
188 | schedulers=[warmup_scheduler, cosine_lr_scheduler],
189 | milestones=[cfg_optim.sched.warmup]
190 | )
191 |
192 | elif cfg_optim.sched.type == 'linear':
193 | # Start linear decay from here.
194 | decay_start = cfg_optim.sched.decay_start
195 | # End linear decay here.
196 | # Continue to train using the lowest learning rate till the end.
197 | decay_end = cfg_optim.sched.decay_end
198 | # Lowest learning rate multiplier.
199 | decay_target = cfg_optim.sched.decay_target
200 |
201 | def sch(x):
202 | decay = ((x - decay_start) * decay_target + decay_end - x) / (decay_end - decay_start)
203 | return min(max(decay, decay_target), 1.)
204 |
205 | scheduler = lr_scheduler.LambdaLR(optim, lambda x: sch(x))
206 | elif cfg_optim.sched.type == 'step_with_warmup':
207 | # The step_size and gamma follow the signature of lr_scheduler.StepLR.
208 | step_size = cfg_optim.sched.step_size
209 | gamma = cfg_optim.sched.gamma
210 | # An additional parameter defines the warmup iteration.
211 | warmup_step_size = cfg_optim.sched.warmup_step_size
212 |
213 | def sch(x):
214 | lr_after_warmup = gamma ** (warmup_step_size // step_size)
215 | if x < warmup_step_size:
216 | return x / warmup_step_size * lr_after_warmup
217 | else:
218 | return gamma ** (x // step_size)
219 |
220 | scheduler = lr_scheduler.LambdaLR(optim, lambda x: sch(x))
221 | else:
222 | raise NotImplementedError('Learning rate policy {} not implemented.'.format(cfg_optim.sched.type))
223 | return scheduler
224 |
--------------------------------------------------------------------------------
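
A hedged sketch (not repository code) of how get_optimizer() and get_scheduler() could be exercised in isolation; the SimpleNamespace stands in for the attribute-style config that imaginaire.config normally builds from the YAML files, and the field values are illustrative:

from types import SimpleNamespace
import torch.nn as nn
from imaginaire.trainers.utils.get_trainer import get_optimizer, get_scheduler

cfg_optim = SimpleNamespace(
    type='Adam',
    params={'lr': 1e-3, 'betas': (0.9, 0.999)},
    fused_opt=False,                                  # fall back to torch.optim.Adam
    sched=SimpleNamespace(type='linear_warmup', warmup=1000),
)
model = nn.Linear(16, 4)                              # no get_param_groups(): plain parameters()
optim = get_optimizer(cfg_optim, model)
sched = get_scheduler(cfg_optim, optim)               # LambdaLR ramping the lr over 1000 steps
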
/imaginaire/trainers/utils/logging.py:
--------------------------------------------------------------------------------
1 | '''
2 | -----------------------------------------------------------------------------
3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
4 |
5 | NVIDIA CORPORATION and its licensors retain all intellectual property
6 | and proprietary rights in and to this software, related documentation
7 | and any modifications thereto. Any use, reproduction, disclosure or
8 | distribution of this software and related documentation without an express
9 | license agreement from NVIDIA CORPORATION is strictly prohibited.
10 | -----------------------------------------------------------------------------
11 | '''
12 |
13 | import datetime
14 | import os
15 |
16 | import torch.distributed as dist
17 |
18 | from imaginaire.utils.distributed import is_master, broadcast_object_list
19 | from imaginaire.utils.distributed import master_only_print as print
20 |
21 |
22 | def get_date_uid():
23 | """Generate a unique id based on date.
24 | Returns:
25 | str: Return uid string, e.g. '2019_0125_1047_58'.
26 | """
27 | return str(datetime.datetime.now().strftime("%Y_%m%d_%H%M_%S"))
28 |
29 |
30 | def init_logging(config_path, logdir, makedir=True):
31 | r"""Create log directory for storing checkpoints and output images.
32 |
33 | Args:
34 | config_path (str): Path to the configuration file.
35 | logdir (str or None): Log directory name
36 | makedir (bool): Make a new dir or not
37 | Returns:
38 | str: Return log dir
39 | """
40 | def _create_logdir(_config_path, _logdir, _root_dir):
41 | config_file = os.path.basename(_config_path)
42 | date_uid = get_date_uid()
43 | # example: logs/2019_0125_1047_58_spade_cocostuff
44 | _log_file = '_'.join([date_uid, os.path.splitext(config_file)[0]])
45 | if _logdir is None:
46 | _logdir = os.path.join(_root_dir, _log_file)
47 | if makedir:
48 | print('Make folder {}'.format(_logdir))
49 | os.makedirs(_logdir, exist_ok=True)
50 | return _logdir
51 |
52 | root_dir = 'logs'
53 | if dist.is_available():
54 | if dist.is_initialized():
55 | message = [None]
56 | if is_master():
57 | logdir = _create_logdir(config_path, logdir, root_dir)
58 | message = [logdir]
59 |
60 | # Send logdir from master to all workers.
61 | message = broadcast_object_list(message=message, src=0)
62 | logdir = message[0]
63 | else:
64 | logdir = _create_logdir(config_path, logdir, root_dir)
65 | else:
66 | logdir = _create_logdir(config_path, logdir, root_dir)
67 |
68 | return logdir
69 |
--------------------------------------------------------------------------------
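
A small usage sketch (not repository code) of init_logging(); without an initialized process group it simply creates a timestamped directory under logs/ on the local machine:

from imaginaire.trainers.utils.logging import init_logging

# e.g. logs/2023_0601_1200_00_base (the prefix comes from get_date_uid()).
logdir = init_logging('projects/neuralangelo/configs/base.yaml', logdir=None)
print(logdir)
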
/imaginaire/trainers/utils/meters.py:
--------------------------------------------------------------------------------
1 | '''
2 | -----------------------------------------------------------------------------
3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
4 |
5 | NVIDIA CORPORATION and its licensors retain all intellectual property
6 | and proprietary rights in and to this software, related documentation
7 | and any modifications thereto. Any use, reproduction, disclosure or
8 | distribution of this software and related documentation without an express
9 | license agreement from NVIDIA CORPORATION is strictly prohibited.
10 | -----------------------------------------------------------------------------
11 | '''
12 |
13 | import math
14 | import torch
15 | import wandb
16 | from torch.utils.tensorboard import SummaryWriter
17 |
18 | from imaginaire.utils.distributed import master_only, dist_all_reduce_tensor, \
19 | is_master, get_rank
20 |
21 | from imaginaire.utils.distributed import master_only_print as print
22 |
23 | LOG_WRITER = None
24 | LOG_DIR = None
25 |
26 |
27 | @torch.no_grad()
28 | def sn_reshape_weight_to_matrix(weight):
29 | r"""Reshape weight to obtain the matrix form.
30 |
31 | Args:
32 | weight (Parameters): pytorch layer parameter tensor.
33 | """
34 | weight_mat = weight
35 | height = weight_mat.size(0)
36 | return weight_mat.reshape(height, -1)
37 |
38 |
39 | @torch.no_grad()
40 | def get_weight_stats(mod):
41 | r"""Get weight state
42 |
43 | Args:
44 | mod: Pytorch module
45 | """
46 | if mod.weight_orig.grad is not None:
47 | grad_norm = mod.weight_orig.grad.data.norm().item()
48 | else:
49 | grad_norm = 0.
50 | weight_norm = mod.weight_orig.data.norm().item()
51 | weight_mat = sn_reshape_weight_to_matrix(mod.weight_orig)
52 | sigma = torch.sum(mod.weight_u * torch.mv(weight_mat, mod.weight_v))
53 | return grad_norm, weight_norm, sigma
54 |
55 |
56 | @master_only
57 | def set_summary_writer(log_dir):
58 | r"""Set summary writer
59 |
60 | Args:
61 | log_dir (str): Log directory.
62 | """
63 | global LOG_DIR, LOG_WRITER
64 | LOG_DIR = log_dir
65 | LOG_WRITER = SummaryWriter(log_dir=log_dir)
66 |
67 |
68 | def write_summary(name, summary, step, hist=False):
69 | """Utility function for write summary to log_writer.
70 | """
71 | global LOG_WRITER
72 | lw = LOG_WRITER
73 | if lw is None:
74 | raise Exception("Log writer not set.")
75 | if hist:
76 | lw.add_histogram(name, summary, step)
77 | else:
78 | lw.add_scalar(name, summary, step)
79 |
80 |
81 | class Meter(object):
82 | """Meter is to keep track of statistics along steps.
83 | Meters write values for purpose like printing average values.
84 | Meters can be flushed to log files (i.e. TensorBoard for now)
85 | regularly.
86 |
87 | Args:
88 | name (str): the name of meter
89 | reduce (bool): If ``True``, perform a distributed reduce for the log
90 | values across all GPUs.
91 | """
92 |
93 | def __init__(self, name, reduce=True):
94 | self.name = name
95 | self.reduce = reduce
96 | self.values = []
97 |
98 | def reset(self):
99 | r"""Reset the meter values"""
100 | if not self.reduce and get_rank() != 0:
101 | return
102 | self.values = []
103 |
104 | def write(self, value):
105 | r"""Record the value"""
106 | if not self.reduce and get_rank() != 0:
107 | return
108 | if value is not None:
109 | self.values.append(value)
110 |
111 | def flush(self, step):
112 | r"""Write the value in the tensorboard.
113 |
114 | Args:
115 | step (int): Epoch or iteration number.
116 | """
117 | if not self.reduce and get_rank() != 0:
118 | return
119 | values = torch.tensor(self.values, device="cuda")
120 | if self.reduce:
121 | values = dist_all_reduce_tensor(values)
122 |
123 | if not all(math.isfinite(x) for x in values):
124 | print("meter {} contained a nan or inf.".format(self.name))
125 | filtered_values = list(filter(lambda x: math.isfinite(x), self.values))
126 | if float(len(filtered_values)) != 0:
127 | value = float(sum(filtered_values)) / float(len(filtered_values))
128 | if is_master():
129 | write_summary(self.name, value, step)
130 | wandb.log({self.name: value}, step=step)
131 | self.reset()
132 |
133 | @master_only
134 | def write_image(self, img_grid, step):
135 | r"""Write the value in the tensorboard.
136 |
137 | Args:
138 | img_grid:
139 | step (int): Epoch or iteration number.
140 | """
141 | if not self.reduce and get_rank() != 0:
142 | return
143 | global LOG_WRITER
144 | lw = LOG_WRITER
145 | if lw is None:
146 | raise Exception("Log writer not set.")
147 | lw.add_image("Visualizations", img_grid, step)
148 |
--------------------------------------------------------------------------------
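
A hedged usage sketch (not repository code) of the Meter/summary-writer pair, assuming a CUDA device and an offline wandb run; the logged values are placeholders:

import wandb
from imaginaire.trainers.utils.meters import Meter, set_summary_writer

wandb.init(project='example', mode='offline')   # flush() also logs to wandb
set_summary_writer('logs/example')              # master-only: creates the SummaryWriter
loss_meter = Meter('train/loss', reduce=False)  # reduce=False: no distributed all-reduce
for loss in [0.9, 0.7, 0.5]:
    loss_meter.write(loss)
loss_meter.flush(step=3)                        # writes the mean (0.7) and resets the meter
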
/imaginaire/utils/cudnn.py:
--------------------------------------------------------------------------------
1 | '''
2 | -----------------------------------------------------------------------------
3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
4 |
5 | NVIDIA CORPORATION and its licensors retain all intellectual property
6 | and proprietary rights in and to this software, related documentation
7 | and any modifications thereto. Any use, reproduction, disclosure or
8 | distribution of this software and related documentation without an express
9 | license agreement from NVIDIA CORPORATION is strictly prohibited.
10 | -----------------------------------------------------------------------------
11 | '''
12 |
13 | import torch.backends.cudnn as cudnn
14 |
15 | from imaginaire.utils.distributed import master_only_print as print
16 |
17 |
18 | def init_cudnn(deterministic, benchmark):
19 | r"""Initialize the cudnn module. The two things to consider is whether to
20 | use cudnn benchmark and whether to use cudnn deterministic. If cudnn
21 | benchmark is set, then the cudnn deterministic is automatically false.
22 |
23 | Args:
24 | deterministic (bool): Whether to use cudnn deterministic.
25 | benchmark (bool): Whether to use cudnn benchmark.
26 | """
27 | cudnn.deterministic = deterministic
28 | cudnn.benchmark = benchmark
29 | print('cudnn benchmark: {}'.format(benchmark))
30 | print('cudnn deterministic: {}'.format(deterministic))
31 |
--------------------------------------------------------------------------------
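
A one-call usage sketch (not repository code): enable cudnn autotuning for fixed-size inputs while leaving determinism off, mirroring the trade-off described in the docstring above:

from imaginaire.utils.cudnn import init_cudnn

init_cudnn(deterministic=False, benchmark=True)
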
/imaginaire/utils/distributed.py:
--------------------------------------------------------------------------------
1 | '''
2 | -----------------------------------------------------------------------------
3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
4 |
5 | NVIDIA CORPORATION and its licensors retain all intellectual property
6 | and proprietary rights in and to this software, related documentation
7 | and any modifications thereto. Any use, reproduction, disclosure or
8 | distribution of this software and related documentation without an express
9 | license agreement from NVIDIA CORPORATION is strictly prohibited.
10 | -----------------------------------------------------------------------------
11 | '''
12 |
13 | import functools
14 | import ctypes
15 |
16 | import torch
17 | import torch.distributed as dist
18 | from contextlib import contextmanager
19 |
20 |
21 | def init_dist(local_rank, backend='nccl', **kwargs):
22 | r"""Initialize distributed training"""
23 | if dist.is_available():
24 | if dist.is_initialized():
25 | return torch.cuda.current_device()
26 | torch.cuda.set_device(local_rank)
27 | dist.init_process_group(backend=backend, init_method='env://', **kwargs)
28 |
29 | # Increase the L2 fetch granularity for faster speed.
30 | _libcudart = ctypes.CDLL('libcudart.so')
31 | # Set device limit on the current device
32 | # cudaLimitMaxL2FetchGranularity = 0x05
33 | pValue = ctypes.cast((ctypes.c_int * 1)(), ctypes.POINTER(ctypes.c_int))
34 | _libcudart.cudaDeviceSetLimit(ctypes.c_int(0x05), ctypes.c_int(128))
35 | _libcudart.cudaDeviceGetLimit(pValue, ctypes.c_int(0x05))
36 | # assert pValue.contents.value == 128
37 |
38 |
39 | def get_rank():
40 | r"""Get rank of the thread."""
41 | rank = 0
42 | if dist.is_available():
43 | if dist.is_initialized():
44 | rank = dist.get_rank()
45 | return rank
46 |
47 |
48 | def get_world_size():
49 | r"""Get world size. How many GPUs are available in this job."""
50 | world_size = 1
51 | if dist.is_available():
52 | if dist.is_initialized():
53 | world_size = dist.get_world_size()
54 | return world_size
55 |
56 |
57 | def broadcast_object_list(message, src=0):
58 | r"""Broadcast object list from the master to the others"""
59 | # Send logdir from master to all workers.
60 | if dist.is_available():
61 | if dist.is_initialized():
62 | torch.distributed.broadcast_object_list(message, src=src)
63 | return message
64 |
65 |
66 | def master_only(func):
67 | r"""Apply this function only to the master GPU."""
68 | @functools.wraps(func)
69 | def wrapper(*args, **kwargs):
70 | r"""Simple function wrapper for the master function"""
71 | if get_rank() == 0:
72 | return func(*args, **kwargs)
73 | else:
74 | return None
75 | return wrapper
76 |
77 |
78 | def is_master():
79 | r"""check if current process is the master"""
80 | return get_rank() == 0
81 |
82 |
83 | def is_dist():
84 | return dist.is_initialized()
85 |
86 |
87 | def barrier():
88 | if is_dist():
89 | dist.barrier()
90 |
91 |
92 | @contextmanager
93 | def master_first():
94 | if not is_master():
95 | barrier()
96 | yield
97 | if dist.is_initialized() and is_master():
98 | barrier()
99 |
100 |
101 | def is_local_master():
102 | return torch.cuda.current_device() == 0
103 |
104 |
105 | @master_only
106 | def master_only_print(*args):
107 | r"""master-only print"""
108 | print(*args)
109 |
110 |
111 | def dist_reduce_tensor(tensor, rank=0, reduce='mean'):
112 | r""" Reduce to rank 0 """
113 | world_size = get_world_size()
114 | if world_size < 2:
115 | return tensor
116 | with torch.no_grad():
117 | dist.reduce(tensor, dst=rank)
118 | if get_rank() == rank:
119 | if reduce == 'mean':
120 | tensor /= world_size
121 | elif reduce == 'sum':
122 | pass
123 | else:
124 | raise NotImplementedError
125 | return tensor
126 |
127 |
128 | def dist_all_reduce_tensor(tensor, reduce='mean'):
129 | r""" Reduce to all ranks """
130 | world_size = get_world_size()
131 | if world_size < 2:
132 | return tensor
133 | with torch.no_grad():
134 | dist.all_reduce(tensor)
135 | if reduce == 'mean':
136 | tensor /= world_size
137 | elif reduce == 'sum':
138 | pass
139 | else:
140 | raise NotImplementedError
141 | return tensor
142 |
143 |
144 | def dist_all_gather_tensor(tensor):
145 | r""" gather to all ranks """
146 | world_size = get_world_size()
147 | if world_size < 2:
148 | return [tensor]
149 | tensor_list = [
150 | torch.ones_like(tensor) for _ in range(dist.get_world_size())]
151 | with torch.no_grad():
152 | dist.all_gather(tensor_list, tensor)
153 | return tensor_list
154 |
--------------------------------------------------------------------------------
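
A hedged sketch (not repository code) showing that the reduction helpers degrade to no-ops in a single-process run, so the same code path serves single-GPU debugging and multi-GPU jobs launched with torchrun; the environment check for init_dist() is an assumption based on the variables a distributed launcher normally sets:

import os
import torch
from imaginaire.utils.distributed import (
    init_dist, get_world_size, dist_all_reduce_tensor, master_only_print)

if 'WORLD_SIZE' in os.environ:                  # only when launched by a distributed launcher
    init_dist(int(os.environ.get('LOCAL_RANK', 0)))

x = torch.ones(3)                               # on NCCL multi-GPU runs this must be a CUDA tensor
x = dist_all_reduce_tensor(x, reduce='mean')    # identity when world_size == 1
master_only_print('world size:', get_world_size())
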
/imaginaire/utils/gpu_affinity.py:
--------------------------------------------------------------------------------
1 | '''
2 | -----------------------------------------------------------------------------
3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
4 |
5 | NVIDIA CORPORATION and its licensors retain all intellectual property
6 | and proprietary rights in and to this software, related documentation
7 | and any modifications thereto. Any use, reproduction, disclosure or
8 | distribution of this software and related documentation without an express
9 | license agreement from NVIDIA CORPORATION is strictly prohibited.
10 | -----------------------------------------------------------------------------
11 | '''
12 |
13 | import math
14 | import os
15 | # pynvml is a Python binding to the NVIDIA Management Library
16 | # https://developer.nvidia.com/nvidia-management-library-nvml
17 | # An API for monitoring and managing various states of the NVIDIA GPU devices.
18 | # It provides direct access to the queries and commands exposed via nvidia-smi.
19 |
20 | import pynvml
21 |
22 | pynvml.nvmlInit()
23 |
24 |
25 | def system_get_driver_version():
26 | r"""Get Driver Version"""
27 | return pynvml.nvmlSystemGetDriverVersion()
28 |
29 |
30 | def device_get_count():
31 | r"""Get number of devices"""
32 | return pynvml.nvmlDeviceGetCount()
33 |
34 |
35 | class Device(object):
36 | r"""Device used for nvml."""
37 | _nvml_affinity_elements = math.ceil(os.cpu_count() / 64)
38 |
39 | def __init__(self, device_idx):
40 | super().__init__()
41 | self.handle = pynvml.nvmlDeviceGetHandleByIndex(device_idx)
42 |
43 | def get_name(self):
44 | r"""Get obect name"""
45 | return pynvml.nvmlDeviceGetName(self.handle)
46 |
47 | def get_cpu_affinity(self):
48 | r"""Get CPU affinity"""
49 | affinity_string = ''
50 | for j in pynvml.nvmlDeviceGetCpuAffinity(self.handle, Device._nvml_affinity_elements):
51 | # assume nvml returns list of 64 bit ints
52 | affinity_string = '{:064b}'.format(j) + affinity_string
53 | affinity_list = [int(x) for x in affinity_string]
54 | affinity_list.reverse() # so core 0 is in 0th element of list
55 |
56 | return [i for i, e in enumerate(affinity_list) if e != 0]
57 |
58 |
59 | def set_affinity(gpu_id=None):
60 | r"""Set GPU affinity
61 |
62 | Args:
63 | gpu_id (int): Which gpu device.
64 | """
65 | if gpu_id is None:
66 | gpu_id = int(os.getenv('LOCAL_RANK', 0))
67 |
68 | try:
69 | dev = Device(gpu_id)
70 | # os.sched_setaffinity() method in Python is used to set the CPU affinity mask of a process indicated
71 | # by the specified process id.
72 | # A process’s CPU affinity mask determines the set of CPUs on which it is eligible to run.
73 | # Syntax: os.sched_setaffinity(pid, mask)
74 | # pid=0 means the current process
75 | os.sched_setaffinity(0, dev.get_cpu_affinity())
76 | # list of ints
77 | # representing the logical cores this process is now affinitied with
78 | return os.sched_getaffinity(0)
79 |
80 | except pynvml.NVMLError:
81 | print("(Setting affinity with NVML failed, skipping...)")
82 |
--------------------------------------------------------------------------------
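
A hedged usage sketch (not repository code): pin the current process to the CPU cores closest to its GPU before building data loaders. This assumes Linux (os.sched_setaffinity) and an NVIDIA driver; on NVML failure set_affinity() prints a warning and returns None:

from imaginaire.utils.gpu_affinity import device_get_count, set_affinity

print('visible GPUs:', device_get_count())
cores = set_affinity(gpu_id=0)
print('affinitized cores:', cores)
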
/imaginaire/utils/set_random_seed.py:
--------------------------------------------------------------------------------
1 | '''
2 | -----------------------------------------------------------------------------
3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
4 |
5 | NVIDIA CORPORATION and its licensors retain all intellectual property
6 | and proprietary rights in and to this software, related documentation
7 | and any modifications thereto. Any use, reproduction, disclosure or
8 | distribution of this software and related documentation without an express
9 | license agreement from NVIDIA CORPORATION is strictly prohibited.
10 | -----------------------------------------------------------------------------
11 | '''
12 |
13 | import random
14 | import numpy as np
15 | import torch
16 |
17 | from imaginaire.utils.distributed import get_rank
18 | from imaginaire.utils.distributed import master_only_print as print
19 |
20 |
21 | def set_random_seed(seed, by_rank=False):
22 | r"""Set random seeds for everything, including random, numpy, torch.manual_seed, torch.cuda_manual_seed.
23 | torch.cuda.manual_seed_all is not necessary (included in torch.manual_seed)
24 |
25 | Args:
26 | seed (int): Random seed.
27 | by_rank (bool): if true, each gpu will use a different random seed.
28 | """
29 | if by_rank:
30 | seed += get_rank()
31 | print(f"Using random seed {seed}")
32 | random.seed(seed)
33 | np.random.seed(seed)
34 | torch.manual_seed(seed) # sets seed on the current CPU & all GPUs
35 | torch.cuda.manual_seed(seed) # sets seed on current GPU
36 | # torch.cuda.manual_seed_all(seed) # included in torch.manual_seed
37 |
--------------------------------------------------------------------------------
/imaginaire/utils/termcolor.py:
--------------------------------------------------------------------------------
1 | '''
2 | -----------------------------------------------------------------------------
3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
4 |
5 | NVIDIA CORPORATION and its licensors retain all intellectual property
6 | and proprietary rights in and to this software, related documentation
7 | and any modifications thereto. Any use, reproduction, disclosure or
8 | distribution of this software and related documentation without an express
9 | license agreement from NVIDIA CORPORATION is strictly prohibited.
10 | -----------------------------------------------------------------------------
11 | '''
12 |
13 | import pprint
14 |
15 | import termcolor
16 |
17 |
18 | def red(x): return termcolor.colored(str(x), color="red")
19 | def green(x): return termcolor.colored(str(x), color="green")
20 | def blue(x): return termcolor.colored(str(x), color="blue")
21 | def cyan(x): return termcolor.colored(str(x), color="cyan")
22 | def yellow(x): return termcolor.colored(str(x), color="yellow")
23 | def magenta(x): return termcolor.colored(str(x), color="magenta")
24 | def grey(x): return termcolor.colored(str(x), color="grey")
25 |
26 |
27 | COLORS = {
28 | 'red': red, 'green': green, 'blue': blue, 'cyan': cyan, 'yellow': yellow, 'magenta': magenta, 'grey': grey
29 | }
30 |
31 |
32 | def PP(x):
33 | string = pprint.pformat(x, indent=2)
34 | if isinstance(x, dict):
35 | string = '{\n ' + string[1:-1] + '\n}'
36 | return string
37 |
38 |
39 | def alert(x, color='red'):
40 | color = COLORS[color]
41 | print(color('-' * 32))
42 | print(color(f'* {x}'))
43 | print(color('-' * 32))
44 |
--------------------------------------------------------------------------------
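
A tiny usage sketch (not repository code) of the color helpers; the messages are placeholders:

from imaginaire.utils.termcolor import green, alert, PP

print(green('configuration loaded'))
alert('checkpoint not found, training from scratch', color='yellow')
print(PP({'lr': 1e-3, 'batch_size': 2}))
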
/imaginaire/utils/visualization.py:
--------------------------------------------------------------------------------
1 | '''
2 | -----------------------------------------------------------------------------
3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
4 |
5 | NVIDIA CORPORATION and its licensors retain all intellectual property
6 | and proprietary rights in and to this software, related documentation
7 | and any modifications thereto. Any use, reproduction, disclosure or
8 | distribution of this software and related documentation without an express
9 | license agreement from NVIDIA CORPORATION is strictly prohibited.
10 | -----------------------------------------------------------------------------
11 | '''
12 |
13 | import wandb
14 | import torch
15 | import torchvision
16 |
17 | from matplotlib import pyplot as plt
18 | from torchvision.transforms import functional as torchvision_F
19 |
20 |
21 | def wandb_image(images, from_range=(0, 1)):
22 | images = preprocess_image(images, from_range=from_range)
23 | image_grid = torchvision.utils.make_grid(images, nrow=1, pad_value=1)
24 | image_grid = torchvision_F.to_pil_image(image_grid)
25 | wandb_image = wandb.Image(image_grid)
26 | return wandb_image
27 |
28 |
29 | def preprocess_image(images, from_range=(0, 1), cmap="gray"):
30 | min, max = from_range
31 | images = (images - min) / (max - min)
32 | images = images.detach().cpu().float().clamp_(min=0, max=1)
33 | if images.shape[1] == 1:
34 | images = get_heatmap(images[:, 0], cmap=cmap)
35 | return images
36 |
37 |
38 | def get_heatmap(gray, cmap): # [N,H,W]
39 | color = plt.get_cmap(cmap)(gray.numpy())
40 | color = torch.from_numpy(color[..., :3]).permute(0, 3, 1, 2).float() # [N,3,H,W]
41 | return color
42 |
--------------------------------------------------------------------------------
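
A hedged sketch (not repository code) of the visualization helpers: RGB batches pass through unchanged, while single-channel tensors (e.g. depth) are converted to a heatmap before being packed into a wandb.Image. The random tensors and the offline wandb run are placeholders:

import torch
import wandb
from imaginaire.utils.visualization import wandb_image

wandb.init(project='example', mode='offline')
rgb = torch.rand(4, 3, 64, 64)      # [N,3,H,W] in [0,1]
depth = torch.rand(4, 1, 64, 64)    # single channel -> heatmap via get_heatmap()
wandb.log({'rgb': wandb_image(rgb), 'depth': wandb_image(depth, from_range=(0, 1))})
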
/neuralangelo.yaml:
--------------------------------------------------------------------------------
1 | # conda env create --file neuralangelo.yaml
2 | name: neuralangelo
3 | channels:
4 | - conda-forge
5 | - pytorch
6 | dependencies:
7 | # general
8 | - gpustat
9 | - gdown
10 | - cudatoolkit-dev
11 | - cmake
12 | # python general
13 | - python=3.8
14 | - pip
15 | - numpy
16 | - scipy
17 | - ipython
18 | - jupyterlab
19 | - cython
20 | - ninja
21 | - diskcache
22 | # pytorch
23 | - pytorch
24 | - torchvision
25 | - pip:
26 | - -r requirements.txt
27 |
--------------------------------------------------------------------------------
/projects/nerf/configs/ingp_blender.yaml:
--------------------------------------------------------------------------------
1 | # -----------------------------------------------------------------------------
2 | # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
3 | #
4 | # NVIDIA CORPORATION and its licensors retain all intellectual property
5 | # and proprietary rights in and to this software, related documentation
6 | # and any modifications thereto. Any use, reproduction, disclosure or
7 | # distribution of this software and related documentation without an express
8 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
9 | # -----------------------------------------------------------------------------
10 |
11 | _parent_: projects/nerf/configs/nerf_blender.yaml
12 |
13 | max_iter: 500000
14 |
15 | wandb_scalar_iter: 500
16 | wandb_image_iter: 10000
17 | validation_iter: 10000
18 |
19 | model:
20 | type: projects.nerf.models.ingp
21 | mlp:
22 | layers_feat: [null,64,64]
23 | layers_rgb: [null,64,3]
24 | voxel:
25 | levels:
26 | min: 4
27 | max: 12
28 | num: 16
29 | dict_size: 19
30 | dim: 4
31 | range: [-5,5]
32 | init_scale: 1e-4
33 | sample_intvs: 256
34 | fine_sampling: False
35 |
36 | optim:
37 | type: Adam
38 | params:
39 | lr: 0.01
40 | sched:
41 | gamma: 1
42 |
--------------------------------------------------------------------------------
/projects/nerf/configs/nerf_blender.yaml:
--------------------------------------------------------------------------------
1 | # -----------------------------------------------------------------------------
2 | # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
3 | #
4 | # NVIDIA CORPORATION and its licensors retain all intellectual property
5 | # and proprietary rights in and to this software, related documentation
6 | # and any modifications thereto. Any use, reproduction, disclosure or
7 | # distribution of this software and related documentation without an express
8 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
9 | # -----------------------------------------------------------------------------
10 |
11 | aws_credentials_file: scripts/s3/pbss_dir.secret
12 | logging_iter: 9999999999999 # disable the printing logger
13 |
14 | max_iter: 500000
15 |
16 | wandb_scalar_iter: 100
17 | wandb_image_iter: 1000
18 | validation_iter: 2000
19 |
20 | speed_benchmark: False
21 |
22 | checkpoint:
23 | save_to_s3: False
24 | load_from_s3: False
25 | s3_credentials: scripts/s3/pbss_dir.secret
26 | s3_bucket: nerf
27 | save_iter: 10000
28 |
29 | trainer:
30 | type: projects.nerf.trainers.nerf
31 | ema_config:
32 | enabled: False
33 | load_ema_checkpoint: False
34 | loss_weight:
35 | render: 1.0
36 | render_fine: 1.0
37 | init:
38 | type: xavier
39 | amp_config:
40 | enabled: True
41 |
42 | model:
43 | type: projects.nerf.models.nerf
44 | mlp:
45 | layers_feat: [null,256,256,256,256,256,256,256,256]
46 | layers_rgb: [null,128,3]
47 | skip: [4]
48 | posenc:
49 | L_3D: 10
50 | L_view: 4
51 | density_activ: softplus
52 | view_dep: True
53 | dist:
54 | param: metric
55 | range: [2,6]
56 | sample_intvs: 64
57 | sample_stratified: True
58 | fine_sampling: True
59 | sample_intvs_fine: 128
60 | rand_rays: 1024
61 | density_noise_reg:
62 | opaque_background: False
63 | camera_ndc: False
64 |
65 | optim:
66 | type: Adam
67 | params:
68 | lr: 0.0005
69 | betas: [0.9, 0.999]
70 | sched:
71 | iteration_mode: False
72 | type: step
73 | step_size: 20
74 | gamma: 0.97724
75 |
76 | data:
77 | type: projects.nerf.datasets.nerf_blender
78 | use_multi_epoch_loader: True
79 | num_workers: 4
80 | root: datasets/nerf-synthetic/lego
81 | image_size: [400,400]
82 | preload: True
83 | bgcolor: 1
84 | train:
85 | batch_size: 2
86 | subset:
87 | val:
88 | batch_size: 2
89 | subset: 4
90 | max_viz_samples: 16
91 |
92 | test_data:
93 | type: projects.nerf.datasets.nerf_blender
94 | num_workers: 4
95 | root: datasets/nerf-synthetic/lego
96 | image_size: [400,400]
97 | preload: True
98 | bgcolor: 1
99 | test:
100 | batch_size: 2
101 | subset:
102 |
--------------------------------------------------------------------------------
/projects/nerf/configs/nerf_llff.yaml:
--------------------------------------------------------------------------------
1 | # -----------------------------------------------------------------------------
2 | # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
3 | #
4 | # NVIDIA CORPORATION and its licensors retain all intellectual property
5 | # and proprietary rights in and to this software, related documentation
6 | # and any modifications thereto. Any use, reproduction, disclosure or
7 | # distribution of this software and related documentation without an express
8 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
9 | # -----------------------------------------------------------------------------
10 |
11 | _parent_: projects/nerf/configs/nerf_blender.yaml
12 |
13 | max_iter: 200000
14 |
15 | wandb_scalar_iter: 50
16 | wandb_image_iter: 500
17 | validation_iter: 1000
18 |
19 | model:
20 | type: projects.nerf.models.nerf
21 | dist:
22 | param: ndc
23 | range: [0,1]
24 | sample_intvs: 64
25 | fine_sampling: True
26 | sample_intvs_fine: 128
27 | rand_rays: 1024
28 | camera_ndc: True
29 |
30 | optim:
31 | type: Adam
32 | params:
33 | lr: 0.0005
34 | betas: [0.9, 0.999]
35 | sched:
36 | iteration_mode: False
37 | type: step
38 | step_size: 100
39 | gamma: 0.97724
40 |
41 | data:
42 | type: projects.nerf.datasets.nerf_llff
43 | use_multi_epoch_loader: True
44 | num_workers: 4
45 | root: datasets/nerf-llff/fern
46 | image_size: [480,640]
47 | preload: True
48 | val_ratio: 0.1
49 | train:
50 | batch_size: 2
51 | subset:
52 | val:
53 | batch_size: 2
54 | subset: 4
55 |
56 | test_data:
57 | type: projects.nerf.datasets.nerf_llff
58 | num_workers: 4
59 | root: datasets/nerf-llff/fern
60 | image_size: [480,640]
61 | preload: True
62 | test:
63 | batch_size: 2
64 | subset:
65 |
--------------------------------------------------------------------------------
/projects/nerf/datasets/base.py:
--------------------------------------------------------------------------------
1 | '''
2 | -----------------------------------------------------------------------------
3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
4 |
5 | NVIDIA CORPORATION and its licensors retain all intellectual property
6 | and proprietary rights in and to this software, related documentation
7 | and any modifications thereto. Any use, reproduction, disclosure or
8 | distribution of this software and related documentation without an express
9 | license agreement from NVIDIA CORPORATION is strictly prohibited.
10 | -----------------------------------------------------------------------------
11 | '''
12 |
13 | import torch
14 | import tqdm
15 | import threading
16 | import queue
17 |
18 |
19 | class Dataset(torch.utils.data.Dataset):
20 |
21 | def __init__(self, cfg, is_inference=False, is_test=False):
22 | super().__init__()
23 | self.split = "test" if is_test else "val" if is_inference else "train"
24 |
25 | def _preload_worker(self, data_list, load_func, q, lock, idx_tqdm):
26 | # Keep preloading data in parallel.
27 | while True:
28 | idx = q.get()
29 | data_list[idx] = load_func(idx)
30 | with lock:
31 | idx_tqdm.update()
32 | q.task_done()
33 |
34 | def preload_threading(self, load_func, num_workers, data_str="images"):
35 | # Use threading to preload data in parallel.
36 | data_list = [None] * len(self)
37 | q = queue.Queue(maxsize=len(self))
38 | idx_tqdm = tqdm.tqdm(range(len(self)), desc=f"preloading {data_str} ({self.split})", leave=False)
39 | for i in range(len(self)):
40 | q.put(i)
41 | lock = threading.Lock()
42 | for ti in range(num_workers):
43 | t = threading.Thread(target=self._preload_worker,
44 | args=(data_list, load_func, q, lock, idx_tqdm), daemon=True)
45 | t.start()
46 | q.join()
47 | idx_tqdm.close()
48 | assert all(map(lambda x: x is not None, data_list))
49 | return data_list
50 |
51 | def __getitem__(self, idx):
52 | raise NotImplementedError
53 |
54 | def __len__(self):
55 | return len(self.list)
56 |
--------------------------------------------------------------------------------
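
A hedged sketch (not repository code) of the preload_threading() pattern used by the dataset subclasses below; the toy subclass "loads" strings instead of images to show that any per-index load function works:

from projects.nerf.datasets import base

class ToyDataset(base.Dataset):
    def __init__(self, cfg=None):
        super().__init__(cfg)
        self.list = list(range(8))   # __len__() reads self.list
        self.items = self.preload_threading(self.load_item, num_workers=2, data_str="toy items")

    def load_item(self, idx):
        return f"item-{idx}"

    def __getitem__(self, idx):
        return self.items[idx]

ds = ToyDataset()
print(len(ds), ds[3])                # 8 item-3
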
/projects/nerf/datasets/nerf_blender.py:
--------------------------------------------------------------------------------
1 | '''
2 | -----------------------------------------------------------------------------
3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
4 |
5 | NVIDIA CORPORATION and its licensors retain all intellectual property
6 | and proprietary rights in and to this software, related documentation
7 | and any modifications thereto. Any use, reproduction, disclosure or
8 | distribution of this software and related documentation without an express
9 | license agreement from NVIDIA CORPORATION is strictly prohibited.
10 | -----------------------------------------------------------------------------
11 | '''
12 |
13 | import json
14 | import numpy as np
15 | import torch
16 | import torchvision.transforms.functional as torchvision_F
17 | from PIL import Image, ImageFile
18 |
19 | from projects.nerf.datasets import base
20 | from projects.nerf.utils import camera
21 |
22 | ImageFile.LOAD_TRUNCATED_IMAGES = True
23 |
24 |
25 | class Dataset(base.Dataset):
26 |
27 | def __init__(self, cfg, is_inference=False, is_test=False):
28 | super().__init__(cfg, is_inference=is_inference, is_test=is_test)
29 | cfg_data = cfg.test_data if self.split == "test" else cfg.data
30 | data_info = cfg_data[self.split]
31 | self.root = cfg_data.root
32 | self.preload = cfg_data.preload
33 | self.bgcolor = cfg_data.bgcolor
34 | self.raw_H, self.raw_W = 800, 800
35 | self.H, self.W = cfg_data.image_size
36 | meta_fname = f"{cfg_data.root}/transforms_{self.split}.json"
37 | with open(meta_fname) as file:
38 | self.meta = json.load(file)
39 | self.focal = 0.5 * self.raw_W / np.tan(0.5 * self.meta["camera_angle_x"])
40 | self.list = self.meta["frames"]
41 | # Consider only a subset of data.
42 | if data_info.subset:
43 | self.list = self.list[:data_info.subset]
44 | # Preload dataset if possible.
45 | if cfg_data.preload:
46 | self.images = self.preload_threading(self.get_image, cfg_data.num_workers)
47 | self.cameras = self.preload_threading(self.get_camera, cfg_data.num_workers, data_str="cameras")
48 |
49 | def __getitem__(self, idx):
50 | """Process raw data and return processed data in a dictionary.
51 |
52 | Args:
53 | idx: The index of the sample of the dataset.
54 | Returns: A dictionary containing the data.
55 | idx (scalar): The index of the sample of the dataset.
56 | image (3xHxW tensor): Image with pixel values in [0,1] for supervision.
57 | intr (3x3 tensor): The camera intrinsics of `image`.
58 | pose (3x4 tensor): The camera extrinsics [R,t] of `image`.
59 | """
60 | # Keep track of sample index for convenience.
61 | sample = dict(idx=idx)
62 | # Get the images.
63 | image = self.images[idx] if self.preload else self.get_image(idx)
64 | image = self.preprocess_image(image)
65 | # Get the cameras (intrinsics and pose).
66 | intr, pose = self.cameras[idx] if self.preload else self.get_camera(idx)
67 | intr, pose = self.preprocess_camera(intr, pose)
68 | # Update the data sample.
69 | sample.update(
70 | image=image,
71 | intr=intr,
72 | pose=pose,
73 | )
74 | return sample
75 |
76 | def get_image(self, idx):
77 | fpath = self.list[idx]["file_path"][2:]
78 | image_fname = f"{self.root}/{fpath}.png"
79 | image = Image.open(image_fname)
80 | image.load()
81 | return image
82 |
83 | def preprocess_image(self, image):
84 | # Resize the image.
85 | image = image.resize((self.W, self.H))
86 | image = torchvision_F.to_tensor(image)
87 | # Background masking.
88 | rgb, mask = image[:3], image[3:]
89 | if self.bgcolor is not None:
90 | rgb = rgb * mask + self.bgcolor * (1 - mask)
91 | return rgb
92 |
93 | def get_camera(self, idx):
94 | # Camera intrinsics.
95 | intr = torch.tensor([[self.focal, 0, self.raw_W / 2],
96 | [0, self.focal, self.raw_H / 2],
97 | [0, 0, 1]]).float()
98 | # Camera pose.
99 | pose_raw = torch.tensor(self.list[idx]["transform_matrix"], dtype=torch.float32)
100 | pose = self.parse_raw_camera(pose_raw)
101 | return intr, pose
102 |
103 | def preprocess_camera(self, intr, pose):
104 | # Adjust the intrinsics according to the resized image.
105 | intr = intr.clone()
106 | intr[0] *= self.W / self.raw_W
107 | intr[1] *= self.H / self.raw_H
108 | return intr, pose
109 |
110 | def parse_raw_camera(self, pose_raw):
111 | pose_flip = camera.pose(R=torch.diag(torch.tensor([1, -1, -1])))
112 | pose = camera.pose.compose([pose_flip, pose_raw[:3]])
113 | pose = camera.pose.invert(pose)
114 | return pose
115 |
--------------------------------------------------------------------------------
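
A worked sketch (not repository code) of the intrinsics set up by this dataset: the focal length is derived from the Blender camera_angle_x and then rescaled in preprocess_camera() for the configured 400x400 training resolution. The camera_angle_x value below is an illustrative placeholder; the real one is read from the transforms_{split}.json file:

import numpy as np
import torch

raw_H = raw_W = 800                               # native Blender render size
H = W = 400                                       # cfg_data.image_size
camera_angle_x = 0.6911                           # placeholder horizontal FOV in radians
focal = 0.5 * raw_W / np.tan(0.5 * camera_angle_x)
intr = torch.tensor([[focal, 0, raw_W / 2],
                     [0, focal, raw_H / 2],
                     [0, 0, 1]]).float()
intr[0] *= W / raw_W                              # preprocess_camera() scaling
intr[1] *= H / raw_H
print(intr)                                       # focal ~1111 at 800px, ~556 after resizing
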
/projects/nerf/datasets/nerf_llff.py:
--------------------------------------------------------------------------------
1 | '''
2 | -----------------------------------------------------------------------------
3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
4 |
5 | NVIDIA CORPORATION and its licensors retain all intellectual property
6 | and proprietary rights in and to this software, related documentation
7 | and any modifications thereto. Any use, reproduction, disclosure or
8 | distribution of this software and related documentation without an express
9 | license agreement from NVIDIA CORPORATION is strictly prohibited.
10 | -----------------------------------------------------------------------------
11 | '''
12 |
13 | import numpy as np
14 | import torch
15 | import torch.nn.functional as torch_F
16 | import torchvision.transforms.functional as torchvision_F
17 | from PIL import Image, ImageFile
18 |
19 | from projects.nerf.datasets import base
20 | from projects.nerf.utils import camera
21 |
22 | ImageFile.LOAD_TRUNCATED_IMAGES = True
23 |
24 |
25 | class Dataset(base.Dataset):
26 |
27 | def __init__(self, cfg, is_inference=False, is_test=False):
28 | super().__init__(cfg, is_inference=is_inference, is_test=is_test)
29 | cfg_data = cfg.test_data if self.split == "test" else cfg.data
30 | data_info = cfg_data[self.split]
31 | self.root = cfg_data.root
32 | self.preload = cfg_data.preload
33 | self.raw_H, self.raw_W = 3024, 4032
34 | self.H, self.W = cfg_data.image_size
35 | list_fname = f"{cfg_data.root}/images.list"
36 | image_fnames = open(list_fname).read().splitlines()
37 | poses_raw, bounds = self.parse_cameras_and_bounds(cfg_data)
38 | self.list = list(zip(image_fnames, poses_raw, bounds))
39 | # Manually split train/val subsets.
40 | num_val_split = int(len(self) * cfg_data.val_ratio)
41 | self.list = self.list[:-num_val_split] if self.split == "train" else self.list[-num_val_split:]
42 | # Consider only a subset of data.
43 | if data_info.subset:
44 | self.list = self.list[:data_info.subset]
45 | # Preload dataset if possible.
46 | if cfg_data.preload:
47 | self.images = self.preload_threading(self.get_image, cfg_data.num_workers)
48 | self.cameras = self.preload_threading(self.get_camera, cfg_data.num_workers, data_str="cameras")
49 |
50 | def parse_cameras_and_bounds(self, cfg_data):
51 | fname = f"{cfg_data.root}/poses_bounds.npy"
52 | data = torch.tensor(np.load(fname), dtype=torch.float32)
53 | # Parse cameras (intrinsics and poses).
54 | cam_data = data[:, :-2].view([-1, 3, 5]) # [N,3,5]
55 | poses_raw = cam_data[..., :4] # [N,3,4]
56 | poses_raw[..., 0], poses_raw[..., 1] = poses_raw[..., 1], -poses_raw[..., 0]
57 | raw_H, raw_W, self.focal = cam_data[0, :, -1]
58 | assert self.raw_H == raw_H and self.raw_W == raw_W
59 | # Parse depth bounds.
60 | bounds = data[:, -2:] # [N,2]
61 | scale = 1. / (bounds.min() * 0.75) # Not sure how this was determined?
62 | poses_raw[..., 3] *= scale
63 | bounds *= scale
64 | # Roughly center camera poses.
65 | poses_raw = self.center_camera_poses(poses_raw)
66 | return poses_raw, bounds
67 |
68 | def center_camera_poses(self, poses):
69 | # Compute average pose.
70 | center = poses[..., 3].mean(dim=0)
71 | v1 = torch_F.normalize(poses[..., 1].mean(dim=0), dim=0)
72 | v2 = torch_F.normalize(poses[..., 2].mean(dim=0), dim=0)
73 | v0 = v1.cross(v2)
74 | pose_avg = torch.stack([v0, v1, v2, center], dim=-1)[None] # [1,3,4]
75 | # Apply inverse of averaged pose.
76 | poses = camera.pose.compose([poses, camera.pose.invert(pose_avg)])
77 | return poses
78 |
79 | def __getitem__(self, idx):
80 | """Process raw data and return processed data in a dictionary.
81 |
82 | Args:
83 | idx: The index of the sample of the dataset.
84 | Returns: A dictionary containing the data.
85 | idx (scalar): The index of the sample of the dataset.
86 | image (3xHxW tensor): Image with pixel values in [0,1] for supervision.
87 | intr (3x3 tensor): The camera intrinsics of `image`.
88 | pose (3x4 tensor): The camera extrinsics [R,t] of `image`.
89 | """
90 | # Keep track of sample index for convenience.
91 | sample = dict(idx=idx)
92 | # Get the images.
93 | image = self.images[idx] if self.preload else self.get_image(idx)
94 | image = self.preprocess_image(image)
95 | # Get the cameras (intrinsics and pose).
96 | intr, pose = self.cameras[idx] if self.preload else self.get_camera(idx)
97 | intr, pose = self.preprocess_camera(intr, pose)
98 | # Update the data sample.
99 | sample.update(
100 | image=image,
101 | intr=intr,
102 | pose=pose,
103 | )
104 | return sample
105 |
106 | def get_image(self, idx):
107 | image_fname = f"{self.root}/images/{self.list[idx][0]}"
108 | image = Image.open(image_fname)
109 | image.load()
110 | return image
111 |
112 | def preprocess_image(self, image):
113 | # Resize the image and convert it to a PyTorch tensor.
114 | image = image.resize((self.W, self.H))
115 | image = torchvision_F.to_tensor(image)
116 | return image
117 |
118 | def get_camera(self, idx):
119 | # Camera intrinsics.
120 | intr = torch.tensor([[self.focal, 0, self.raw_W / 2],
121 | [0, self.focal, self.raw_H / 2],
122 | [0, 0, 1]]).float()
123 | # Camera pose.
124 | pose_raw = self.list[idx][1]
125 | pose = self.parse_raw_camera(pose_raw)
126 | return intr, pose
127 |
128 | def preprocess_camera(self, intr, pose):
129 | # Adjust the intrinsics according to the resized image.
130 | intr = intr.clone()
131 | intr[0] *= self.W / self.raw_W
132 | intr[1] *= self.H / self.raw_H
133 | return intr, pose
134 |
135 | def parse_raw_camera(self, pose_raw):
136 | pose_flip = camera.pose(R=torch.diag(torch.tensor([1, -1, -1])))
137 | pose = camera.pose.compose([pose_flip, pose_raw[:3]])
138 | pose = camera.pose.invert(pose)
139 | pose = camera.pose.compose([pose_flip, pose])
140 | return pose
141 |
--------------------------------------------------------------------------------
/projects/nerf/models/ingp.py:
--------------------------------------------------------------------------------
1 | '''
2 | -----------------------------------------------------------------------------
3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
4 |
5 | NVIDIA CORPORATION and its licensors retain all intellectual property
6 | and proprietary rights in and to this software, related documentation
7 | and any modifications thereto. Any use, reproduction, disclosure or
8 | distribution of this software and related documentation without an express
9 | license agreement from NVIDIA CORPORATION is strictly prohibited.
10 | -----------------------------------------------------------------------------
11 | '''
12 |
13 | import numpy as np
14 | import torch
15 | import tinycudann as tcnn
16 |
17 | from projects.nerf.models import nerf
18 |
19 |
20 | class Model(nerf.Model):
21 |
22 | def __init__(self, cfg_model, cfg_data):
23 | super().__init__(cfg_model, cfg_data)
24 | self.fine_sampling = False
25 | self.density_reg = cfg_model.density_noise_reg
26 | # Define models.
27 | self.nerf = InstantNGP(cfg_model)
28 |
29 |
30 | class InstantNGP(nerf.NeRF):
31 |
32 | def __init__(self, cfg_model):
33 | self.voxel = cfg_model.voxel
34 | super().__init__(cfg_model)
35 |
36 | def set_input_dims(self, cfg_model):
37 | # Define the input encoding dimensions.
38 | self.input_3D_dim = 3 + cfg_model.voxel.dim * cfg_model.voxel.levels.num
39 | self.input_view_dim = 3 if cfg_model.view_dep else None
40 |
41 | def build_model(self, cfg_model):
42 | super().build_model(cfg_model)
43 | # Build the tcnn hash grid.
44 | l_min, l_max = self.voxel.levels.min, self.voxel.levels.max
45 | r_min, r_max = 2 ** l_min, 2 ** l_max
46 | num_levels = self.voxel.levels.num
47 | growth_rate = np.exp((np.log(r_max) - np.log(r_min)) / (num_levels - 1))
48 | config = dict(
49 | otype="HashGrid",
50 | n_levels=cfg_model.voxel.levels.num,
51 | n_features_per_level=cfg_model.voxel.dim,
52 | log2_hashmap_size=cfg_model.voxel.dict_size,
53 | base_resolution=2 ** cfg_model.voxel.levels.min,
54 | per_level_scale=growth_rate,
55 | )
56 | self.tiny_cuda_encoding = tcnn.Encoding(3, config)
57 | # Compute resolutions of all levels.
58 | self.resolutions = []
59 | for lv in range(0, num_levels):
60 | size = np.floor(r_min * growth_rate ** lv).astype(int) + 1
61 | self.resolutions.append(size)
62 |
63 | def forward(self, points_3D, ray_unit, density_reg=None):
64 | return super().forward(points_3D, ray_unit, density_reg)
65 |
66 | def _encode_3D(self, points_3D):
67 | # Tri-linearly interpolate the corresponding embeddings from the dictionary.
68 | vol_min, vol_max = self.voxel.range
69 | points_3D_normalized = (points_3D - vol_min) / (vol_max - vol_min) # Normalize to [0,1].
70 | tcnn_input = points_3D_normalized.view(-1, 3)
71 | tcnn_output = self.tiny_cuda_encoding(tcnn_input)
72 | points_enc = tcnn_output.view(*points_3D_normalized.shape[:-1], tcnn_output.shape[-1])
73 | points_enc = torch.cat([points_enc, points_3D], dim=-1) # [B,R,N,LD+3]
74 | return points_enc
75 |
76 | def _encode_view(self, ray_unit):
77 | return ray_unit
78 |
--------------------------------------------------------------------------------
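
A worked sketch (not repository code) of the hash-grid level schedule computed in InstantNGP.build_model(), using the level settings from ingp_blender.yaml (min 4, max 12, 16 levels):

import numpy as np

l_min, l_max, num_levels = 4, 12, 16
r_min, r_max = 2 ** l_min, 2 ** l_max             # base and finest resolutions: 16 .. 4096
growth_rate = np.exp((np.log(r_max) - np.log(r_min)) / (num_levels - 1))
resolutions = [int(np.floor(r_min * growth_rate ** lv)) + 1 for lv in range(num_levels)]
print(round(growth_rate, 4))                      # ~1.4473 per-level scale
print(resolutions)                                # 17, 24, ..., 4097
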
/projects/nerf/trainers/base.py:
--------------------------------------------------------------------------------
1 | '''
2 | -----------------------------------------------------------------------------
3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
4 |
5 | NVIDIA CORPORATION and its licensors retain all intellectual property
6 | and proprietary rights in and to this software, related documentation
7 | and any modifications thereto. Any use, reproduction, disclosure or
8 | distribution of this software and related documentation without an express
9 | license agreement from NVIDIA CORPORATION is strictly prohibited.
10 | -----------------------------------------------------------------------------
11 | '''
12 |
13 | import torch
14 | import wandb
15 | from imaginaire.trainers.base import BaseTrainer
16 | from imaginaire.utils.distributed import is_master, master_only
17 | from tqdm import tqdm
18 |
19 | from projects.nerf.utils.misc import collate_test_data_batches, get_unique_test_data, trim_test_samples
20 |
21 |
22 | class BaseTrainer(BaseTrainer):
23 | """
24 | A customized BaseTrainer.
25 | """
26 |
27 | def __init__(self, cfg, is_inference=True, seed=0):
28 | super().__init__(cfg, is_inference=is_inference, seed=seed)
29 | self.metrics = dict()
30 | # The below configs should be properly overridden.
31 | cfg.setdefault("wandb_scalar_iter", 9999999999999)
32 | cfg.setdefault("wandb_image_iter", 9999999999999)
33 | cfg.setdefault("validation_epoch", 9999999999999)
34 | cfg.setdefault("validation_iter", 9999999999999)
35 |
36 | def init_losses(self, cfg):
37 | super().init_losses(cfg)
38 | self.weights = {key: value for key, value in cfg.trainer.loss_weight.items() if value}
39 |
40 | def _end_of_iteration(self, data, current_epoch, current_iteration):
41 | # Log to wandb.
42 | if current_iteration % self.cfg.wandb_scalar_iter == 0:
43 | # Compute the elapsed time (as in the original base trainer).
44 | self.timer.time_iteration = self.elapsed_iteration_time / self.cfg.wandb_scalar_iter
45 | self.elapsed_iteration_time = 0
46 | # Log scalars.
47 | self.log_wandb_scalars(data, mode="train")
48 | # Exit if the training loss has gone to NaN/inf.
49 | if is_master() and self.losses["total"].isnan():
50 | self.finalize(self.cfg)
51 | raise ValueError("Training loss has gone to NaN!!!")
52 | if is_master() and self.losses["total"].isinf():
53 | self.finalize(self.cfg)
54 | raise ValueError("Training loss has gone to infinity!!!")
55 | if current_iteration % self.cfg.wandb_image_iter == 0:
56 | self.log_wandb_images(data, mode="train")
57 | # Run validation on val set.
58 | if current_iteration % self.cfg.validation_iter == 0:
59 | data_all = self.test(self.eval_data_loader, mode="val")
60 | # Log the results to W&B.
61 | if is_master():
62 | self.log_wandb_scalars(data_all, mode="val")
63 | self.log_wandb_images(data_all, mode="val", max_samples=self.cfg.data.val.max_viz_samples)
64 |
65 | def _end_of_epoch(self, data, current_epoch, current_iteration):
66 | # Run validation on val set.
67 | if current_epoch % self.cfg.validation_epoch == 0:
68 | data_all = self.test(self.eval_data_loader, mode="val")
69 | # Log the results to W&B.
70 | if is_master():
71 | self.log_wandb_scalars(data_all, mode="val")
72 | self.log_wandb_images(data_all, mode="val", max_samples=self.cfg.data.val.max_viz_samples)
73 |
74 | @master_only
75 | def log_wandb_scalars(self, data, mode=None):
76 | scalars = dict()
77 | # Log scalars (basic info & losses).
78 | if mode == "train":
79 | scalars.update({"optim/lr": self.sched.get_last_lr()[0]})
80 | scalars.update({"time/iteration": self.timer.time_iteration})
81 | scalars.update({"time/epoch": self.timer.time_epoch})
82 | scalars.update({f"{mode}/loss/{key}": value for key, value in self.losses.items()})
83 | scalars.update(iteration=self.current_iteration, epoch=self.current_epoch)
84 | wandb.log(scalars, step=self.current_iteration)
85 |
86 | @master_only
87 | def log_wandb_images(self, data, mode=None, max_samples=None):
88 | trim_test_samples(data, max_samples=max_samples)
89 |
90 | def model_forward(self, data):
91 | # Model forward.
92 | output = self.model(data) # data = self.model(data) will not return the same data in the case of DDP.
93 | data.update(output)
94 | # Compute loss.
95 | self.timer._time_before_loss()
96 | self._compute_loss(data, mode="train")
97 | total_loss = self._get_total_loss()
98 | return total_loss
99 |
100 | def _compute_loss(self, data, mode=None):
101 | raise NotImplementedError
102 |
103 | def train(self, cfg, data_loader, single_gpu=False, profile=False, show_pbar=False):
104 | self.current_epoch = self.checkpointer.resume_epoch or self.current_epoch
105 | self.current_iteration = self.checkpointer.resume_iteration or self.current_iteration
106 | if ((self.current_epoch % self.cfg.validation_epoch == 0 or
107 | self.current_iteration % self.cfg.validation_iter == 0)):
108 | # Do an initial validation.
109 | data_all = self.test(self.eval_data_loader, mode="val", show_pbar=show_pbar)
110 | # Log the results to W&B.
111 | if is_master():
112 | self.log_wandb_scalars(data_all, mode="val")
113 | self.log_wandb_images(data_all, mode="val", max_samples=self.cfg.data.val.max_viz_samples)
114 | # Train.
115 | super().train(cfg, data_loader, single_gpu, profile, show_pbar)
116 |
117 | @torch.no_grad()
118 | def test(self, data_loader, output_dir=None, inference_args=None, mode="test", show_pbar=False):
119 | """The evaluation/inference engine.
120 | Args:
121 | data_loader: The data loader.
122 | output_dir: Output directory to dump the test results.
123 | inference_args: (unused)
124 | mode: Evaluation mode {"val", "test"}. Can be other modes, but will only gather the data.
125 | Returns:
126 | data_all: A dictionary of all the data.
127 | """
128 | if self.cfg.trainer.ema_config.enabled:
129 | model = self.model.module.averaged_model
130 | else:
131 | model = self.model.module
132 | model.eval()
133 | if show_pbar:
134 | data_loader = tqdm(data_loader, desc="Evaluating", leave=False)
135 | data_batches = []
136 | for it, data in enumerate(data_loader):
137 | data = self.start_of_iteration(data, current_iteration=self.current_iteration)
138 | output = model.inference(data)
139 | data.update(output)
140 | data_batches.append(data)
141 | # Aggregate the data from all devices and process the results.
142 | data_gather = collate_test_data_batches(data_batches)
143 |         # Only the master process processes the results; the other ranks simply return.
144 | if is_master():
145 | data_all = get_unique_test_data(data_gather, data_gather["idx"])
146 | tqdm.write(f"Evaluating with {len(data_all['idx'])} samples.")
147 | # Validate/test.
148 | if mode == "val":
149 | self._compute_loss(data_all, mode=mode)
150 | _ = self._get_total_loss()
151 | if mode == "test":
152 | # Dump the test results for postprocessing.
153 | self.dump_test_results(data_all, output_dir)
154 | return data_all
155 | else:
156 | return
157 |
158 | def dump_test_results(self, data_all, output_dir):
159 | raise NotImplementedError
160 |
--------------------------------------------------------------------------------
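A minimal sketch of how a project trainer fills in the hooks this base class leaves abstract; the class name MyTrainer, the "render"/"rgb"/"image" names and the results.pt filename are illustrative assumptions, not part of the repository (the `_init_loss` hook is the one the NeRF trainer below also implements):

import torch

from projects.nerf.trainers.base import BaseTrainer


class MyTrainer(BaseTrainer):

    def _init_loss(self, cfg):
        # Criteria registered here are weighted by cfg.trainer.loss_weight in init_losses().
        self.criteria["render"] = torch.nn.MSELoss()

    def _compute_loss(self, data, mode=None):
        # "rgb" (prediction) and "image" (target) are assumed dictionary keys for this sketch.
        self.losses["render"] = self.criteria["render"](data["rgb"], data["image"])

    def dump_test_results(self, data_all, output_dir):
        # Called by test(mode="test") on the master process to persist whatever is needed.
        torch.save(data_all, f"{output_dir}/results.pt")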
/projects/nerf/trainers/nerf.py:
--------------------------------------------------------------------------------
1 | '''
2 | -----------------------------------------------------------------------------
3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
4 |
5 | NVIDIA CORPORATION and its licensors retain all intellectual property
6 | and proprietary rights in and to this software, related documentation
7 | and any modifications thereto. Any use, reproduction, disclosure or
8 | distribution of this software and related documentation without an express
9 | license agreement from NVIDIA CORPORATION is strictly prohibited.
10 | -----------------------------------------------------------------------------
11 | '''
12 |
13 | import torch
14 | import torch.nn.functional as torch_F
15 | import wandb
16 | import skvideo.io
17 |
18 | from imaginaire.utils.distributed import master_only
19 | from projects.nerf.trainers.base import BaseTrainer
20 | from imaginaire.utils.visualization import wandb_image, preprocess_image
21 |
22 |
23 | class Trainer(BaseTrainer):
24 |
25 | def __init__(self, cfg, is_inference=True, seed=0):
26 | super().__init__(cfg, is_inference=is_inference, seed=seed)
27 | self.batch_idx, _ = torch.meshgrid(torch.arange(cfg.data.train.batch_size),
28 | torch.arange(cfg.model.rand_rays), indexing="ij") # [B,R]
29 | self.batch_idx = self.batch_idx.cuda()
30 |
31 | def _init_loss(self, cfg):
32 | self.criteria["render"] = self.criteria["render_fine"] = torch.nn.MSELoss()
33 |
34 | def _compute_loss(self, data, mode=None):
35 | if mode == "train":
36 | # Extract the corresponding sampled rays.
37 | batch_size = len(data["image"])
38 | image_vec = data["image"].permute(0, 2, 3, 1).view(batch_size, -1, 3) # [B,HW,3]
39 | image_sampled = image_vec[self.batch_idx, data["ray_idx"]] # [B,R,3]
40 | # Compute loss only on randomly sampled rays.
41 | self.losses["render"] = self.criteria["render"](data["rgb"], image_sampled)
42 | self.metrics["psnr"] = -10 * torch_F.mse_loss(data["rgb"], image_sampled).log10()
43 | if self.cfg.model.fine_sampling:
44 | self.losses["render_fine"] = self.criteria["render_fine"](data["rgb_fine"], image_sampled)
45 | self.metrics["psnr_fine"] = -10 * torch_F.mse_loss(data["rgb_fine"], image_sampled).log10()
46 | else:
47 | # Compute loss on the entire image.
48 | self.losses["render"] = self.criteria["render"](data["rgb_map"], data["image"])
49 | self.metrics["psnr"] = -10 * torch_F.mse_loss(data["rgb_map"], data["image"]).log10()
50 | if self.cfg.model.fine_sampling:
51 | self.losses["render_fine"] = self.criteria["render_fine"](data["rgb_map_fine"], data["image"])
52 | self.metrics["psnr_fine"] = -10 * torch_F.mse_loss(data["rgb_map_fine"], data["image"]).log10()
53 |
54 | @master_only
55 | def log_wandb_scalars(self, data, mode=None):
56 | super().log_wandb_scalars(data, mode=mode)
57 | scalars = {f"{mode}/PSNR/nerf": self.metrics["psnr"].detach()}
58 | if "render_fine" in self.losses:
59 | scalars.update({f"{mode}/PSNR/nerf_fine": self.metrics["psnr_fine"].detach()})
60 | wandb.log(scalars, step=self.current_iteration)
61 |
62 | @master_only
63 | def log_wandb_images(self, data, mode=None, max_samples=None):
64 | super().log_wandb_images(data, mode=mode, max_samples=max_samples)
65 | images = {f"{mode}/image_target": wandb_image(data["image"])}
66 | if mode == "val":
67 | images_error = (data["rgb_map"] - data["image"]).abs()
68 | images.update({
69 | f"{mode}/images": wandb_image(data["rgb_map"]),
70 | f"{mode}/images_error": wandb_image(images_error),
71 | f"{mode}/inv_depth": wandb_image(data["inv_depth_map"]),
72 | })
73 | if self.cfg.model.fine_sampling:
74 | images_error_fine = (data["rgb_map_fine"] - data["image"]).abs()
75 | images.update({
76 | f"{mode}/images_fine": wandb_image(data["rgb_map_fine"]),
77 | f"{mode}/images_error_fine": wandb_image(images_error_fine),
78 | f"{mode}/inv_depth_fine": wandb_image(data["inv_depth_map_fine"]),
79 | })
80 | images.update({"iteration": self.current_iteration})
81 | images.update({"epoch": self.current_epoch})
82 | wandb.log(images, step=self.current_iteration)
83 |
84 | def dump_test_results(self, data_all, output_dir):
85 | results = dict(
86 |             images_target=preprocess_image(data_all["image"]),
87 | image=preprocess_image(data_all["rgb_map"]),
88 | inv_depth=preprocess_image(data_all["inv_depth_map"]),
89 | )
90 | if self.cfg.model.fine_sampling:
91 | results.update(
92 | image_fine=preprocess_image(data_all["rgb_map_fine"]),
93 | inv_depth_fine=preprocess_image(data_all["inv_depth_map_fine"]),
94 | )
95 | # Write results as videos.
96 | inputdict, outputdict = self._get_ffmpeg_dicts()
97 | for key, image_list in results.items():
98 | print(f"writing video ({key})...")
99 | video_fname = f"{output_dir}/{key}.mp4"
100 | video_writer = skvideo.io.FFmpegWriter(video_fname, inputdict=inputdict, outputdict=outputdict)
101 | for image in image_list:
102 | image = (image * 255).byte().permute(1, 2, 0).numpy()
103 | video_writer.writeFrame(image)
104 | video_writer.close()
105 |
106 | def _get_ffmpeg_dicts(self):
107 | inputdict = {"-r": str(30)}
108 | outputdict = {"-crf": str(10), "-pix_fmt": "yuv420p"}
109 | return inputdict, outputdict
110 |
--------------------------------------------------------------------------------
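The PSNR metrics above are simply -10 * log10(MSE) on values in [0,1]; a quick standalone check with toy tensors (not repository data):

import torch
import torch.nn.functional as torch_F

pred = torch.rand(2, 512, 3)                        # e.g. [batch, rays, RGB] predictions in [0,1]
target = (pred + 0.01 * torch.randn_like(pred)).clamp_(0, 1)
mse = torch_F.mse_loss(pred, target)
psnr = -10 * mse.log10()                            # same expression as the trainer's "psnr" metric
print(f"MSE={mse.item():.6f}  PSNR={psnr.item():.2f} dB")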
/projects/nerf/utils/misc.py:
--------------------------------------------------------------------------------
1 | '''
2 | -----------------------------------------------------------------------------
3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
4 |
5 | NVIDIA CORPORATION and its licensors retain all intellectual property
6 | and proprietary rights in and to this software, related documentation
7 | and any modifications thereto. Any use, reproduction, disclosure or
8 | distribution of this software and related documentation without an express
9 | license agreement from NVIDIA CORPORATION is strictly prohibited.
10 | -----------------------------------------------------------------------------
11 | '''
12 |
13 | import torch
14 |
15 | from imaginaire.utils.distributed import dist_all_gather_tensor
16 |
17 |
18 | def collate_test_data_batches(data_batches):
19 | """Aggregate the list of test data from all devices and process the results.
20 | Args:
21 | data_batches (list): List of (hierarchical) dictionaries, where leaf entries are tensors.
22 | Returns:
23 | data_gather (dict): (hierarchical) dictionaries, where leaf entries are concatenated tensors.
24 | """
25 | data_gather = dict()
26 | for key in data_batches[0].keys():
27 | data_list = [data[key] for data in data_batches]
28 | if isinstance(data_batches[0][key], dict):
29 | data_gather[key] = collate_test_data_batches(data_list)
30 | elif isinstance(data_batches[0][key], torch.Tensor):
31 | data_gather[key] = torch.cat(data_list, dim=0)
32 | data_gather[key] = torch.cat(dist_all_gather_tensor(data_gather[key].contiguous()), dim=0)
33 | else:
34 | raise TypeError
35 | return data_gather
36 |
37 |
38 | def get_unique_test_data(data_gather, idx):
39 |     """Deduplicate the gathered test data by sample index, keeping the first occurrence of each idx.
40 | Args:
41 | data_gather (dict): (hierarchical) dictionaries, where leaf entries are tensors.
42 | idx (tensor): sample indices.
43 | Returns:
44 | data_all (dict): (hierarchical) dictionaries, where leaf entries are tensors ordered by idx.
45 | """
46 | data_all = dict()
47 | for key, value in data_gather.items():
48 | if isinstance(value, dict):
49 | data_all[key] = get_unique_test_data(value, idx)
50 | elif isinstance(value, torch.Tensor):
51 | data_all[key] = []
52 | for i in range(max(idx) + 1):
53 | # If multiple occurrences of the same idx, just choose the first one. If no occurrence, just ignore.
54 | matches = (idx == i).nonzero(as_tuple=True)[0]
55 | if matches.numel() != 0:
56 | data_all[key].append(value[matches[0]])
57 | data_all[key] = torch.stack(data_all[key], dim=0)
58 | else:
59 | raise TypeError
60 | return data_all
61 |
62 |
63 | def trim_test_samples(data, max_samples=None):
64 | for key, value in data.items():
65 | if isinstance(value, dict):
66 |             trim_test_samples(value, max_samples=max_samples)  # recurse in place (the function returns None)
67 | elif isinstance(value, torch.Tensor):
68 | if max_samples is not None:
69 | data[key] = value[:max_samples]
70 | else:
71 | raise TypeError
72 |
--------------------------------------------------------------------------------
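A toy, single-process illustration of how get_unique_test_data deduplicates gathered samples and how trim_test_samples caps the number of visualized samples; the indices and tensors are made up (collate_test_data_batches is omitted since it needs a distributed setup):

import torch

from projects.nerf.utils.misc import get_unique_test_data, trim_test_samples

# Pretend two devices evaluated overlapping sample indices 0, 1, 1, 2.
idx = torch.tensor([0, 1, 1, 2])
data_gather = {"idx": idx, "rgb": torch.arange(12, dtype=torch.float32).view(4, 3)}

data_all = get_unique_test_data(data_gather, idx)
print(data_all["idx"])          # tensor([0, 1, 2]) -- duplicates of idx 1 collapse to the first occurrence
print(data_all["rgb"].shape)    # torch.Size([3, 3])

trim_test_samples(data_all, max_samples=2)   # trims leaf tensors in place
print(data_all["rgb"].shape)    # torch.Size([2, 3])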
/projects/nerf/utils/render.py:
--------------------------------------------------------------------------------
1 | '''
2 | -----------------------------------------------------------------------------
3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
4 |
5 | NVIDIA CORPORATION and its licensors retain all intellectual property
6 | and proprietary rights in and to this software, related documentation
7 | and any modifications thereto. Any use, reproduction, disclosure or
8 | distribution of this software and related documentation without an express
9 | license agreement from NVIDIA CORPORATION is strictly prohibited.
10 | -----------------------------------------------------------------------------
11 | '''
12 |
13 | import torch
14 | from torch.cuda.amp import autocast
15 |
16 |
17 | def volume_rendering_weights(ray, densities, depths, depth_far=None):
18 | """The volume rendering function. Details can be found in the NeRF paper.
19 | Args:
20 | ray (tensor [batch,ray,3]): The ray directions in world space.
21 | densities (tensor [batch,ray,samples]): The predicted volume density samples.
22 | depths (tensor [batch,ray,samples,1]): The corresponding depth samples.
23 | depth_far (tensor [batch,ray,1,1]): The farthest depth for computing the last interval.
24 | Returns:
25 | weights (tensor [batch,ray,samples,1]): The predicted weight of each sampled point along the ray (in [0,1]).
26 | """
27 | ray_length = ray.norm(dim=-1, keepdim=True) # [B,R,1]
28 | if depth_far is None:
29 | depth_far = torch.empty_like(depths[..., :1, :]).fill_(1e10) # [B,R,1,1]
30 | depths_aug = torch.cat([depths, depth_far], dim=2) # [B,R,N+1,1]
31 | dists = depths_aug * ray_length[..., None] # [B,R,N+1,1]
32 | # Volume rendering: compute rendering weights (using quadrature).
33 | dist_intvs = dists[..., 1:, 0] - dists[..., :-1, 0] # [B,R,N]
34 | sigma_delta = densities * dist_intvs # [B,R,N]
35 | sigma_delta_0 = torch.cat([torch.zeros_like(sigma_delta[..., :1]),
36 | sigma_delta[..., :-1]], dim=2) # [B,R,N]
37 | T = (-sigma_delta_0.cumsum(dim=2)).exp_() # [B,R,N]
38 | alphas = 1 - (-sigma_delta).exp_() # [B,R,N]
39 | # Compute weights for compositing samples.
40 | weights = (T * alphas)[..., None] # [B,R,N,1]
41 | return weights
42 |
43 |
44 | def volume_rendering_weights_dist(densities, dists, dist_far=None):
45 | """The volume rendering function. Details can be found in the NeRF paper.
46 | Args:
47 | densities (tensor [batch,ray,samples]): The predicted volume density samples.
48 | dists (tensor [batch,ray,samples,1]): The corresponding distance samples.
49 | dist_far (tensor [batch,ray,1,1]): The farthest distance for computing the last interval.
50 | Returns:
51 | weights (tensor [batch,ray,samples,1]): The predicted weight of each sampled point along the ray (in [0,1]).
52 | """
53 | # TODO: re-consolidate!!
54 | if dist_far is None:
55 | dist_far = torch.empty_like(dists[..., :1, :]).fill_(1e10) # [B,R,1,1]
56 | dists = torch.cat([dists, dist_far], dim=2) # [B,R,N+1,1]
57 | # Volume rendering: compute rendering weights (using quadrature).
58 | dist_intvs = dists[..., 1:, 0] - dists[..., :-1, 0] # [B,R,N]
59 | sigma_delta = densities * dist_intvs # [B,R,N]
60 | sigma_delta_0 = torch.cat([torch.zeros_like(sigma_delta[..., :1]), sigma_delta[..., :-1]], dim=2) # [B,R,N]
61 | T = (-sigma_delta_0.cumsum(dim=2)).exp_() # [B,R,N]
62 | alphas = 1 - (-sigma_delta).exp_() # [B,R,N]
63 | # Compute weights for compositing samples.
64 | weights = (T * alphas)[..., None] # [B,R,N,1]
65 | return weights
66 |
67 |
68 | def volume_rendering_alphas_dist(densities, dists, dist_far=None):
69 | """The volume rendering function. Details can be found in the NeRF paper.
70 | Args:
71 | densities (tensor [batch,ray,samples]): The predicted volume density samples.
72 | dists (tensor [batch,ray,samples,1]): The corresponding distance samples.
73 | dist_far (tensor [batch,ray,1,1]): The farthest distance for computing the last interval.
74 | Returns:
75 | alphas (tensor [batch,ray,samples,1]): The occupancy of each sampled point along the ray (in [0,1]).
76 | """
77 | if dist_far is None:
78 | dist_far = torch.empty_like(dists[..., :1, :]).fill_(1e10) # [B,R,1,1]
79 | dists = torch.cat([dists, dist_far], dim=2) # [B,R,N+1,1]
80 | # Volume rendering: compute rendering weights (using quadrature).
81 | dist_intvs = dists[..., 1:, 0] - dists[..., :-1, 0] # [B,R,N]
82 | sigma_delta = densities * dist_intvs # [B,R,N]
83 | alphas = 1 - (-sigma_delta).exp_() # [B,R,N]
84 | return alphas
85 |
86 |
87 | def alpha_compositing_weights(alphas):
88 | """Alpha compositing of (sampled) MPIs given their RGBs and alphas.
89 | Args:
90 | alphas (tensor [batch,ray,samples]): The predicted opacity values.
91 | Returns:
92 | weights (tensor [batch,ray,samples,1]): The predicted weight of each MPI (in [0,1]).
93 | """
94 | alphas_front = torch.cat([torch.zeros_like(alphas[..., :1]),
95 | alphas[..., :-1]], dim=2) # [B,R,N]
96 | with autocast(enabled=False): # TODO: may be unstable in some cases.
97 | visibility = (1 - alphas_front).cumprod(dim=2) # [B,R,N]
98 | weights = (alphas * visibility)[..., None] # [B,R,N,1]
99 | return weights
100 |
101 |
102 | def composite(quantities, weights):
103 | """Composite the samples to render the RGB/depth/opacity of the corresponding pixels.
104 | Args:
105 | quantities (tensor [batch,ray,samples,k]): The quantity to be weighted summed.
106 | weights (tensor [batch,ray,samples,1]): The predicted weight of each sampled point along the ray.
107 | Returns:
108 | quantity (tensor [batch,ray,k]): The expected (rendered) quantity.
109 | """
110 | # Integrate RGB and depth weighted by probability.
111 | quantity = (quantities * weights).sum(dim=2) # [B,R,K]
112 | return quantity
113 |
--------------------------------------------------------------------------------
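A minimal sketch of how these helpers chain together to shade a pixel: per-sample opacities become compositing weights, and composite() integrates any per-sample quantity along the ray (here RGB and accumulated opacity). All tensors are random toys with the documented [batch,ray,samples] shapes:

import torch

from projects.nerf.utils.render import alpha_compositing_weights, composite

B, R, N = 2, 4, 8                          # batch, rays, samples per ray
alphas = torch.rand(B, R, N)               # per-sample opacities in [0,1]
rgbs = torch.rand(B, R, N, 3)              # per-sample colors
weights = alpha_compositing_weights(alphas)                         # [B,R,N,1]
rgb = composite(rgbs, weights)                                      # [B,R,3] rendered colors
opacity = composite(torch.ones_like(alphas)[..., None], weights)    # [B,R,1] accumulated opacity
print(rgb.shape, float(opacity.min()), float(opacity.max()))        # accumulated opacity stays within [0,1]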
/projects/nerf/utils/visualize.py:
--------------------------------------------------------------------------------
1 | '''
2 | -----------------------------------------------------------------------------
3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
4 |
5 | NVIDIA CORPORATION and its licensors retain all intellectual property
6 | and proprietary rights in and to this software, related documentation
7 | and any modifications thereto. Any use, reproduction, disclosure or
8 | distribution of this software and related documentation without an express
9 | license agreement from NVIDIA CORPORATION is strictly prohibited.
10 | -----------------------------------------------------------------------------
11 | '''
12 |
13 | import numpy as np
14 | import torch
15 | import matplotlib.pyplot as plt
16 | import plotly.graph_objs as go
17 | import k3d
18 |
19 | from projects.nerf.utils import camera
20 |
21 |
22 | def get_camera_mesh(pose, depth=1):
23 | vertices = torch.tensor([[-0.5, -0.5, 1],
24 | [0.5, -0.5, 1],
25 | [0.5, 0.5, 1],
26 | [-0.5, 0.5, 1],
27 |                              [0, 0, 0]]) * depth  # [5,3]
28 | faces = torch.tensor([[0, 1, 2],
29 | [0, 2, 3],
30 | [0, 1, 4],
31 | [1, 2, 4],
32 | [2, 3, 4],
33 | [3, 0, 4]]) # [6,3]
34 |     vertices = camera.cam2world(vertices[None], pose)  # [N,5,3]
35 | wireframe = vertices[:, [0, 1, 2, 3, 0, 4, 1, 2, 4, 3]] # [N,10,3]
36 | return vertices, faces, wireframe
37 |
38 |
39 | def merge_meshes(vertices, faces):
40 | mesh_N, vertex_N = vertices.shape[:2]
41 | faces_merged = torch.cat([faces + i * vertex_N for i in range(mesh_N)], dim=0)
42 | vertices_merged = vertices.view(-1, vertices.shape[-1])
43 | return vertices_merged, faces_merged
44 |
45 |
46 | def merge_wireframes_k3d(wireframe):
47 | wf_first, wf_last, wf_dummy = wireframe[:, :1], wireframe[:, -1:], wireframe[:, :1] * np.nan
48 | wireframe_merged = torch.cat([wf_first, wireframe, wf_last, wf_dummy], dim=1)
49 | return wireframe_merged
50 |
51 |
52 | def merge_wireframes_plotly(wireframe):
53 | wf_dummy = wireframe[:, :1] * np.nan
54 | wireframe_merged = torch.cat([wireframe, wf_dummy], dim=1).view(-1, 3)
55 | return wireframe_merged
56 |
57 |
58 | def get_xyz_indicators(pose, length=0.1):
59 | xyz = torch.eye(4, 3)[None] * length
60 | xyz = camera.cam2world(xyz, pose)
61 | return xyz
62 |
63 |
64 | def merge_xyz_indicators_k3d(xyz): # [N,4,3]
65 | xyz = xyz[:, [[-1, 0], [-1, 1], [-1, 2]]] # [N,3,2,3]
66 | xyz_0, xyz_1 = xyz.unbind(dim=2) # [N,3,3]
67 | xyz_dummy = xyz_0 * np.nan
68 | xyz_merged = torch.stack([xyz_0, xyz_0, xyz_1, xyz_1, xyz_dummy], dim=2) # [N,3,5,3]
69 | return xyz_merged
70 |
71 |
72 | def merge_xyz_indicators_plotly(xyz): # [N,4,3]
73 | xyz = xyz[:, [[-1, 0], [-1, 1], [-1, 2]]] # [N,3,2,3]
74 | xyz_0, xyz_1 = xyz.unbind(dim=2) # [N,3,3]
75 | xyz_dummy = xyz_0 * np.nan
76 | xyz_merged = torch.stack([xyz_0, xyz_1, xyz_dummy], dim=2) # [N,3,3,3]
77 | xyz_merged = xyz_merged.view(-1, 3)
78 | return xyz_merged
79 |
80 |
81 | def k3d_visualize_pose(poses, vis_depth=0.5, xyz_length=0.1, center_size=0.1, xyz_width=0.02, mesh_opacity=0.05):
82 | # poses has shape [N,3,4] potentially in sequential order
83 | N = len(poses)
84 | centers_cam = torch.zeros(N, 1, 3)
85 | centers_world = camera.cam2world(centers_cam, poses)
86 | centers_world = centers_world[:, 0]
87 | # Get the camera wireframes.
88 | vertices, faces, wireframe = get_camera_mesh(poses, depth=vis_depth)
89 | xyz = get_xyz_indicators(poses, length=xyz_length)
90 | vertices_merged, faces_merged = merge_meshes(vertices, faces)
91 | wireframe_merged = merge_wireframes_k3d(wireframe)
92 | xyz_merged = merge_xyz_indicators_k3d(xyz)
93 | # Set the color map for the camera trajectory and the xyz indicators.
94 | color_map = plt.get_cmap("gist_rainbow")
95 | center_color = []
96 | vertices_merged_color = []
97 | wireframe_color = []
98 | xyz_color = []
99 | x_hex, y_hex, z_hex = int(255) << 16, int(255) << 8, int(255)
100 | for i in range(N):
101 | # Set the camera pose colors (with a smooth gradient color map).
102 | r, g, b, _ = color_map(i / (N - 1))
103 | r, g, b = r * 0.8, g * 0.8, b * 0.8
104 | pose_rgb_hex = (int(r * 255) << 16) + (int(g * 255) << 8) + int(b * 255)
105 | center_color += [pose_rgb_hex]
106 | vertices_merged_color += [pose_rgb_hex] * 5
107 | wireframe_color += [pose_rgb_hex] * 13
108 | # Set the xyz indicator colors.
109 | xyz_color += [x_hex] * 5 + [y_hex] * 5 + [z_hex] * 5
110 | # Plot in K3D.
111 | k3d_objects = [
112 | k3d.points(centers_world, colors=center_color, point_size=center_size, shader="3d"),
113 | k3d.mesh(vertices_merged, faces_merged, colors=vertices_merged_color, side="double", opacity=mesh_opacity),
114 | k3d.line(wireframe_merged, colors=wireframe_color, shader="simple"),
115 | k3d.line(xyz_merged, colors=xyz_color, shader="thick", width=xyz_width),
116 | ]
117 | return k3d_objects
118 |
119 |
120 | def plotly_visualize_pose(poses, vis_depth=0.5, xyz_length=0.5, center_size=2, xyz_width=5, mesh_opacity=0.05):
121 | # poses has shape [N,3,4] potentially in sequential order
122 | N = len(poses)
123 | centers_cam = torch.zeros(N, 1, 3)
124 | centers_world = camera.cam2world(centers_cam, poses)
125 | centers_world = centers_world[:, 0]
126 | # Get the camera wireframes.
127 | vertices, faces, wireframe = get_camera_mesh(poses, depth=vis_depth)
128 | xyz = get_xyz_indicators(poses, length=xyz_length)
129 | vertices_merged, faces_merged = merge_meshes(vertices, faces)
130 | wireframe_merged = merge_wireframes_plotly(wireframe)
131 | xyz_merged = merge_xyz_indicators_plotly(xyz)
132 | # Break up (x,y,z) coordinates.
133 | wireframe_x, wireframe_y, wireframe_z = wireframe_merged.unbind(dim=-1)
134 | xyz_x, xyz_y, xyz_z = xyz_merged.unbind(dim=-1)
135 | centers_x, centers_y, centers_z = centers_world.unbind(dim=-1)
136 | vertices_x, vertices_y, vertices_z = vertices_merged.unbind(dim=-1)
137 | # Set the color map for the camera trajectory and the xyz indicators.
138 | color_map = plt.get_cmap("gist_rainbow")
139 | center_color = []
140 | faces_merged_color = []
141 | wireframe_color = []
142 | xyz_color = []
143 |     x_color, y_color, z_color = np.eye(3)
144 | for i in range(N):
145 | # Set the camera pose colors (with a smooth gradient color map).
146 | r, g, b, _ = color_map(i / (N - 1))
147 | rgb = np.array([r, g, b]) * 0.8
148 | wireframe_color += [rgb] * 11
149 | center_color += [rgb]
150 | faces_merged_color += [rgb] * 6
151 | xyz_color += [x_color] * 3 + [y_color] * 3 + [z_color] * 3
152 | # Plot in plotly.
153 | plotly_traces = [
154 | go.Scatter3d(x=wireframe_x, y=wireframe_y, z=wireframe_z, mode="lines",
155 | line=dict(color=wireframe_color, width=1)),
156 | go.Scatter3d(x=xyz_x, y=xyz_y, z=xyz_z, mode="lines", line=dict(color=xyz_color, width=xyz_width)),
157 | go.Scatter3d(x=centers_x, y=centers_y, z=centers_z, mode="markers",
158 | marker=dict(color=center_color, size=center_size, opacity=1)),
159 | go.Mesh3d(x=vertices_x, y=vertices_y, z=vertices_z,
160 | i=[f[0] for f in faces_merged], j=[f[1] for f in faces_merged], k=[f[2] for f in faces_merged],
161 | facecolor=faces_merged_color, opacity=mesh_opacity),
162 | ]
163 | return plotly_traces
164 |
--------------------------------------------------------------------------------
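A small sketch of driving plotly_visualize_pose from a notebook with two toy [3,4] camera poses (identity rotation, shifted centers); the figure layout options are illustrative assumptions, not prescribed by the repository:

import torch
import plotly.graph_objs as go

from projects.nerf.utils.visualize import plotly_visualize_pose

poses = torch.eye(3, 4).repeat(2, 1, 1)            # [N,3,4] toy camera poses
poses[1, :, 3] = torch.tensor([0.5, 0.0, 0.0])     # shift the second camera
traces = plotly_visualize_pose(poses, vis_depth=0.3, xyz_length=0.2, center_size=3, xyz_width=4)
fig = go.Figure(data=traces, layout=go.Layout(scene=dict(aspectmode="data"), showlegend=False))
fig.show()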
/projects/neuralangelo/configs/base.yaml:
--------------------------------------------------------------------------------
1 | # -----------------------------------------------------------------------------
2 | # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
3 | #
4 | # NVIDIA CORPORATION and its licensors retain all intellectual property
5 | # and proprietary rights in and to this software, related documentation
6 | # and any modifications thereto. Any use, reproduction, disclosure or
7 | # distribution of this software and related documentation without an express
8 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
9 | # -----------------------------------------------------------------------------
10 |
11 | logging_iter: 9999999999999 # disable the printing logger
12 |
13 | max_iter: 500000
14 |
15 | wandb_scalar_iter: 100
16 | wandb_image_iter: 10000
17 | validation_iter: 5000
18 | speed_benchmark: False
19 |
20 | checkpoint:
21 | save_iter: 20000
22 |
23 | trainer:
24 | type: projects.neuralangelo.trainer
25 | ema_config:
26 | enabled: False
27 | load_ema_checkpoint: False
28 | loss_weight:
29 | render: 1.0
30 | eikonal: 0.1
31 | curvature: 5e-4
32 | init:
33 | type: none
34 | amp_config:
35 | enabled: False
36 | depth_vis_scale: 0.5
37 |
38 | model:
39 | type: projects.neuralangelo.model
40 | object:
41 | sdf:
42 | mlp:
43 | num_layers: 1
44 | hidden_dim: 256
45 | skip: []
46 | activ: softplus
47 | activ_params:
48 | beta: 100
49 | geometric_init: True
50 | weight_norm: True
51 | out_bias: 0.5
52 | inside_out: False
53 | encoding:
54 | type: hashgrid
55 | levels: 16
56 | hashgrid:
57 | min_logres: 5
58 | max_logres: 11
59 | dict_size: 22
60 | dim: 8
61 | range: [-2,2]
62 | coarse2fine:
63 | enabled: True
64 | init_active_level: 4
65 | step: 5000
66 | gradient:
67 | mode: numerical
68 | taps: 4
69 | rgb:
70 | mlp:
71 | num_layers: 4
72 | hidden_dim: 256
73 | skip: []
74 | activ: relu_
75 | activ_params: {}
76 | weight_norm: True
77 | mode: idr
78 | encoding_view:
79 | type: spherical
80 | levels: 3
81 | s_var:
82 | init_val: 3.
83 | anneal_end: 0.1
84 | background:
85 | enabled: True
86 | white: False
87 | mlp:
88 | num_layers: 8
89 | hidden_dim: 256
90 | skip: [4]
91 | num_layers_rgb: 2
92 | hidden_dim_rgb: 128
93 | skip_rgb: []
94 | activ: relu
95 | activ_params: {}
96 | activ_density: softplus
97 | activ_density_params: {}
98 | view_dep: True
99 | encoding:
100 | type: fourier
101 | levels: 10
102 | encoding_view:
103 | type: spherical
104 | levels: 3
105 | render:
106 | rand_rays: 512
107 | num_samples:
108 | coarse: 64
109 | fine: 16
110 | background: 32
111 | num_sample_hierarchy: 4
112 | stratified: True
113 | appear_embed:
114 | enabled: False
115 | dim: 8
116 |
117 | optim:
118 | type: AdamW
119 | params:
120 | lr: 1e-3
121 | weight_decay: 1e-2
122 | sched:
123 | iteration_mode: True
124 | type: two_steps_with_warmup
125 | warm_up_end: 5000
126 | two_steps: [300000,400000]
127 | gamma: 10.0
128 |
129 | data:
130 | type: projects.nerf.datasets.nerf_blender
131 | root: datasets/nerf-synthetic/lego
132 | use_multi_epoch_loader: True
133 | num_workers: 4
134 | preload: True
135 | num_images: # The number of training images.
136 | train:
137 | image_size: [800,800]
138 | batch_size: 2
139 | subset:
140 | val:
141 | image_size: [400,400]
142 | batch_size: 2
143 | subset: 4
144 | max_viz_samples: 16
145 | readjust:
146 | center: [0.,0.,0.]
147 | scale: 1.
148 |
--------------------------------------------------------------------------------
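For reference, the same Config class that extract_mesh.py uses can load this file directly; a minimal sketch of reading and overriding a few of the fields defined above (the override values are arbitrary examples):

from imaginaire.config import Config

cfg = Config("projects/neuralangelo/configs/base.yaml")
print(cfg.model.render.rand_rays)        # 512
print(cfg.optim.sched.two_steps)         # [300000, 400000]
cfg.data.root = "datasets/nerf-synthetic/chair"   # point the loader at a different scene
cfg.max_iter = 100000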
/projects/neuralangelo/configs/custom/template.yaml:
--------------------------------------------------------------------------------
1 | # -----------------------------------------------------------------------------
2 | # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
3 | #
4 | # NVIDIA CORPORATION and its licensors retain all intellectual property
5 | # and proprietary rights in and to this software, related documentation
6 | # and any modifications thereto. Any use, reproduction, disclosure or
7 | # distribution of this software and related documentation without an express
8 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
9 | # -----------------------------------------------------------------------------
10 |
11 | # NOTE: this template is an example for designing your custom config file. It is not intended to be used directly.
12 | # Please preprocess your custom data and auto-generate the config file before customizing.
13 |
14 | _parent_: projects/neuralangelo/configs/base.yaml
15 |
16 | model:
17 | object:
18 | sdf:
19 | mlp:
20 | inside_out: False
21 | encoding:
22 | coarse2fine:
23 | init_active_level: 8
24 | appear_embed:
25 | enabled: True
26 | dim: 8
27 |
28 | data:
29 | type: projects.neuralangelo.data
30 | root: # The root path of the dataset.
31 | num_images: # The number of training images.
32 | train:
33 | image_size: [1200,1600]
34 | batch_size: 1
35 | subset:
36 | val:
37 | image_size: [300,400]
38 | batch_size: 1
39 | subset: 1
40 | max_viz_samples:
41 | readjust:
42 | center: [0.,0.,0.]
43 | scale: 1.
44 |
--------------------------------------------------------------------------------
/projects/neuralangelo/configs/dtu.yaml:
--------------------------------------------------------------------------------
1 | # -----------------------------------------------------------------------------
2 | # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
3 | #
4 | # NVIDIA CORPORATION and its licensors retain all intellectual property
5 | # and proprietary rights in and to this software, related documentation
6 | # and any modifications thereto. Any use, reproduction, disclosure or
7 | # distribution of this software and related documentation without an express
8 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
9 | # -----------------------------------------------------------------------------
10 |
11 | _parent_: projects/neuralangelo/configs/base.yaml
12 |
13 | model:
14 | object:
15 | sdf:
16 | mlp:
17 | inside_out: False
18 | encoding:
19 | coarse2fine:
20 | init_active_level: 4
21 | s_var:
22 | init_val: 1.4
23 | appear_embed:
24 | enabled: False
25 |
26 | data:
27 | type: projects.neuralangelo.data
28 | root: datasets/dtu/dtu_scan24
29 | train:
30 | image_size: [1200,1600]
31 | batch_size: 1
32 | subset:
33 | val:
34 | image_size: [300,400]
35 | batch_size: 1
36 | subset: 1
37 | max_viz_samples: 16
38 |
--------------------------------------------------------------------------------
/projects/neuralangelo/configs/tnt.yaml:
--------------------------------------------------------------------------------
1 | # -----------------------------------------------------------------------------
2 | # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
3 | #
4 | # NVIDIA CORPORATION and its licensors retain all intellectual property
5 | # and proprietary rights in and to this software, related documentation
6 | # and any modifications thereto. Any use, reproduction, disclosure or
7 | # distribution of this software and related documentation without an express
8 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
9 | # -----------------------------------------------------------------------------
10 |
11 | _parent_: projects/neuralangelo/configs/base.yaml
12 |
13 | model:
14 | object:
15 | sdf:
16 | mlp:
17 | inside_out: False # True for Meetingroom.
18 | encoding:
19 | coarse2fine:
20 | init_active_level: 8
21 | appear_embed:
22 | enabled: True
23 | dim: 8
24 |
25 | data:
26 | type: projects.neuralangelo.data
27 | root: datasets/tanks_and_temples/Barn
28 | num_images: 410 # The number of training images.
29 | train:
30 | image_size: [835,1500]
31 | batch_size: 1
32 | subset:
33 | val:
34 | image_size: [300,540]
35 | batch_size: 1
36 | subset: 1
37 | max_viz_samples: 16
38 |
--------------------------------------------------------------------------------
/projects/neuralangelo/data.py:
--------------------------------------------------------------------------------
1 | '''
2 | -----------------------------------------------------------------------------
3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
4 |
5 | NVIDIA CORPORATION and its licensors retain all intellectual property
6 | and proprietary rights in and to this software, related documentation
7 | and any modifications thereto. Any use, reproduction, disclosure or
8 | distribution of this software and related documentation without an express
9 | license agreement from NVIDIA CORPORATION is strictly prohibited.
10 | -----------------------------------------------------------------------------
11 | '''
12 |
13 | import json
14 | import numpy as np
15 | import torch
16 | import torchvision.transforms.functional as torchvision_F
17 | from PIL import Image, ImageFile
18 |
19 | from projects.nerf.datasets import base
20 | from projects.nerf.utils import camera
21 |
22 | ImageFile.LOAD_TRUNCATED_IMAGES = True
23 |
24 |
25 | class Dataset(base.Dataset):
26 |
27 | def __init__(self, cfg, is_inference=False):
28 | super().__init__(cfg, is_inference=is_inference, is_test=False)
29 | cfg_data = cfg.data
30 | self.root = cfg_data.root
31 | self.preload = cfg_data.preload
32 | self.H, self.W = cfg_data.val.image_size if is_inference else cfg_data.train.image_size
33 | meta_fname = f"{cfg_data.root}/transforms.json"
34 | with open(meta_fname) as file:
35 | self.meta = json.load(file)
36 | self.list = self.meta["frames"]
37 | if cfg_data[self.split].subset:
38 | subset = cfg_data[self.split].subset
39 | subset_idx = np.linspace(0, len(self.list), subset+1)[:-1].astype(int)
40 | self.list = [self.list[i] for i in subset_idx]
41 | self.num_rays = cfg.model.render.rand_rays
42 | self.readjust = getattr(cfg_data, "readjust", None)
43 | # Preload dataset if possible.
44 | if cfg_data.preload:
45 | self.images = self.preload_threading(self.get_image, cfg_data.num_workers)
46 | self.cameras = self.preload_threading(self.get_camera, cfg_data.num_workers, data_str="cameras")
47 |
48 | def __getitem__(self, idx):
49 | """Process raw data and return processed data in a dictionary.
50 |
51 | Args:
52 | idx: The index of the sample of the dataset.
53 | Returns: A dictionary containing the data.
54 | idx (scalar): The index of the sample of the dataset.
55 |                  ray_idx (R tensor): Sampled ray (pixel) indices (training split only).
56 |                  image_sampled (Rx3 tensor): Sampled pixel values in [0,1] for supervision (training split only); at inference the full image (3xHxW) is returned as `image` instead.
57 | intr (3x3 tensor): The camera intrinsics of `image`.
58 | pose (3x4 tensor): The camera extrinsics [R,t] of `image`.
59 | """
60 | # Keep track of sample index for convenience.
61 | sample = dict(idx=idx)
62 | # Get the images.
63 | image, image_size_raw = self.images[idx] if self.preload else self.get_image(idx)
64 | image = self.preprocess_image(image)
65 | # Get the cameras (intrinsics and pose).
66 | intr, pose = self.cameras[idx] if self.preload else self.get_camera(idx)
67 | intr, pose = self.preprocess_camera(intr, pose, image_size_raw)
68 | # Pre-sample ray indices.
69 | if self.split == "train":
70 | ray_idx = torch.randperm(self.H * self.W)[:self.num_rays] # [R]
71 | image_sampled = image.flatten(1, 2)[:, ray_idx].t() # [R,3]
72 | sample.update(
73 | ray_idx=ray_idx,
74 | image_sampled=image_sampled,
75 | intr=intr,
76 | pose=pose,
77 | )
78 | else: # keep image during inference
79 | sample.update(
80 | image=image,
81 | intr=intr,
82 | pose=pose,
83 | )
84 | return sample
85 |
86 | def get_image(self, idx):
87 | fpath = self.list[idx]["file_path"]
88 | image_fname = f"{self.root}/{fpath}"
89 | image = Image.open(image_fname)
90 | image.load()
91 | image_size_raw = image.size
92 | return image, image_size_raw
93 |
94 | def preprocess_image(self, image):
95 | # Resize the image.
96 | image = image.resize((self.W, self.H))
97 | image = torchvision_F.to_tensor(image)
98 | rgb = image[:3]
99 | return rgb
100 |
101 | def get_camera(self, idx):
102 | # Camera intrinsics.
103 | intr = torch.tensor([[self.meta["fl_x"], self.meta["sk_x"], self.meta["cx"]],
104 | [self.meta["sk_y"], self.meta["fl_y"], self.meta["cy"]],
105 | [0, 0, 1]]).float()
106 | # Camera pose.
107 | c2w_gl = torch.tensor(self.list[idx]["transform_matrix"], dtype=torch.float32)
108 | c2w = self._gl_to_cv(c2w_gl)
109 | # center scene
110 | center = np.array(self.meta["sphere_center"])
111 | center += np.array(getattr(self.readjust, "center", [0])) if self.readjust else 0.
112 | c2w[:3, -1] -= center
113 | # scale scene
114 | scale = np.array(self.meta["sphere_radius"])
115 | scale *= getattr(self.readjust, "scale", 1.) if self.readjust else 1.
116 | c2w[:3, -1] /= scale
117 | w2c = camera.Pose().invert(c2w[:3])
118 | return intr, w2c
119 |
120 | def preprocess_camera(self, intr, pose, image_size_raw):
121 | # Adjust the intrinsics according to the resized image.
122 | intr = intr.clone()
123 | raw_W, raw_H = image_size_raw
124 | intr[0] *= self.W / raw_W
125 | intr[1] *= self.H / raw_H
126 | return intr, pose
127 |
128 | def _gl_to_cv(self, gl):
129 | # convert to CV convention used in Imaginaire
130 | cv = gl * torch.tensor([1, -1, -1, 1])
131 | return cv
132 |
--------------------------------------------------------------------------------
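The training branch of __getitem__ pre-samples rays by flattening the image and indexing it; a standalone sketch of that indexing with a random stand-in image:

import torch

H, W, num_rays = 4, 6, 5
image = torch.rand(3, H, W)                           # stand-in for a preprocessed [3,H,W] image in [0,1]
ray_idx = torch.randperm(H * W)[:num_rays]            # [R] pixel indices into the flattened image
image_sampled = image.flatten(1, 2)[:, ray_idx].t()   # [R,3] ground-truth colors for the sampled rays
print(ray_idx.shape, image_sampled.shape)             # torch.Size([5]) torch.Size([5, 3])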
/projects/neuralangelo/scripts/convert_dtu_to_json.py:
--------------------------------------------------------------------------------
1 | '''
2 | -----------------------------------------------------------------------------
3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
4 |
5 | NVIDIA CORPORATION and its licensors retain all intellectual property
6 | and proprietary rights in and to this software, related documentation
7 | and any modifications thereto. Any use, reproduction, disclosure or
8 | distribution of this software and related documentation without an express
9 | license agreement from NVIDIA CORPORATION is strictly prohibited.
10 | -----------------------------------------------------------------------------
11 | '''
12 |
13 | import numpy as np
14 | import json
15 | from argparse import ArgumentParser
16 | import os
17 | import cv2
18 | from PIL import Image, ImageFile
19 | from glob import glob
20 | import math
21 | import sys
22 | from pathlib import Path
23 |
24 |
25 | dir_path = Path(os.path.dirname(os.path.realpath(__file__))).parents[2]
26 | sys.path.append(dir_path.__str__())
27 | from projects.neuralangelo.scripts.convert_data_to_json import _cv_to_gl # noqa: E402
28 |
29 | ImageFile.LOAD_TRUNCATED_IMAGES = True
30 |
31 |
32 | def load_K_Rt_from_P(filename, P=None):
33 | # This function is borrowed from IDR: https://github.com/lioryariv/idr
34 | if P is None:
35 | lines = open(filename).read().splitlines()
36 | if len(lines) == 4:
37 | lines = lines[1:]
38 | lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)]
39 | P = np.asarray(lines).astype(np.float32).squeeze()
40 |
41 | out = cv2.decomposeProjectionMatrix(P)
42 | K = out[0]
43 | R = out[1]
44 | t = out[2]
45 |
46 | K = K / K[2, 2]
47 | intrinsics = np.eye(4)
48 | intrinsics[:3, :3] = K
49 |
50 | pose = np.eye(4, dtype=np.float32)
51 | pose[:3, :3] = R.transpose()
52 | pose[:3, 3] = (t[:3] / t[3])[:, 0]
53 |
54 | return intrinsics, pose
55 |
56 |
57 | def dtu_to_json(args):
58 | assert args.dtu_path, "Provide path to DTU dataset"
59 | scene_list = os.listdir(args.dtu_path)
60 |
61 | for scene in scene_list:
62 | scene_path = os.path.join(args.dtu_path, scene)
63 | if not os.path.isdir(scene_path) or 'scan' not in scene:
64 | continue
65 |
66 | out = {
67 | "k1": 0.0, # take undistorted images only
68 | "k2": 0.0,
69 | "k3": 0.0,
70 | "k4": 0.0,
71 | "p1": 0.0,
72 | "p2": 0.0,
73 | "is_fisheye": False,
74 | "frames": []
75 | }
76 |
77 | camera_param = dict(np.load(os.path.join(scene_path, 'cameras_sphere.npz')))
78 | images_lis = sorted(glob(os.path.join(scene_path, 'image/*.png')))
79 | for idx, image in enumerate(images_lis):
80 | image = os.path.basename(image)
81 |
82 | world_mat = camera_param['world_mat_%d' % idx]
83 | scale_mat = camera_param['scale_mat_%d' % idx]
84 |
85 | # scale and decompose
86 | P = world_mat @ scale_mat
87 | P = P[:3, :4]
88 | intrinsic_param, c2w = load_K_Rt_from_P(None, P)
89 | c2w_gl = _cv_to_gl(c2w)
90 |
91 | frame = {"file_path": 'image/' + image, "transform_matrix": c2w_gl.tolist()}
92 | out["frames"].append(frame)
93 |
94 | fl_x = intrinsic_param[0][0]
95 | fl_y = intrinsic_param[1][1]
96 | cx = intrinsic_param[0][2]
97 | cy = intrinsic_param[1][2]
98 | sk_x = intrinsic_param[0][1]
99 | sk_y = intrinsic_param[1][0]
100 | w, h = Image.open(os.path.join(scene_path, 'image', image)).size
101 |
102 | angle_x = math.atan(w / (fl_x * 2)) * 2
103 | angle_y = math.atan(h / (fl_y * 2)) * 2
104 |
105 | scale_mat = scale_mat.astype(float)
106 |
107 | out.update({
108 | "camera_angle_x": angle_x,
109 | "camera_angle_y": angle_y,
110 | "fl_x": fl_x,
111 | "fl_y": fl_y,
112 | "cx": cx,
113 | "cy": cy,
114 | "sk_x": sk_x,
115 | "sk_y": sk_y,
116 | "w": int(w),
117 | "h": int(h),
118 | "aabb_scale": np.exp2(np.rint(np.log2(scale_mat[0, 0]))), # power of two, for INGP resolution computation
119 | "sphere_center": [0., 0., 0.],
120 | "sphere_radius": 1.,
121 | })
122 |
123 | file_path = os.path.join(scene_path, 'transforms.json')
124 | with open(file_path, "w") as outputfile:
125 | json.dump(out, outputfile, indent=2)
126 | print('Writing data to json file: ', file_path)
127 |
128 |
129 | if __name__ == '__main__':
130 | parser = ArgumentParser()
131 | parser.add_argument('--dtu_path', type=str, default=None)
132 |
133 | args = parser.parse_args()
134 |
135 | dtu_to_json(args)
136 |
--------------------------------------------------------------------------------
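A quick sanity check of load_K_Rt_from_P on a synthetic projection matrix with identity rotation and a known camera center (1, 2, 3); this assumes the repository root is on sys.path so the script is importable as a module, and the intrinsics values are invented:

import numpy as np

from projects.neuralangelo.scripts.convert_dtu_to_json import load_K_Rt_from_P

K = np.array([[500., 0., 320.],
              [0., 500., 240.],
              [0., 0., 1.]])
C = np.array([1., 2., 3.])                       # camera center in world coordinates
P = K @ np.hstack([np.eye(3), -C[:, None]])      # P = K [R | -R C] with R = I

intrinsics, pose = load_K_Rt_from_P(None, P.astype(np.float32))
print(np.allclose(intrinsics[:3, :3], K, atol=1e-3))   # True: intrinsics recovered
print(pose[:3, 3])                                      # ~[1. 2. 3.]: camera-to-world translation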
/projects/neuralangelo/scripts/extract_mesh.py:
--------------------------------------------------------------------------------
1 | '''
2 | -----------------------------------------------------------------------------
3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
4 |
5 | NVIDIA CORPORATION and its licensors retain all intellectual property
6 | and proprietary rights in and to this software, related documentation
7 | and any modifications thereto. Any use, reproduction, disclosure or
8 | distribution of this software and related documentation without an express
9 | license agreement from NVIDIA CORPORATION is strictly prohibited.
10 | -----------------------------------------------------------------------------
11 | '''
12 |
13 | import argparse
14 | import json
15 | import os
16 | import sys
17 | import numpy as np
18 | from functools import partial
19 |
20 | sys.path.append(os.getcwd())
21 | from imaginaire.config import Config, recursive_update_strict, parse_cmdline_arguments # noqa: E402
22 | from imaginaire.utils.distributed import init_dist, get_world_size, is_master, master_only_print as print # noqa: E402
23 | from imaginaire.utils.gpu_affinity import set_affinity # noqa: E402
24 | from imaginaire.trainers.utils.get_trainer import get_trainer # noqa: E402
25 | from projects.neuralangelo.utils.mesh import extract_mesh, extract_texture # noqa: E402
26 |
27 |
28 | def parse_args():
29 |     parser = argparse.ArgumentParser(description="Mesh extraction")
30 | parser.add_argument("--config", required=True, help="Path to the training config file.")
31 | parser.add_argument("--checkpoint", default="", help="Checkpoint path.")
32 | parser.add_argument('--local_rank', type=int, default=os.getenv('LOCAL_RANK', 0))
33 | parser.add_argument('--single_gpu', action='store_true')
34 | parser.add_argument("--resolution", default=512, type=int, help="Marching cubes resolution")
35 | parser.add_argument("--block_res", default=64, type=int, help="Block-wise resolution for marching cubes")
36 | parser.add_argument("--output_file", default="mesh.ply", type=str, help="Output file name")
37 | parser.add_argument("--textured", action="store_true", help="Export mesh with texture")
38 | parser.add_argument("--keep_lcc", action="store_true",
39 | help="Keep only largest connected component. May remove thin structures.")
40 | args, cfg_cmd = parser.parse_known_args()
41 | return args, cfg_cmd
42 |
43 |
44 | def main():
45 | args, cfg_cmd = parse_args()
46 | set_affinity(args.local_rank)
47 | cfg = Config(args.config)
48 |
49 | cfg_cmd = parse_cmdline_arguments(cfg_cmd)
50 | recursive_update_strict(cfg, cfg_cmd)
51 |
52 | # If args.single_gpu is set to True, we will disable distributed data parallel.
53 | if not args.single_gpu:
54 | # this disables nccl timeout
55 |         os.environ["NCCL_BLOCKING_WAIT"] = "0"
56 | os.environ["NCCL_ASYNC_ERROR_HANDLING"] = "0"
57 | cfg.local_rank = args.local_rank
58 | init_dist(cfg.local_rank, rank=-1, world_size=-1)
59 | print(f"Running mesh extraction with {get_world_size()} GPUs.")
60 |
61 | cfg.logdir = ''
62 |
63 | # Initialize data loaders and models.
64 | trainer = get_trainer(cfg, is_inference=True, seed=0)
65 | # Load checkpoint.
66 | trainer.checkpointer.load(args.checkpoint, load_opt=False, load_sch=False)
67 | trainer.model.eval()
68 |
69 | # Set the coarse-to-fine levels.
70 | trainer.current_iteration = trainer.checkpointer.eval_iteration
71 | if cfg.model.object.sdf.encoding.coarse2fine.enabled:
72 | trainer.model_module.neural_sdf.set_active_levels(trainer.current_iteration)
73 | if cfg.model.object.sdf.gradient.mode == "numerical":
74 | trainer.model_module.neural_sdf.set_normal_epsilon()
75 |
76 | meta_fname = f"{cfg.data.root}/transforms.json"
77 | with open(meta_fname) as file:
78 | meta = json.load(file)
79 |
80 | if "aabb_range" in meta:
81 | bounds = (np.array(meta["aabb_range"]) - np.array(meta["sphere_center"])[..., None]) / meta["sphere_radius"]
82 | else:
83 | bounds = np.array([[-1.0, 1.0], [-1.0, 1.0], [-1.0, 1.0]])
84 |
85 | sdf_func = lambda x: -trainer.model_module.neural_sdf.sdf(x) # noqa: E731
86 | texture_func = partial(extract_texture, neural_sdf=trainer.model_module.neural_sdf,
87 | neural_rgb=trainer.model_module.neural_rgb,
88 | appear_embed=trainer.model_module.appear_embed) if args.textured else None
89 | mesh = extract_mesh(sdf_func=sdf_func, bounds=bounds, intv=(2.0 / args.resolution),
90 | block_res=args.block_res, texture_func=texture_func, filter_lcc=args.keep_lcc)
91 |
92 | if is_master():
93 | print(f"vertices: {len(mesh.vertices)}")
94 | print(f"faces: {len(mesh.faces)}")
95 | if args.textured:
96 | print(f"colors: {len(mesh.visual.vertex_colors)}")
97 | # center and scale
98 | mesh.vertices = mesh.vertices * meta["sphere_radius"] + np.array(meta["sphere_center"])
99 | mesh.update_faces(mesh.nondegenerate_faces())
100 |         os.makedirs(os.path.dirname(args.output_file) or ".", exist_ok=True)
101 | mesh.export(args.output_file)
102 |
103 |
104 | if __name__ == "__main__":
105 | main()
106 |
--------------------------------------------------------------------------------
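The mesh is extracted in the unit-sphere-normalized frame and then mapped back to world coordinates; a toy walk-through of the two transformations used above (the metadata values are invented):

import numpy as np

meta = {"aabb_range": [[-4., 4.], [-4., 4.], [0., 8.]],
        "sphere_center": [0., 0., 4.],
        "sphere_radius": 4.}

# Normalization applied before marching cubes: shift by the sphere center, divide by the radius.
bounds = (np.array(meta["aabb_range"]) - np.array(meta["sphere_center"])[..., None]) / meta["sphere_radius"]
print(bounds.tolist())                     # [[-1.0, 1.0], [-1.0, 1.0], [-1.0, 1.0]]

# De-normalization applied to the extracted vertices before export.
vertices = np.array([[0., 0., 0.], [1., 1., 1.]])
print(vertices * meta["sphere_radius"] + np.array(meta["sphere_center"]))   # [[0. 0. 4.] [4. 4. 8.]]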
/projects/neuralangelo/scripts/generate_config.py:
--------------------------------------------------------------------------------
1 | '''
2 | -----------------------------------------------------------------------------
3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
4 |
5 | NVIDIA CORPORATION and its licensors retain all intellectual property
6 | and proprietary rights in and to this software, related documentation
7 | and any modifications thereto. Any use, reproduction, disclosure or
8 | distribution of this software and related documentation without an express
9 | license agreement from NVIDIA CORPORATION is strictly prohibited.
10 | -----------------------------------------------------------------------------
11 | '''
12 |
13 | import os
14 | import sys
15 | from argparse import ArgumentParser
16 | from pathlib import Path
17 | import yaml
18 | from addict import Dict
19 | from PIL import Image, ImageFile
20 |
21 | dir_path = Path(os.path.dirname(os.path.realpath(__file__))).parents[2]
22 | sys.path.append(dir_path.__str__())
23 |
24 | ImageFile.LOAD_TRUNCATED_IMAGES = True
25 |
26 |
27 | def generate_config(args):
28 | cfg = Dict()
29 | cfg._parent_ = "projects/neuralangelo/configs/base.yaml"
30 | num_images = len(os.listdir(os.path.join(args.data_dir, "images")))
31 | # model cfg
32 | if args.auto_exposure_wb:
33 | cfg.data.num_images = num_images
34 | cfg.model.appear_embed.enabled = True
35 | cfg.model.appear_embed.dim = 8
36 | if num_images < 4: # default is 4
37 | cfg.data.val.subset = num_images
38 | else:
39 | cfg.model.appear_embed.enabled = False
40 | if args.scene_type == "outdoor":
41 | cfg.model.object.sdf.mlp.inside_out = False
42 | cfg.model.object.sdf.encoding.coarse2fine.init_active_level = 8
43 | elif args.scene_type == "indoor":
44 | cfg.model.object.sdf.mlp.inside_out = True
45 | cfg.model.object.sdf.encoding.coarse2fine.init_active_level = 8
46 | cfg.model.background.enabled = False
47 | cfg.model.render.num_samples.background = 0
48 | elif args.scene_type == "object":
49 | cfg.model.object.sdf.mlp.inside_out = False
50 | cfg.model.object.sdf.encoding.coarse2fine.init_active_level = 4
51 | else:
52 | raise TypeError("Unknown scene type")
53 | # data config
54 | cfg.data.type = "projects.neuralangelo.data"
55 | cfg.data.root = args.data_dir
56 | img = Image.open(os.path.join(args.data_dir, "images", os.listdir(os.path.join(args.data_dir, "images"))[0]))
57 | w, h = img.size
58 | cfg.data.train.image_size = [h, w]
59 | short_size = args.val_short_size
60 | cfg.data.val.image_size = [short_size, int(w/h*short_size)] if w > h else [int(h/w*short_size), short_size]
61 | cfg.data.readjust.center = [0., 0., 0.]
62 | cfg.data.readjust.scale = 1.
63 | # export cfg
64 | cfg_fname = os.path.join(dir_path, "projects/neuralangelo/configs", f"custom/{args.sequence_name}.yaml")
65 | with open(cfg_fname, "w") as file:
66 | yaml.safe_dump(cfg.to_dict(), file, default_flow_style=False, indent=4)
67 | print("Config generated to file:", cfg_fname)
68 | return
69 |
70 |
71 | if __name__ == "__main__":
72 | parser = ArgumentParser()
73 | parser.add_argument("--sequence_name", type=str, default="recon", help="Name of sequence")
74 | parser.add_argument("--data_dir", type=str, default=None, help="Path to data")
75 | parser.add_argument("--auto_exposure_wb", action="store_true",
76 | help="Video capture with auto-exposure or white-balance")
77 | parser.add_argument("--scene_type", type=str, default="outdoor", choices=["outdoor", "indoor", "object"],
78 | help="Select scene type. Outdoor for building-scale reconstruction; "
79 | "indoor for room-scale reconstruction; object for object-centric scene reconstruction.")
80 | parser.add_argument("--val_short_size", type=int, default=300,
81 | help="Set the short side of validation images (for saving compute when rendering val images)")
82 | args = parser.parse_args()
83 | generate_config(args)
84 |
--------------------------------------------------------------------------------
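The validation image size computed above preserves the aspect ratio while clamping the short side; the same arithmetic as a tiny standalone function (the input resolutions are arbitrary examples):

def val_image_size(w, h, short_size=300):
    # Returns [H, W], matching the image_size convention used in the configs.
    return [short_size, int(w / h * short_size)] if w > h else [int(h / w * short_size), short_size]

print(val_image_size(1920, 1080))   # [300, 533] -- landscape: height clamped to 300
print(val_image_size(1080, 1920))   # [533, 300] -- portrait: width clamped to 300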
/projects/neuralangelo/scripts/preprocess.sh:
--------------------------------------------------------------------------------
1 | # -----------------------------------------------------------------------------
2 | # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
3 | #
4 | # NVIDIA CORPORATION and its licensors retain all intellectual property
5 | # and proprietary rights in and to this software, related documentation
6 | # and any modifications thereto. Any use, reproduction, disclosure or
7 | # distribution of this software and related documentation without an express
8 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
9 | # -----------------------------------------------------------------------------
10 |
11 | # usage: preprocess.sh <sequence_name> <video_path> <downsample_rate> <scene_type>
12 |
13 | data_path=datasets/${1}_ds${3}
14 | bash projects/neuralangelo/scripts/run_ffmpeg.sh ${1} ${2} ${3}
15 | bash projects/neuralangelo/scripts/run_colmap.sh ${data_path}
16 | python3 projects/neuralangelo/scripts/convert_data_to_json.py --data_dir ${data_path} --scene_type ${4}
17 | python3 projects/neuralangelo/scripts/generate_config.py --sequence_name ${1} --data_dir ${data_path} --scene_type ${4}
18 |
--------------------------------------------------------------------------------
/projects/neuralangelo/scripts/preprocess_dtu.sh:
--------------------------------------------------------------------------------
1 | # -----------------------------------------------------------------------------
2 | # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
3 | #
4 | # NVIDIA CORPORATION and its licensors retain all intellectual property
5 | # and proprietary rights in and to this software, related documentation
6 | # and any modifications thereto. Any use, reproduction, disclosure or
7 | # distribution of this software and related documentation without an express
8 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
9 | # -----------------------------------------------------------------------------
10 |
11 | # usage: preprocess_dtu.sh <output_path_for_dtu_data>
12 |
13 | echo "Download DTU data"
14 | mkdir -p "${1}"
15 | curl -L -o data.zip https://www.dropbox.com/sh/w0y8bbdmxzik3uk/AAAaZffBiJevxQzRskoOYcyja?dl=1
16 | unzip data.zip "data_DTU.zip"
17 | rm data.zip
18 | unzip -q data_DTU.zip -d ${1}
19 | rm data_DTU.zip
20 | echo "Generate json files"
21 | python3 projects/neuralangelo/scripts/convert_dtu_to_json.py --dtu_path ${1}
22 |
--------------------------------------------------------------------------------
/projects/neuralangelo/scripts/preprocess_tnt.sh:
--------------------------------------------------------------------------------
1 | # -----------------------------------------------------------------------------
2 | # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
3 | #
4 | # NVIDIA CORPORATION and its licensors retain all intellectual property
5 | # and proprietary rights in and to this software, related documentation
6 | # and any modifications thereto. Any use, reproduction, disclosure or
7 | # distribution of this software and related documentation without an express
8 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
9 | # -----------------------------------------------------------------------------
10 |
11 | # usage: preprocess_tnt.sh <path_to_tanks_and_temples>
12 |
13 | echo "Download fixed poses for Courthouse"
14 | gdown 10pcCwaQY6hqyiegJGdgmLp_HMFOnsmgq
15 | gdown 19TT5aTz3z60eUVIDhFJ7EgGqpcqQnJEi
16 | mv Courthouse_COLMAP_SfM.log ${1}/Courthouse/Courthouse_COLMAP_SfM.log
17 | mv Courthouse_trans.txt ${1}/Courthouse/Courthouse_trans.txt
18 |
19 | echo "Compute intrinsics, undistort images and generate json files. This may take a while"
20 | python3 projects/neuralangelo/scripts/convert_tnt_to_json.py --tnt_path ${1}
21 |
--------------------------------------------------------------------------------
/projects/neuralangelo/scripts/run_colmap.sh:
--------------------------------------------------------------------------------
1 | # -----------------------------------------------------------------------------
2 | # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
3 | #
4 | # NVIDIA CORPORATION and its licensors retain all intellectual property
5 | # and proprietary rights in and to this software, related documentation
6 | # and any modifications thereto. Any use, reproduction, disclosure or
7 | # distribution of this software and related documentation without an express
8 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
9 | # -----------------------------------------------------------------------------
10 |
11 | # usage: run_colmap.sh <data_path>
12 |
13 | colmap feature_extractor \
14 | --database_path=${1}/database.db \
15 | --image_path=${1}/images_raw \
16 | --ImageReader.camera_model=SIMPLE_RADIAL \
17 | --ImageReader.single_camera=true \
18 | --SiftExtraction.use_gpu=true \
19 | --SiftExtraction.num_threads=32
20 |
21 | colmap sequential_matcher \
22 | --database_path=${1}/database.db \
23 | --SiftMatching.use_gpu=true
24 |
25 | mkdir -p ${1}/sparse
26 | colmap mapper \
27 | --database_path=${1}/database.db \
28 | --image_path=${1}/images_raw \
29 | --output_path=${1}/sparse
30 |
31 | cp ${1}/sparse/0/*.bin ${1}/sparse/
32 | for path in ${1}/sparse/*/; do
33 | m=$(basename ${path})
34 | if [ ${m} != "0" ]; then
35 | colmap model_merger \
36 | --input_path1=${1}/sparse \
37 | --input_path2=${1}/sparse/${m} \
38 | --output_path=${1}/sparse
39 | colmap bundle_adjuster \
40 | --input_path=${1}/sparse \
41 | --output_path=${1}/sparse
42 | fi
43 | done
44 |
45 | colmap image_undistorter \
46 | --image_path=${1}/images_raw \
47 | --input_path=${1}/sparse \
48 | --output_path=${1} \
49 | --output_type=COLMAP
50 |
--------------------------------------------------------------------------------
/projects/neuralangelo/scripts/run_ffmpeg.sh:
--------------------------------------------------------------------------------
1 | # -----------------------------------------------------------------------------
2 | # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
3 | #
4 | # NVIDIA CORPORATION and its licensors retain all intellectual property
5 | # and proprietary rights in and to this software, related documentation
6 | # and any modifications thereto. Any use, reproduction, disclosure or
7 | # distribution of this software and related documentation without an express
8 | # license agreement from NVIDIA CORPORATION is strictly prohibited.
9 | # -----------------------------------------------------------------------------
10 |
11 | # usage: run_ffmpeg.sh <sequence_name> <video_path> <downsample_rate>
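# ${1}: sequence name, ${2}: path to the input video, ${3}: frame sub-sampling rate.
# Keeps every ${3}-th frame and writes JPEG frames to datasets/${1}_ds${3}/images_raw.
# Example (illustrative): bash projects/neuralangelo/scripts/run_ffmpeg.sh lego lego.mp4 2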
12 |
13 | data_path=datasets/${1}_ds${3}
14 | image_path=${data_path}/images_raw
15 | mkdir -p ${image_path}
16 | ffmpeg -i ${2} -vf "select=not(mod(n\,$3))" -vsync vfr -q:v 2 ${image_path}/%06d.jpg
17 |
--------------------------------------------------------------------------------
/projects/neuralangelo/scripts/visualize_colmap.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "id": "8b8d7b17-af50-42cd-b531-ef61c49c9e61",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "# Set the work directory to the imaginaire root.\n",
11 | "import os, sys, time\n",
12 | "import pathlib\n",
13 | "root_dir = pathlib.Path().absolute().parents[2]\n",
14 | "os.chdir(root_dir)\n",
15 | "print(f\"Root Directory Path: {root_dir}\")"
16 | ]
17 | },
18 | {
19 | "cell_type": "code",
20 | "execution_count": null,
21 | "id": "2b5b9e2f-841c-4815-92e0-0c76ed46da62",
22 | "metadata": {},
23 | "outputs": [],
24 | "source": [
25 | "# Import Python libraries.\n",
26 | "import numpy as np\n",
27 | "import torch\n",
28 | "import k3d\n",
29 | "import json\n",
30 | "import plotly.graph_objs as go\n",
31 | "from collections import OrderedDict\n",
32 | "# Import imaginaire modules.\n",
33 | "from projects.nerf.utils import camera, visualize\n",
34 | "from third_party.colmap.scripts.python.read_write_model import read_model"
35 | ]
36 | },
37 | {
38 | "cell_type": "code",
39 | "execution_count": null,
40 | "id": "76033016-2d92-4a5d-9e50-3978553e8df4",
41 | "metadata": {},
42 | "outputs": [],
43 | "source": [
44 | "# Read the COLMAP data.\n",
45 | "colmap_path = \"datasets/lego_ds2\"\n",
46 | "cameras, images, points_3D = read_model(path=f\"{colmap_path}/sparse\", ext=\".bin\")\n",
47 | "# Convert camera poses.\n",
48 | "images = OrderedDict(sorted(images.items()))\n",
49 | "qvecs = torch.from_numpy(np.stack([image.qvec for image in images.values()]))\n",
50 | "tvecs = torch.from_numpy(np.stack([image.tvec for image in images.values()]))\n",
51 | "Rs = camera.quaternion.q_to_R(qvecs)\n",
52 | "poses = torch.cat([Rs, tvecs[..., None]], dim=-1) # [N,3,4]\n",
53 | "print(f\"# images: {len(poses)}\")\n",
54 | "# Get the sparse 3D points and the colors.\n",
55 | "xyzs = torch.from_numpy(np.stack([point.xyz for point in points_3D.values()]))\n",
56 | "rgbs = np.stack([point.rgb for point in points_3D.values()])\n",
57 | "rgbs_int32 = (rgbs[:, 0] * 2**16 + rgbs[:, 1] * 2**8 + rgbs[:, 2]).astype(np.uint32)\n",
58 | "print(f\"# points: {len(xyzs)}\")"
59 | ]
60 | },
61 | {
62 | "cell_type": "code",
63 | "execution_count": null,
64 | "id": "47862ee1-286c-4877-a181-4b33b7733719",
65 | "metadata": {},
66 | "outputs": [],
67 | "source": [
68 | "vis_depth = 0.2"
69 | ]
70 | },
71 | {
72 | "cell_type": "code",
73 | "execution_count": null,
74 | "id": "b6cf60ec-fe6a-43ba-9aaf-e3c7afd88208",
75 | "metadata": {},
76 | "outputs": [],
77 | "source": [
78 | "# Visualize the bounding sphere.\n",
79 | "json_fname = f\"{colmap_path}/transforms.json\"\n",
80 | "with open(json_fname) as file:\n",
81 | " meta = json.load(file)\n",
82 | "center = meta[\"sphere_center\"]\n",
83 | "radius = meta[\"sphere_radius\"]\n",
84 | "# ------------------------------------------------------------------------------------\n",
85 | "# These variables can be adjusted to make the bounding sphere fit the region of interest.\n",
86 | "# The adjusted values can then be set in the config as data.readjust.center and data.readjust.scale\n",
87 | "readjust_center = np.array([0., 0., 0.])\n",
88 | "readjust_scale = 1.\n",
89 | "# ------------------------------------------------------------------------------------\n",
90 | "center += readjust_center\n",
91 | "radius *= readjust_scale\n",
92 | "# Make some points to hallucinate a bounding sphere.\n",
93 | "sphere_points = np.random.randn(100000, 3)\n",
94 | "sphere_points = sphere_points / np.linalg.norm(sphere_points, axis=-1, keepdims=True)\n",
95 | "sphere_points = sphere_points * radius + center"
96 | ]
97 | },
98 | {
99 | "cell_type": "code",
100 | "execution_count": null,
101 | "id": "e986aed0-1aaf-4772-937c-136db7f2eaec",
102 | "metadata": {},
103 | "outputs": [],
104 | "source": [
105 | "# You can choose to visualize with Plotly...\n",
106 | "x, y, z = *xyzs.T,\n",
107 | "colors = rgbs / 255.0\n",
108 | "sphere_x, sphere_y, sphere_z = *sphere_points.T,\n",
109 | "sphere_colors = [\"#4488ff\"] * len(sphere_points)\n",
110 | "traces_poses = visualize.plotly_visualize_pose(poses, vis_depth=vis_depth, xyz_length=0.02, center_size=0.01, xyz_width=0.005, mesh_opacity=0.05)\n",
111 | "trace_points = go.Scatter3d(x=x, y=y, z=z, mode=\"markers\", marker=dict(size=1, color=colors, opacity=1), hoverinfo=\"skip\")\n",
112 | "trace_sphere = go.Scatter3d(x=sphere_x, y=sphere_y, z=sphere_z, mode=\"markers\", marker=dict(size=0.5, color=sphere_colors, opacity=0.7), hoverinfo=\"skip\")\n",
113 | "traces_all = traces_poses + [trace_points, trace_sphere]\n",
114 | "layout = go.Layout(scene=dict(xaxis=dict(showspikes=False, backgroundcolor=\"rgba(0,0,0,0)\", gridcolor=\"rgba(0,0,0,0.1)\"),\n",
115 | " yaxis=dict(showspikes=False, backgroundcolor=\"rgba(0,0,0,0)\", gridcolor=\"rgba(0,0,0,0.1)\"),\n",
116 | " zaxis=dict(showspikes=False, backgroundcolor=\"rgba(0,0,0,0)\", gridcolor=\"rgba(0,0,0,0.1)\"),\n",
117 | " xaxis_title=\"X\", yaxis_title=\"Y\", zaxis_title=\"Z\", dragmode=\"orbit\",\n",
118 | " aspectratio=dict(x=1, y=1, z=1), aspectmode=\"data\"), height=800)\n",
119 | "fig = go.Figure(data=traces_all, layout=layout)\n",
120 | "fig.show()"
121 | ]
122 | },
123 | {
124 | "cell_type": "code",
125 | "execution_count": null,
126 | "id": "fdde170b-4546-4617-9162-a9fcb936347d",
127 | "metadata": {},
128 | "outputs": [],
129 | "source": [
130 | "# ... or visualize with K3D.\n",
131 | "plot = k3d.plot(name=\"poses\", height=800, camera_rotate_speed=5.0, camera_zoom_speed=3.0, camera_pan_speed=1.0)\n",
132 | "k3d_objects = visualize.k3d_visualize_pose(poses, vis_depth=vis_depth, xyz_length=0.02, center_size=0.01, xyz_width=0.005, mesh_opacity=0.05)\n",
133 | "for k3d_object in k3d_objects:\n",
134 | " plot += k3d_object\n",
135 | "plot += k3d.points(xyzs, colors=rgbs_int32, point_size=0.02, shader=\"flat\")\n",
136 | "plot += k3d.points(sphere_points, color=0x4488ff, point_size=0.01, shader=\"flat\")\n",
137 | "plot.display()\n",
138 | "plot.camera_fov = 30.0"
139 | ]
140 | }
141 | ],
142 | "metadata": {
143 | "kernelspec": {
144 | "display_name": "Python 3 (ipykernel)",
145 | "language": "python",
146 | "name": "python3"
147 | },
148 | "language_info": {
149 | "codemirror_mode": {
150 | "name": "ipython",
151 | "version": 3
152 | },
153 | "file_extension": ".py",
154 | "mimetype": "text/x-python",
155 | "name": "python",
156 | "nbconvert_exporter": "python",
157 | "pygments_lexer": "ipython3",
158 | "version": "3.9.13"
159 | }
160 | },
161 | "nbformat": 4,
162 | "nbformat_minor": 5
163 | }
164 |
--------------------------------------------------------------------------------
/projects/neuralangelo/scripts/visualize_mesh.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "id": "8b8d7b17-af50-42cd-b531-ef61c49c9e61",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "# Set the work directory to the imaginaire root.\n",
11 | "import os, sys, time\n",
12 | "import pathlib\n",
13 | "root_dir = pathlib.Path().absolute().parents[2]\n",
14 | "os.chdir(root_dir)\n",
15 | "print(f\"Root Directory Path: {root_dir}\")"
16 | ]
17 | },
18 | {
19 | "cell_type": "code",
20 | "execution_count": null,
21 | "id": "2b5b9e2f-841c-4815-92e0-0c76ed46da62",
22 | "metadata": {},
23 | "outputs": [],
24 | "source": [
25 | "# Import Python libraries.\n",
26 | "import numpy as np\n",
27 | "import trimesh\n",
28 | "import k3d"
29 | ]
30 | },
31 | {
32 | "cell_type": "code",
33 | "execution_count": null,
34 | "id": "84604c4a-8d95-462a-b7f0-acaa0b9f563d",
35 | "metadata": {},
36 | "outputs": [],
37 | "source": [
38 | "# Load the 3D mesh.\n",
39 | "ply_fname = \"logs/0_debug/18/mesh.ply\"\n",
40 | "mesh = trimesh.load(ply_fname)\n",
41 | "print(f\"# vertices: {len(mesh.vertices)}\")\n",
42 | "print(f\"# faces: {len(mesh.faces)}\")"
43 | ]
44 | },
45 | {
46 | "cell_type": "code",
47 | "execution_count": null,
48 | "id": "d1f8df0b-8361-40f3-a801-0cc42b920fed",
49 | "metadata": {},
50 | "outputs": [],
51 | "source": [
52 | "# Visualize with K3D.\n",
53 | "plot = k3d.plot(name=\"mesh\", height=800, camera_rotate_speed=5.0, camera_zoom_speed=3.0, camera_pan_speed=1.0)\n",
54 | "C = mesh.visual.vertex_colors\n",
55 | "colors = C[:,0]*256**2 + C[:,1]*256 + C[:,2]\n",
56 | "plot += k3d.mesh(mesh.vertices, mesh.faces, colors=colors, side=\"double\", opacity=1)\n",
57 | "plot.display()\n",
58 | "plot.camera_fov = 30.0"
59 | ]
60 | }
61 | ],
62 | "metadata": {
63 | "kernelspec": {
64 | "display_name": "Python 3 (ipykernel)",
65 | "language": "python",
66 | "name": "python3"
67 | },
68 | "language_info": {
69 | "codemirror_mode": {
70 | "name": "ipython",
71 | "version": 3
72 | },
73 | "file_extension": ".py",
74 | "mimetype": "text/x-python",
75 | "name": "python",
76 | "nbconvert_exporter": "python",
77 | "pygments_lexer": "ipython3",
78 | "version": "3.9.13"
79 | }
80 | },
81 | "nbformat": 4,
82 | "nbformat_minor": 5
83 | }
84 |
--------------------------------------------------------------------------------
/projects/neuralangelo/scripts/visualize_transforms.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "id": "8b8d7b17-af50-42cd-b531-ef61c49c9e61",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "# Set the work directory to the imaginaire root.\n",
11 | "import os, sys, time\n",
12 | "import pathlib\n",
13 | "root_dir = pathlib.Path().absolute().parents[2]\n",
14 | "os.chdir(root_dir)\n",
15 | "print(f\"Root Directory Path: {root_dir}\")"
16 | ]
17 | },
18 | {
19 | "cell_type": "code",
20 | "execution_count": null,
21 | "id": "2b5b9e2f-841c-4815-92e0-0c76ed46da62",
22 | "metadata": {},
23 | "outputs": [],
24 | "source": [
25 | "# Import Python libraries.\n",
26 | "import numpy as np\n",
27 | "import torch\n",
28 | "import k3d\n",
29 | "import json\n",
30 | "from collections import OrderedDict\n",
31 | "# Import imaginaire modules.\n",
32 | "from projects.nerf.utils import camera, visualize\n",
33 | "from third_party.colmap.scripts.python.read_write_model import read_model"
34 | ]
35 | },
36 | {
37 | "cell_type": "code",
38 | "execution_count": null,
39 | "id": "97bedecf-da68-44b1-96cf-580ef7e7f3f0",
40 | "metadata": {},
41 | "outputs": [],
42 | "source": [
43 | "# Read the COLMAP data.\n",
44 | "colmap_path = \"datasets/lego_ds2\"\n",
45 | "json_fname = f\"{colmap_path}/transforms.json\"\n",
46 | "with open(json_fname) as file:\n",
47 | " meta = json.load(file)\n",
48 | "center = meta[\"sphere_center\"]\n",
49 | "radius = meta[\"sphere_radius\"]\n",
50 | "# Convert camera poses.\n",
51 | "poses = []\n",
52 | "for frame in meta[\"frames\"]:\n",
53 | " c2w = torch.tensor(frame[\"transform_matrix\"])\n",
54 | " c2w[:, 1:3] *= -1\n",
55 | " w2c = c2w.inverse()\n",
56 | " pose = w2c[:3] # [3,4]\n",
57 | " poses.append(pose)\n",
58 | "poses = torch.stack(poses, dim=0)\n",
59 | "print(f\"# images: {len(poses)}\")"
60 | ]
61 | },
62 | {
63 | "cell_type": "code",
64 | "execution_count": null,
65 | "id": "2016d20c-1e58-407f-9810-cbe76dc5ccec",
66 | "metadata": {},
67 | "outputs": [],
68 | "source": [
69 | "vis_depth = 0.2\n",
70 | "k3d_textures = []"
71 | ]
72 | },
73 | {
74 | "cell_type": "code",
75 | "execution_count": null,
76 | "id": "d7168a09-6654-4660-b140-66b9dfd6f1e8",
77 | "metadata": {},
78 | "outputs": [],
79 | "source": [
80 | "# (optional) visualize the images.\n",
81 | "# This block can be skipped if we don't want to visualize the image observations.\n",
82 | "for i, frame in enumerate(meta[\"frames\"]):\n",
83 | " image_fname = frame[\"file_path\"]\n",
84 | " image_path = f\"{colmap_path}/{image_fname}\"\n",
85 | " with open(image_path, \"rb\") as file:\n",
86 | " binary = file.read()\n",
87 | " # Compute the corresponding image corners in 3D.\n",
88 | " pose = poses[i]\n",
89 | " corners = torch.tensor([[-0.5, 0.5, 1], [0.5, 0.5, 1], [-0.5, -0.5, 1]])\n",
90 | " corners *= vis_depth\n",
91 | " corners = camera.cam2world(corners, pose)\n",
92 | " puv = [corners[0].tolist(), (corners[1]-corners[0]).tolist(), (corners[2]-corners[0]).tolist()]\n",
93 | " k3d_texture = k3d.texture(binary, file_format=\"jpg\", puv=puv)\n",
94 | " k3d_textures.append(k3d_texture)"
95 | ]
96 | },
97 | {
98 | "cell_type": "code",
99 | "execution_count": null,
100 | "id": "b6cf60ec-fe6a-43ba-9aaf-e3c7afd88208",
101 | "metadata": {},
102 | "outputs": [],
103 | "source": [
104 | "# Visualize the bounding sphere.\n",
105 | "json_fname = f\"{colmap_path}/transforms.json\"\n",
106 | "with open(json_fname) as file:\n",
107 | " meta = json.load(file)\n",
108 | "center = meta[\"sphere_center\"]\n",
109 | "radius = meta[\"sphere_radius\"]\n",
110 | "# ------------------------------------------------------------------------------------\n",
111 | "# These variables can be adjusted to make the bounding sphere fit the region of interest.\n",
112 | "# The adjusted values can then be set in the config as data.readjust.center and data.readjust.scale\n",
113 | "readjust_center = np.array([0., 0., 0.])\n",
114 | "readjust_scale = 1.\n",
115 | "# ------------------------------------------------------------------------------------\n",
116 | "center += readjust_center\n",
117 | "radius *= readjust_scale\n",
118 | "# Make some points to hallucinate a bounding sphere.\n",
119 | "sphere_points = np.random.randn(100000, 3)\n",
120 | "sphere_points = sphere_points / np.linalg.norm(sphere_points, axis=-1, keepdims=True)\n",
121 | "sphere_points = sphere_points * radius + center"
122 | ]
123 | },
124 | {
125 | "cell_type": "code",
126 | "execution_count": null,
127 | "id": "fdde170b-4546-4617-9162-a9fcb936347d",
128 | "metadata": {},
129 | "outputs": [],
130 | "source": [
131 | "# Visualize with K3D.\n",
132 | "plot = k3d.plot(name=\"poses\", height=800, camera_rotate_speed=5.0, camera_zoom_speed=3.0, camera_pan_speed=1.0)\n",
133 | "k3d_objects = visualize.k3d_visualize_pose(poses, vis_depth=vis_depth, xyz_length=0.02, center_size=0.01, xyz_width=0.005, mesh_opacity=0.)\n",
134 | "for k3d_object in k3d_objects:\n",
135 | " plot += k3d_object\n",
136 | "for k3d_texture in k3d_textures:\n",
137 | " plot += k3d_texture\n",
138 | "plot += k3d.points(sphere_points, color=0x4488ff, point_size=0.01, shader=\"flat\")\n",
139 | "plot.display()\n",
140 | "plot.camera_fov = 30.0"
141 | ]
142 | }
143 | ],
144 | "metadata": {
145 | "kernelspec": {
146 | "display_name": "Python 3 (ipykernel)",
147 | "language": "python",
148 | "name": "python3"
149 | },
150 | "language_info": {
151 | "codemirror_mode": {
152 | "name": "ipython",
153 | "version": 3
154 | },
155 | "file_extension": ".py",
156 | "mimetype": "text/x-python",
157 | "name": "python",
158 | "nbconvert_exporter": "python",
159 | "pygments_lexer": "ipython3",
160 | "version": "3.9.13"
161 | }
162 | },
163 | "nbformat": 4,
164 | "nbformat_minor": 5
165 | }
166 |
--------------------------------------------------------------------------------
/projects/neuralangelo/trainer.py:
--------------------------------------------------------------------------------
1 | '''
2 | -----------------------------------------------------------------------------
3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
4 |
5 | NVIDIA CORPORATION and its licensors retain all intellectual property
6 | and proprietary rights in and to this software, related documentation
7 | and any modifications thereto. Any use, reproduction, disclosure or
8 | distribution of this software and related documentation without an express
9 | license agreement from NVIDIA CORPORATION is strictly prohibited.
10 | -----------------------------------------------------------------------------
11 | '''
12 |
13 | import torch
14 | import torch.nn.functional as torch_F
15 | import wandb
16 |
17 | from imaginaire.utils.distributed import master_only
18 | from imaginaire.utils.visualization import wandb_image
19 | from projects.nerf.trainers.base import BaseTrainer
20 | from projects.neuralangelo.utils.misc import get_scheduler, eikonal_loss, curvature_loss
21 |
22 |
23 | class Trainer(BaseTrainer):
24 |
25 | def __init__(self, cfg, is_inference=True, seed=0):
26 | super().__init__(cfg, is_inference=is_inference, seed=seed)
27 | self.metrics = dict()
28 | self.warm_up_end = cfg.optim.sched.warm_up_end
29 | self.cfg_gradient = cfg.model.object.sdf.gradient
30 | if cfg.model.object.sdf.encoding.type == "hashgrid" and cfg.model.object.sdf.encoding.coarse2fine.enabled:
31 | self.c2f_step = cfg.model.object.sdf.encoding.coarse2fine.step
32 | self.model.module.neural_sdf.warm_up_end = self.warm_up_end
33 |
34 | def _init_loss(self, cfg):
35 | self.criteria["render"] = torch.nn.L1Loss()
36 |
37 | def setup_scheduler(self, cfg, optim):
38 | return get_scheduler(cfg.optim, optim)
39 |
40 | def _compute_loss(self, data, mode=None):
41 | if mode == "train":
42 | # Compute loss only on randomly sampled rays.
43 | self.losses["render"] = self.criteria["render"](data["rgb"], data["image_sampled"]) * 3 # FIXME:sumRGB?!
44 | self.metrics["psnr"] = -10 * torch_F.mse_loss(data["rgb"], data["image_sampled"]).log10()
45 | if "eikonal" in self.weights.keys():
46 | self.losses["eikonal"] = eikonal_loss(data["gradients"], outside=data["outside"])
47 | if "curvature" in self.weights:
48 | self.losses["curvature"] = curvature_loss(data["hessians"], outside=data["outside"])
49 | else:
50 | # Compute loss on the entire image.
51 | self.losses["render"] = self.criteria["render"](data["rgb_map"], data["image"])
52 | self.metrics["psnr"] = -10 * torch_F.mse_loss(data["rgb_map"], data["image"]).log10()
53 |
54 | def get_curvature_weight(self, current_iteration, init_weight):
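        # Ramp the curvature weight linearly from 0 to init_weight during warm-up, then divide it by
        # growth_rate ** (anneal_levels - 1) so the regularization relaxes as finer hash levels activate.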
55 | if "curvature" in self.weights:
56 | if current_iteration <= self.warm_up_end:
57 | self.weights["curvature"] = current_iteration / self.warm_up_end * init_weight
58 | else:
59 | model = self.model_module
60 | decay_factor = model.neural_sdf.growth_rate ** (model.neural_sdf.anneal_levels - 1)
61 | self.weights["curvature"] = init_weight / decay_factor
62 |
63 | def _start_of_iteration(self, data, current_iteration):
64 | model = self.model_module
65 | self.progress = model.progress = current_iteration / self.cfg.max_iter
66 | if self.cfg.model.object.sdf.encoding.coarse2fine.enabled:
67 | model.neural_sdf.set_active_levels(current_iteration)
68 | if self.cfg_gradient.mode == "numerical":
69 | model.neural_sdf.set_normal_epsilon()
70 | self.get_curvature_weight(current_iteration, self.cfg.trainer.loss_weight.curvature)
71 | elif self.cfg_gradient.mode == "numerical":
72 | model.neural_sdf.set_normal_epsilon()
73 |
74 | return super()._start_of_iteration(data, current_iteration)
75 |
76 | @master_only
77 | def log_wandb_scalars(self, data, mode=None):
78 | super().log_wandb_scalars(data, mode=mode)
79 | scalars = {
80 | f"{mode}/PSNR": self.metrics["psnr"].detach(),
81 | f"{mode}/s-var": self.model_module.s_var.item(),
82 | }
83 | if "curvature" in self.weights:
84 | scalars[f"{mode}/curvature_weight"] = self.weights["curvature"]
85 | if "eikonal" in self.weights:
86 | scalars[f"{mode}/eikonal_weight"] = self.weights["eikonal"]
87 | if mode == "train" and self.cfg_gradient.mode == "numerical":
88 | scalars[f"{mode}/epsilon"] = self.model.module.neural_sdf.normal_eps
89 | if self.cfg.model.object.sdf.encoding.coarse2fine.enabled:
90 | scalars[f"{mode}/active_levels"] = self.model.module.neural_sdf.active_levels
91 | wandb.log(scalars, step=self.current_iteration)
92 |
93 | @master_only
94 | def log_wandb_images(self, data, mode=None, max_samples=None):
95 | images = {"iteration": self.current_iteration, "epoch": self.current_epoch}
96 | if mode == "val":
97 | images_error = (data["rgb_map"] - data["image"]).abs()
98 | images.update({
99 | f"{mode}/vis/rgb_target": wandb_image(data["image"]),
100 | f"{mode}/vis/rgb_render": wandb_image(data["rgb_map"]),
101 | f"{mode}/vis/rgb_error": wandb_image(images_error),
102 | f"{mode}/vis/normal": wandb_image(data["normal_map"], from_range=(-1, 1)),
103 | f"{mode}/vis/inv_depth": wandb_image(1 / (data["depth_map"] + 1e-8) * self.cfg.trainer.depth_vis_scale),
104 | f"{mode}/vis/opacity": wandb_image(data["opacity_map"]),
105 | })
106 | wandb.log(images, step=self.current_iteration)
107 |
108 | def train(self, cfg, data_loader, single_gpu=False, profile=False, show_pbar=False):
109 | self.progress = self.model_module.progress = self.current_iteration / self.cfg.max_iter
110 | super().train(cfg, data_loader, single_gpu, profile, show_pbar)
111 |
--------------------------------------------------------------------------------
/projects/neuralangelo/utils/mesh.py:
--------------------------------------------------------------------------------
1 | '''
2 | -----------------------------------------------------------------------------
3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
4 |
5 | NVIDIA CORPORATION and its licensors retain all intellectual property
6 | and proprietary rights in and to this software, related documentation
7 | and any modifications thereto. Any use, reproduction, disclosure or
8 | distribution of this software and related documentation without an express
9 | license agreement from NVIDIA CORPORATION is strictly prohibited.
10 | -----------------------------------------------------------------------------
11 | '''
12 |
13 | import numpy as np
14 | import trimesh
15 | import mcubes
16 | import torch
17 | import torch.distributed as dist
18 | import torch.nn.functional as torch_F
19 | from tqdm import tqdm
20 |
21 | from imaginaire.utils.distributed import get_world_size, is_master
22 |
23 |
24 | @torch.no_grad()
25 | def extract_mesh(sdf_func, bounds, intv, block_res=64, texture_func=None, filter_lcc=False):
26 | lattice_grid = LatticeGrid(bounds, intv=intv, block_res=block_res)
27 | data_loader = get_lattice_grid_loader(lattice_grid)
28 | mesh_blocks = []
29 | if is_master():
30 | data_loader = tqdm(data_loader, leave=False)
31 | for it, data in enumerate(data_loader):
32 | xyz = data["xyz"][0]
33 | xyz_cuda = xyz.cuda()
34 | sdf_cuda = sdf_func(xyz_cuda)[..., 0]
35 | sdf = sdf_cuda.cpu()
36 | mesh = marching_cubes(sdf.numpy(), xyz.numpy(), intv, texture_func, filter_lcc)
37 | mesh_blocks.append(mesh)
38 | mesh_blocks_gather = [None] * get_world_size()
39 | if dist.is_initialized():
40 | dist.all_gather_object(mesh_blocks_gather, mesh_blocks)
41 | else:
42 | mesh_blocks_gather = [mesh_blocks]
43 | if is_master():
44 | mesh_blocks_all = [mesh for mesh_blocks in mesh_blocks_gather for mesh in mesh_blocks
45 | if mesh.vertices.shape[0] > 0]
46 | mesh = trimesh.util.concatenate(mesh_blocks_all)
47 | return mesh
48 | else:
49 | return None
50 |
51 |
52 | @torch.no_grad()
53 | def extract_texture(xyz, neural_rgb, neural_sdf, appear_embed):
54 | num_samples, _ = xyz.shape
55 | xyz_cuda = torch.from_numpy(xyz).float().cuda()[None, None] # [N,3] -> [1,1,N,3]
56 | sdfs, feats = neural_sdf(xyz_cuda)
57 | gradients, _ = neural_sdf.compute_gradients(xyz_cuda, training=False, sdf=sdfs)
58 | normals = torch_F.normalize(gradients, dim=-1)
59 | if appear_embed is not None:
60 | feat_dim = appear_embed.embedding_dim # [1,1,N,C]
61 | app = torch.zeros([1, 1, num_samples, feat_dim], device=sdfs.device) # TODO: hard-coded to zero. better way?
62 | else:
63 | app = None
64 | rgbs = neural_rgb.forward(xyz_cuda, normals, -normals, feats, app=app) # [1,1,N,3]
65 | return (rgbs.squeeze().cpu().numpy() * 255).astype(np.uint8)
66 |
67 |
68 | class LatticeGrid(torch.utils.data.Dataset):
69 |
70 | def __init__(self, bounds, intv, block_res=64):
71 | super().__init__()
72 | self.block_res = block_res
73 | ((x_min, x_max), (y_min, y_max), (z_min, z_max)) = bounds
74 | self.x_grid = torch.arange(x_min, x_max, intv)
75 | self.y_grid = torch.arange(y_min, y_max, intv)
76 | self.z_grid = torch.arange(z_min, z_max, intv)
77 | res_x, res_y, res_z = len(self.x_grid), len(self.y_grid), len(self.z_grid)
78 | print("Extracting surface at resolution", res_x, res_y, res_z)
79 | self.num_blocks_x = int(np.ceil(res_x / block_res))
80 | self.num_blocks_y = int(np.ceil(res_y / block_res))
81 | self.num_blocks_z = int(np.ceil(res_z / block_res))
82 |
83 | def __getitem__(self, idx):
84 | # Keep track of sample index for convenience.
85 | sample = dict(idx=idx)
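        # Convert the flat index into (x, y, z) block indices; each block grabs one extra sample per
        # axis (the +1 in the slicing below) so neighboring blocks share a boundary and the per-block
        # meshes stitch together without gaps.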
86 | block_idx_x = idx // (self.num_blocks_y * self.num_blocks_z)
87 | block_idx_y = (idx // self.num_blocks_z) % self.num_blocks_y
88 | block_idx_z = idx % self.num_blocks_z
89 | xi = block_idx_x * self.block_res
90 | yi = block_idx_y * self.block_res
91 | zi = block_idx_z * self.block_res
92 | x, y, z = torch.meshgrid(self.x_grid[xi:xi+self.block_res+1],
93 | self.y_grid[yi:yi+self.block_res+1],
94 | self.z_grid[zi:zi+self.block_res+1], indexing="ij")
95 | xyz = torch.stack([x, y, z], dim=-1)
96 | sample.update(xyz=xyz)
97 | return sample
98 |
99 | def __len__(self):
100 | return self.num_blocks_x * self.num_blocks_y * self.num_blocks_z
101 |
102 |
103 | def get_lattice_grid_loader(dataset, num_workers=8):
104 | if dist.is_initialized():
105 | sampler = torch.utils.data.distributed.DistributedSampler(dataset, shuffle=False)
106 | else:
107 | sampler = None
108 | return torch.utils.data.DataLoader(
109 | dataset,
110 | batch_size=1,
111 | shuffle=False,
112 | sampler=sampler,
113 | pin_memory=True,
114 | num_workers=num_workers,
115 | drop_last=False
116 | )
117 |
118 |
119 | def marching_cubes(sdf, xyz, intv, texture_func, filter_lcc):
120 | # marching cubes
121 | V, F = mcubes.marching_cubes(sdf, 0.)
122 | if V.shape[0] > 0:
123 | V = V * intv + xyz[0, 0, 0]
124 | if texture_func is not None:
125 | C = texture_func(V)
126 | mesh = trimesh.Trimesh(V, F, vertex_colors=C)
127 | else:
128 | mesh = trimesh.Trimesh(V, F)
129 | mesh = filter_points_outside_bounding_sphere(mesh)
130 | mesh = filter_largest_cc(mesh) if filter_lcc else mesh
131 | else:
132 | mesh = trimesh.Trimesh()
133 | return mesh
134 |
135 |
136 | def filter_points_outside_bounding_sphere(old_mesh):
137 | mask = np.linalg.norm(old_mesh.vertices, axis=-1) < 1.0
138 | if np.any(mask):
139 | indices = np.ones(len(old_mesh.vertices), dtype=int) * -1
140 | indices[mask] = np.arange(mask.sum())
141 | faces_mask = mask[old_mesh.faces[:, 0]] & mask[old_mesh.faces[:, 1]] & mask[old_mesh.faces[:, 2]]
142 | new_faces = indices[old_mesh.faces[faces_mask]]
143 | new_vertices = old_mesh.vertices[mask]
144 | new_colors = old_mesh.visual.vertex_colors[mask]
145 | new_mesh = trimesh.Trimesh(new_vertices, new_faces, vertex_colors=new_colors)
146 | else:
147 | new_mesh = trimesh.Trimesh()
148 | return new_mesh
149 |
150 |
151 | def filter_largest_cc(mesh):
152 | components = mesh.split(only_watertight=False)
153 | areas = np.array([c.area for c in components], dtype=float)
154 | if len(areas) > 0 and mesh.vertices.shape[0] > 0:
155 | new_mesh = components[areas.argmax()]
156 | else:
157 | new_mesh = trimesh.Trimesh()
158 | return new_mesh
159 |
--------------------------------------------------------------------------------
/projects/neuralangelo/utils/misc.py:
--------------------------------------------------------------------------------
1 | '''
2 | -----------------------------------------------------------------------------
3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
4 |
5 | NVIDIA CORPORATION and its licensors retain all intellectual property
6 | and proprietary rights in and to this software, related documentation
7 | and any modifications thereto. Any use, reproduction, disclosure or
8 | distribution of this software and related documentation without an express
9 | license agreement from NVIDIA CORPORATION is strictly prohibited.
10 | -----------------------------------------------------------------------------
11 | '''
12 |
13 | from functools import partial
14 | import numpy as np
15 | import torch
16 | import torch.nn.functional as torch_F
17 | import imaginaire.trainers.utils
18 | from torch.optim import lr_scheduler
19 |
20 | flip_mat = np.array([
21 | [1, 0, 0, 0],
22 | [0, -1, 0, 0],
23 | [0, 0, -1, 0],
24 | [0, 0, 0, 1]
25 | ])
26 |
27 |
28 | def get_scheduler(cfg_opt, opt):
29 | """Return the scheduler object.
30 |
31 | Args:
32 | cfg_opt (obj): Config for the specific optimization module (gen/dis).
33 | opt (obj): PyTorch optimizer object.
34 |
35 | Returns:
36 | (obj): Scheduler
37 | """
38 | if cfg_opt.sched.type == 'two_steps_with_warmup':
39 | warm_up_end = cfg_opt.sched.warm_up_end
40 | two_steps = cfg_opt.sched.two_steps
41 | gamma = cfg_opt.sched.gamma
42 |
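        # Linear warm-up to the base LR, then the LR is divided by gamma after two_steps[0]
        # iterations and by gamma**2 after two_steps[1].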
43 | def sch(x):
44 | if x < warm_up_end:
45 | return x / warm_up_end
46 | else:
47 | if x > two_steps[1]:
48 | return 1.0 / gamma ** 2
49 | elif x > two_steps[0]:
50 | return 1.0 / gamma
51 | else:
52 | return 1.0
53 |
54 | scheduler = lr_scheduler.LambdaLR(opt, lambda x: sch(x))
55 | elif cfg_opt.sched.type == 'cos_with_warmup':
56 | alpha = cfg_opt.sched.alpha
57 | max_iter = cfg_opt.sched.max_iter
58 | warm_up_end = cfg_opt.sched.warm_up_end
59 |
60 | def sch(x):
61 | if x < warm_up_end:
62 | return x / warm_up_end
63 | else:
64 | progress = (x - warm_up_end) / (max_iter - warm_up_end)
65 | learning_factor = (np.cos(np.pi * progress) + 1.0) * 0.5 * (1 - alpha) + alpha
66 | return learning_factor
67 |
68 | scheduler = lr_scheduler.LambdaLR(opt, lambda x: sch(x))
69 | else:
70 | return imaginaire.trainers.utils.get_scheduler()
71 | return scheduler
72 |
73 |
74 | def eikonal_loss(gradients, outside=None):
75 | gradient_error = (gradients.norm(dim=-1) - 1.0) ** 2 # [B,R,N]
76 | gradient_error = gradient_error.nan_to_num(nan=0.0, posinf=0.0, neginf=0.0) # [B,R,N]
77 | if outside is not None:
78 | return (gradient_error * (~outside).float()).mean()
79 | else:
80 | return gradient_error.mean()
81 |
82 |
83 | def curvature_loss(hessian, outside=None):
84 | laplacian = hessian.sum(dim=-1).abs() # [B,R,N]
85 | laplacian = laplacian.nan_to_num(nan=0.0, posinf=0.0, neginf=0.0) # [B,R,N]
86 | if outside is not None:
87 | return (laplacian * (~outside).float()).mean()
88 | else:
89 | return laplacian.mean()
90 |
91 |
92 | def get_activation(activ, **kwargs):
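    # Look up the activation by its config name and bind any extra keyword arguments,
    # e.g. get_activation("softplus", beta=100) (illustrative) returns softplus with beta=100.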
93 | func = dict(
94 | identity=lambda x: x,
95 | relu=torch_F.relu,
96 | relu_=torch_F.relu_,
97 | abs=torch.abs,
98 | abs_=torch.abs_,
99 | sigmoid=torch.sigmoid,
100 | sigmoid_=torch.sigmoid_,
101 | exp=torch.exp,
102 | exp_=torch.exp_,
103 | softplus=torch_F.softplus,
104 | silu=torch_F.silu,
105 | silu_=partial(torch_F.silu, inplace=True),
106 | )[activ]
107 | return partial(func, **kwargs)
108 |
109 |
110 | def to_full_image(image, image_size=None, from_vec=True):
111 | # if from_vec is True: [B,HW,...,K] --> [B,K,H,W,...]
112 | # if from_vec is False: [B,H,W,...,K] --> [B,K,H,W,...]
113 | if from_vec:
114 | assert image_size is not None
115 | image = image.unflatten(dim=1, sizes=image_size)
116 | image = image.moveaxis(-1, 1)
117 | return image
118 |
--------------------------------------------------------------------------------
/projects/neuralangelo/utils/mlp.py:
--------------------------------------------------------------------------------
1 | '''
2 | -----------------------------------------------------------------------------
3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
4 |
5 | NVIDIA CORPORATION and its licensors retain all intellectual property
6 | and proprietary rights in and to this software, related documentation
7 | and any modifications thereto. Any use, reproduction, disclosure or
8 | distribution of this software and related documentation without an express
9 | license agreement from NVIDIA CORPORATION is strictly prohibited.
10 | -----------------------------------------------------------------------------
11 | '''
12 |
13 | import numpy as np
14 | import torch
15 | import torch.nn.functional as torch_F
16 |
17 |
18 | class MLPforNeuralSDF(torch.nn.Module):
19 |
20 | def __init__(self, layer_dims, skip_connection=[], activ=None, use_layernorm=False, use_weightnorm=False,
21 | geometric_init=False, out_bias=0., invert=False):
22 | """Initialize a multi-layer perceptron with skip connection.
23 | Args:
24 | layer_dims: A list of integers representing the number of channels in each layer.
25 | skip_connection: A list of integers representing the index of layers to add skip connection.
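            activ: Activation applied to hidden layers (defaults to in-place ReLU).
            use_layernorm: If True, apply LayerNorm to the hidden pre-activations.
            use_weightnorm: If True, wrap the linear layers with weight normalization.
            geometric_init: If True, use geometric initialization for SDF learning.
            out_bias: Bias of the SDF output layer; with geometric_init the initial zero level set is roughly a sphere of this radius.
            invert: If True, negate the SDF output (swaps the inside/outside convention).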
26 | """
27 | super().__init__()
28 | self.skip_connection = skip_connection
29 | self.use_layernorm = use_layernorm
30 | self.linears = torch.nn.ModuleList()
31 | if use_layernorm:
32 | self.layer_norm = torch.nn.ModuleList()
33 | # Hidden layers
34 | layer_dim_pairs = list(zip(layer_dims[:-1], layer_dims[1:]))
35 | for li, (k_in, k_out) in enumerate(layer_dim_pairs):
36 | if li in self.skip_connection:
37 | k_in += layer_dims[0]
38 | linear = torch.nn.Linear(k_in, k_out)
39 | if geometric_init:
40 | self._geometric_init(linear, k_in, k_out, first=(li == 0),
41 | skip_dim=(layer_dims[0] if li in self.skip_connection else 0))
42 | if use_weightnorm:
43 | linear = torch.nn.utils.weight_norm(linear)
44 | self.linears.append(linear)
45 | if use_layernorm and li != len(layer_dim_pairs) - 1:
46 | self.layer_norm.append(torch.nn.LayerNorm(k_out))
47 | if li == len(layer_dim_pairs) - 1:
48 | self.linears[-1].bias.data.fill_(0.0)
49 | # SDF prediction layer
50 | self.linear_sdf = torch.nn.Linear(k_in, 1)
51 | if geometric_init:
52 | self._geometric_init_sdf(self.linear_sdf, k_in, out_bias=out_bias, invert=invert)
53 | self.activ = activ or torch_F.relu_
54 |
55 | def forward(self, input, with_sdf=True, with_feat=True):
56 | feat = input
57 | for li, linear in enumerate(self.linears):
58 | if li in self.skip_connection:
59 | feat = torch.cat([feat, input], dim=-1)
60 | if li != len(self.linears) - 1 or with_feat:
61 | feat_pre = linear(feat)
62 | if self.use_layernorm:
63 | feat_pre = self.layer_norm[li](feat_pre)
64 | feat_activ = self.activ(feat_pre)
65 | if li == len(self.linears) - 1:
66 | out = [self.linear_sdf(feat) if with_sdf else None,
67 | feat_activ if with_feat else None]
68 | feat = feat_activ
69 | return out
70 |
71 | def _geometric_init(self, linear, k_in, k_out, first=False, skip_dim=0):
72 | torch.nn.init.constant_(linear.bias, 0.0)
73 | torch.nn.init.normal_(linear.weight, 0.0, np.sqrt(2 / k_out))
74 | if first:
75 | torch.nn.init.constant_(linear.weight[:, 3:], 0.0) # positional encodings
76 | if skip_dim:
77 | torch.nn.init.constant_(linear.weight[:, -skip_dim:], 0.0) # skip connections
78 |
79 | def _geometric_init_sdf(self, linear, k_in, out_bias=0., invert=False):
80 | torch.nn.init.normal_(linear.weight, mean=np.sqrt(np.pi / k_in), std=0.0001)
81 | torch.nn.init.constant_(linear.bias, -out_bias)
82 | if invert:
83 | linear.weight.data *= -1
84 | linear.bias.data *= -1
85 |
--------------------------------------------------------------------------------
/projects/neuralangelo/utils/spherical_harmonics.py:
--------------------------------------------------------------------------------
1 | '''
2 | -----------------------------------------------------------------------------
3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
4 |
5 | NVIDIA CORPORATION and its licensors retain all intellectual property
6 | and proprietary rights in and to this software, related documentation
7 | and any modifications thereto. Any use, reproduction, disclosure or
8 | distribution of this software and related documentation without an express
9 | license agreement from NVIDIA CORPORATION is strictly prohibited.
10 | -----------------------------------------------------------------------------
11 | '''
12 |
13 | import torch
14 |
15 |
16 | SH_C0 = 0.28209479177387814
17 | SH_C1 = 0.4886025119029199
18 | SH_C2 = [
19 | 1.0925484305920792,
20 | -1.0925484305920792,
21 | 0.31539156525252005,
22 | -1.0925484305920792,
23 | 0.5462742152960396
24 | ]
25 | SH_C3 = [
26 | -0.5900435899266435,
27 | 2.890611442640554,
28 | -0.4570457994644658,
29 | 0.3731763325901154,
30 | -0.4570457994644658,
31 | 1.445305721320277,
32 | -0.5900435899266435
33 | ]
34 | SH_C4 = [
35 | 2.5033429417967046,
36 | -1.7701307697799304,
37 | 0.9461746957575601,
38 | -0.6690465435572892,
39 | 0.10578554691520431,
40 | -0.6690465435572892,
41 | 0.47308734787878004,
42 | -1.7701307697799304,
43 | 0.6258357354491761,
44 | ]
45 |
46 |
47 | def get_spherical_harmonics(dirs, levels):
48 | # Evaluate spherical harmonics bases at unit directions, without taking linear combination.
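    # dirs: [..., 3] unit direction vectors; returns [..., (levels + 1) ** 2] basis values (levels 0-4 supported).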
49 | vals = torch.empty((*dirs.shape[:-1], (levels + 1) ** 2), device=dirs.device)
50 | vals[..., 0] = SH_C0
51 | if levels >= 1:
52 | x, y, z = dirs.unbind(-1)
53 | vals[..., 1] = -SH_C1 * y
54 | vals[..., 2] = SH_C1 * z
55 | vals[..., 3] = -SH_C1 * x
56 | if levels >= 2:
57 | xx, yy, zz = x * x, y * y, z * z
58 | xy, yz, xz = x * y, y * z, x * z
59 | vals[..., 4] = SH_C2[0] * xy
60 | vals[..., 5] = SH_C2[1] * yz
61 | vals[..., 6] = SH_C2[2] * (2.0 * zz - xx - yy)
62 | vals[..., 7] = SH_C2[3] * xz
63 | vals[..., 8] = SH_C2[4] * (xx - yy)
64 | if levels >= 3:
65 | vals[..., 9] = SH_C3[0] * y * (3 * xx - yy)
66 | vals[..., 10] = SH_C3[1] * xy * z
67 | vals[..., 11] = SH_C3[2] * y * (4 * zz - xx - yy)
68 | vals[..., 12] = SH_C3[3] * z * (2 * zz - 3 * xx - 3 * yy)
69 | vals[..., 13] = SH_C3[4] * x * (4 * zz - xx - yy)
70 | vals[..., 14] = SH_C3[5] * z * (xx - yy)
71 | vals[..., 15] = SH_C3[6] * x * (xx - 3 * yy)
72 | if levels >= 4:
73 | vals[..., 16] = SH_C4[0] * xy * (xx - yy)
74 | vals[..., 17] = SH_C4[1] * yz * (3 * xx - yy)
75 | vals[..., 18] = SH_C4[2] * xy * (7 * zz - 1)
76 | vals[..., 19] = SH_C4[3] * yz * (7 * zz - 3)
77 | vals[..., 20] = SH_C4[4] * (zz * (35 * zz - 30) + 3)
78 | vals[..., 21] = SH_C4[5] * xz * (7 * zz - 3)
79 | vals[..., 22] = SH_C4[6] * (xx - yy) * (7 * zz - 1)
80 | vals[..., 23] = SH_C4[7] * xz * (xx - 3 * yy)
81 | vals[..., 24] = SH_C4[8] * (xx * (xx - 3 * yy) - yy * (3 * xx - yy))
82 | if levels >= 5:
83 | raise NotImplementedError
84 | return vals
85 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | addict
2 | gdown
3 | git+https://github.com/NVlabs/tiny-cuda-nn/#subdirectory=bindings/torch
4 | gpustat
5 | icecream
6 | imageio-ffmpeg
7 | imutils
8 | ipdb
9 | k3d
10 | kornia
11 | lpips
12 | matplotlib
13 | mediapy
14 | nvidia-ml-py3
15 | open3d
16 | opencv-python-headless
17 | OpenEXR
18 | pathlib
19 | pillow
20 | plotly
21 | pyequilib
22 | pyexr
23 | PyMCubes
24 | pyquaternion
25 | pyyaml
26 | requests
27 | scikit-image
28 | scikit-video
29 | scipy
30 | seaborn
31 | tensorboard
32 | termcolor
33 | tqdm
34 | trimesh
35 | wandb
36 |
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | '''
2 | -----------------------------------------------------------------------------
3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
4 |
5 | NVIDIA CORPORATION and its licensors retain all intellectual property
6 | and proprietary rights in and to this software, related documentation
7 | and any modifications thereto. Any use, reproduction, disclosure or
8 | distribution of this software and related documentation without an express
9 | license agreement from NVIDIA CORPORATION is strictly prohibited.
10 | -----------------------------------------------------------------------------
11 | '''
12 |
13 | import argparse
14 | import os
15 |
16 | import imaginaire.config
17 | from imaginaire.config import Config, recursive_update_strict, parse_cmdline_arguments
18 | from imaginaire.utils.cudnn import init_cudnn
19 | from imaginaire.utils.distributed import init_dist, get_world_size, master_only_print as print, is_master
20 | from imaginaire.utils.gpu_affinity import set_affinity
21 | from imaginaire.trainers.utils.logging import init_logging
22 | from imaginaire.trainers.utils.get_trainer import get_trainer
23 | from imaginaire.utils.set_random_seed import set_random_seed
24 |
25 |
26 | def parse_args():
27 | parser = argparse.ArgumentParser(description='Training')
28 | parser.add_argument('--config', help='Path to the training config file.', required=True)
29 | parser.add_argument('--logdir', help='Dir for saving logs and models.', default=None)
30 | parser.add_argument('--checkpoint', default=None, help='Checkpoint path.')
31 | parser.add_argument('--seed', type=int, default=0, help='Random seed.')
32 | parser.add_argument('--local_rank', type=int, default=os.getenv('LOCAL_RANK', 0))
33 | parser.add_argument('--single_gpu', action='store_true')
34 | parser.add_argument('--debug', action='store_true')
35 | parser.add_argument('--profile', action='store_true')
36 | parser.add_argument('--show_pbar', action='store_true')
37 | parser.add_argument('--wandb', action='store_true', help="Enable using Weights & Biases as the logger")
38 | parser.add_argument('--wandb_name', default='default', type=str)
39 | parser.add_argument('--resume', action='store_true')
40 | args, cfg_cmd = parser.parse_known_args()
41 | return args, cfg_cmd
42 |
43 |
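# Example launches (illustrative):
#   torchrun --nproc_per_node=<num_gpus> train.py --config=<config.yaml> --show_pbar
#   python train.py --config=<config.yaml> --single_gpu --show_pbar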
44 | def main():
45 | args, cfg_cmd = parse_args()
46 | set_affinity(args.local_rank)
47 | cfg = Config(args.config)
48 |
49 | cfg_cmd = parse_cmdline_arguments(cfg_cmd)
50 | recursive_update_strict(cfg, cfg_cmd)
51 |
52 | # If args.single_gpu is set to True, we will disable distributed data parallel.
53 | if not args.single_gpu:
54 | # this disables nccl timeout
55 | os.environ["NCLL_BLOCKING_WAIT"] = "0"
56 | os.environ["NCCL_ASYNC_ERROR_HANDLING"] = "0"
57 | cfg.local_rank = args.local_rank
58 | init_dist(cfg.local_rank, rank=-1, world_size=-1)
59 | print(f"Training with {get_world_size()} GPUs.")
60 |
61 | # set random seed by rank
62 | set_random_seed(args.seed, by_rank=True)
63 |
64 | # Global arguments.
65 | imaginaire.config.DEBUG = args.debug
66 |
67 | # Create log directory for storing training results.
68 | cfg.logdir = init_logging(args.config, args.logdir, makedir=True)
69 |
70 | # Print and save final config
71 | if is_master():
72 | cfg.print_config()
73 | cfg.save_config(cfg.logdir)
74 |
75 | # Initialize cudnn.
76 | init_cudnn(cfg.cudnn.deterministic, cfg.cudnn.benchmark)
77 |
78 | # Initialize data loaders and models.
79 | trainer = get_trainer(cfg, is_inference=False, seed=args.seed)
80 | trainer.set_data_loader(cfg, split="train")
81 | trainer.set_data_loader(cfg, split="val")
82 | trainer.checkpointer.load(args.checkpoint, args.resume, load_sch=True, load_opt=True)
83 |
84 | # Initialize Wandb.
85 | trainer.init_wandb(cfg,
86 | project=args.wandb_name,
87 | mode="disabled" if args.debug or not args.wandb else "online",
88 | resume=args.resume,
89 | use_group=True)
90 |
91 | trainer.mode = 'train'
92 | # Start training.
93 | trainer.train(cfg,
94 | trainer.train_data_loader,
95 | single_gpu=args.single_gpu,
96 | profile=args.profile,
97 | show_pbar=args.show_pbar)
98 |
99 | # Finalize training.
100 | trainer.finalize(cfg)
101 |
102 |
103 | if __name__ == "__main__":
104 | main()
105 |
--------------------------------------------------------------------------------