├── .gitignore ├── .gitmodules ├── .pre-commit-config.yaml ├── DATA_PROCESSING.md ├── LICENSE.md ├── README.md ├── assets └── teaser.gif ├── docker ├── Dockerfile-colmap └── Dockerfile-neuralangelo ├── imaginaire ├── config.py ├── config_base.yaml ├── datasets │ ├── base.py │ └── utils │ │ ├── dataloader.py │ │ ├── get_dataloader.py │ │ └── sampler.py ├── models │ ├── base.py │ └── utils │ │ ├── init_weight.py │ │ └── model_average.py ├── trainers │ ├── base.py │ └── utils │ │ ├── get_trainer.py │ │ ├── logging.py │ │ └── meters.py └── utils │ ├── cudnn.py │ ├── distributed.py │ ├── gpu_affinity.py │ ├── misc.py │ ├── set_random_seed.py │ ├── termcolor.py │ └── visualization.py ├── neuralangelo.yaml ├── projects ├── nerf │ ├── configs │ │ ├── ingp_blender.yaml │ │ ├── nerf_blender.yaml │ │ └── nerf_llff.yaml │ ├── datasets │ │ ├── base.py │ │ ├── nerf_blender.py │ │ └── nerf_llff.py │ ├── models │ │ ├── ingp.py │ │ └── nerf.py │ ├── trainers │ │ ├── base.py │ │ └── nerf.py │ └── utils │ │ ├── camera.py │ │ ├── misc.py │ │ ├── nerf_util.py │ │ ├── render.py │ │ └── visualize.py └── neuralangelo │ ├── configs │ ├── base.yaml │ ├── custom │ │ └── template.yaml │ ├── dtu.yaml │ └── tnt.yaml │ ├── data.py │ ├── model.py │ ├── scripts │ ├── convert_data_to_json.py │ ├── convert_dtu_to_json.py │ ├── convert_tnt_to_json.py │ ├── extract_mesh.py │ ├── generate_config.py │ ├── preprocess.sh │ ├── preprocess_dtu.sh │ ├── preprocess_tnt.sh │ ├── run_colmap.sh │ ├── run_ffmpeg.sh │ ├── visualize_colmap.ipynb │ ├── visualize_mesh.ipynb │ └── visualize_transforms.ipynb │ ├── trainer.py │ └── utils │ ├── mesh.py │ ├── misc.py │ ├── mlp.py │ ├── modules.py │ └── spherical_harmonics.py ├── requirements.txt └── train.py /.gitignore: -------------------------------------------------------------------------------- 1 | checkpoints 2 | 3 | # Other uncheckable file types 4 | *.zip 5 | *.exe 6 | *.dll 7 | *.swp 8 | *.vscode 9 | *.ipynb 10 | *.DS_Store 11 | *.pyc 12 | 13 | # Credential information that should never be checked in 14 | *.secret 15 | 16 | # Data types 17 | *.png 18 | *.hdr 19 | *.jpg 20 | *.jpeg 21 | *.pgm 22 | *.tiff 23 | *.tif 24 | *.mp4 25 | *.MOV 26 | *.tar 27 | *.tar.gz 28 | *.pkl 29 | *.pt 30 | *.bin 31 | *.ply 32 | 33 | # log folder 34 | logs/ 35 | 36 | # dataset folder 37 | datasets/ 38 | /datasets/ 39 | 40 | # config folder 41 | !projects/neuralangelo/configs/custom/template.yaml 42 | projects/neuralangelo/configs/custom 43 | 44 | # ------------------------ BELOW IS AUTO-GENERATED FOR PYTHON REPOS ------------------------ 45 | 46 | # Byte-compiled / optimized / DLL files 47 | __pycache__/ 48 | *.py[cod] 49 | *$py.class 50 | 51 | # C extensions 52 | *.so 53 | 54 | # Distribution / packaging 55 | .Python 56 | build/ 57 | develop-eggs/ 58 | dist/ 59 | downloads/ 60 | eggs/ 61 | .eggs/ 62 | lib/ 63 | lib64/ 64 | parts/ 65 | sdist/ 66 | var/ 67 | wheels/ 68 | share/python-wheels/ 69 | *.egg-info/ 70 | .installed.cfg 71 | *.egg 72 | MANIFEST 73 | 74 | # PyInstaller 75 | # Usually these files are written by a python script from a template 76 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
77 | *.manifest 78 | *.spec 79 | 80 | # Installer logs 81 | pip-log.txt 82 | pip-delete-this-directory.txt 83 | 84 | # Unit test / coverage reports 85 | htmlcov/ 86 | .tox/ 87 | .nox/ 88 | .coverage 89 | .coverage.* 90 | .cache 91 | nosetests.xml 92 | coverage.xml 93 | *.cover 94 | *.py,cover 95 | .hypothesis/ 96 | .pytest_cache/ 97 | cover/ 98 | 99 | # Translations 100 | *.mo 101 | *.pot 102 | 103 | # Django stuff: 104 | *.log 105 | local_settings.py 106 | db.sqlite3 107 | db.sqlite3-journal 108 | 109 | # Flask stuff: 110 | instance/ 111 | .webassets-cache 112 | 113 | # Scrapy stuff: 114 | .scrapy 115 | 116 | # Sphinx documentation 117 | docs/_build/ 118 | 119 | # PyBuilder 120 | .pybuilder/ 121 | target/ 122 | 123 | # Jupyter Notebook 124 | .ipynb_checkpoints 125 | 126 | # IPython 127 | profile_default/ 128 | ipython_config.py 129 | 130 | # pyenv 131 | # For a library or package, you might want to ignore these files since the code is 132 | # intended to run in multiple environments; otherwise, check them in: 133 | # .python-version 134 | 135 | # pipenv 136 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 137 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 138 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 139 | # install all needed dependencies. 140 | #Pipfile.lock 141 | 142 | # poetry 143 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 144 | # This is especially recommended for binary packages to ensure reproducibility, and is more 145 | # commonly ignored for libraries. 146 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 147 | #poetry.lock 148 | 149 | # pdm 150 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 151 | #pdm.lock 152 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 153 | # in version control. 154 | # https://pdm.fming.dev/#use-with-ide 155 | .pdm.toml 156 | 157 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 158 | __pypackages__/ 159 | 160 | # Celery stuff 161 | celerybeat-schedule 162 | celerybeat.pid 163 | 164 | # SageMath parsed files 165 | *.sage.py 166 | 167 | # Environments 168 | .env 169 | .venv 170 | env/ 171 | venv/ 172 | ENV/ 173 | env.bak/ 174 | venv.bak/ 175 | 176 | # Spyder project settings 177 | .spyderproject 178 | .spyproject 179 | 180 | # Rope project settings 181 | .ropeproject 182 | 183 | # mkdocs documentation 184 | /site 185 | 186 | # mypy 187 | .mypy_cache/ 188 | .dmypy.json 189 | dmypy.json 190 | 191 | # Pyre type checker 192 | .pyre/ 193 | 194 | # pytype static type analyzer 195 | .pytype/ 196 | 197 | # Cython debug symbols 198 | cython_debug/ 199 | 200 | # PyCharm 201 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 202 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 203 | # and can be added to the global gitignore or merged into this file. For a more nuclear 204 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
205 | #.idea/ 206 | CLIP 207 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "third_party/colmap"] 2 | path = third_party/colmap 3 | url = https://github.com/colmap/colmap.git 4 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pycqa/flake8 3 | rev: 4.0.0 4 | hooks: 5 | - id: flake8 6 | args: [--max-line-length=120] 7 | exclude: third_party 8 | -------------------------------------------------------------------------------- /DATA_PROCESSING.md: -------------------------------------------------------------------------------- 1 | # Data Preparation 2 | 3 | *Note: please use respecting the license terms of each dataset. Each user is responsible for checking the content of datasets and the applicable licenses and determining if suitable for the intended use.* 4 | 5 | The following sections provide a guide on how to preprocess input videos for Neuralangelo. 6 | 7 | ## Prerequisites 8 | Initialize the COLMAP submodule: 9 | ```bash 10 | git submodule update --init --recursive 11 | ``` 12 | 13 | ## Self-captured video sequence 14 | To capture your own data, we recommend using a high shutter speed to avoid motion blur (which is very common when using a phone camera). We provide a synthetic [Lego sequence](https://drive.google.com/file/d/1yWoZ4Hk3FgmV3pd34ZbW7jEqgqyJgzHy/view?usp=drive_link) (from the original [NeRF](https://github.com/bmild/nerf)) as a toy example video for testing the workflow. There are two steps: 15 | 1. [preprocessing](#preprocessing) the data and running COLMAP, 16 | 2. [inspecting](#inspect-and-adjust-colmap-results) and refining the bounding sphere of interest for running Neuralangelo. 17 | 18 | ### Preprocessing 19 | First, set some environment variables: 20 | ```bash 21 | SEQUENCE=lego 22 | PATH_TO_VIDEO=lego.mp4 23 | DOWNSAMPLE_RATE=2 24 | SCENE_TYPE=object 25 | ``` 26 | where 27 | - `SEQUENCE`: your custom name for the video sequence. 28 | - `PATH_TO_VIDEO`: absolute/relative path to your video. 29 | - `DOWNSAMPLE_RATE`: temporal downsampling rate of video sequence (for extracting video frames). 30 | - `SCENE_TYPE`: can be one of ` {outdoor,indoor,object}`. 31 | 32 | To preprocess your data, you can choose to either 33 | 34 | - Run the following end-to-end script: 35 | ```bash 36 | bash projects/neuralangelo/scripts/preprocess.sh ${SEQUENCE} ${PATH_TO_VIDEO} ${DOWNSAMPLE_RATE} ${SCENE_TYPE} 37 | ``` 38 | 39 | - Or you can follow the steps below if you want more fine-grained control: 40 | 41 | 1. Extract images from the input video 42 | 43 | ```bash 44 | bash projects/neuralangelo/scripts/run_ffmpeg.sh ${SEQUENCE} ${PATH_TO_VIDEO} ${DOWNSAMPLE_RATE} 45 | ``` 46 | This will create a directory `datasets/{SEQUENCE}_ds{DOWNSAMPLE_RATE}` (set as `DATA_PATH` onwards), which stores all the processed data. 47 | The extracted images will be stored in `{DATA_PATH}/images_raw`. 48 | 49 | 2. Run COLMAP 50 | 51 | ```bash 52 | DATA_PATH=datasets/${SEQUENCE}_ds${DOWNSAMPLE_RATE} 53 | bash projects/neuralangelo/scripts/run_colmap.sh ${DATA_PATH} 54 | ``` 55 | `DATA_PATH`: path to processed data. 
56 | 57 | After COLMAP finishes, the folder structure will look like the following: 58 | ``` 59 | DATA_PATH 60 | ├─ database.db (COLMAP database) 61 | ├─ images (undistorted input images) 62 | ├─ images_raw (raw input images) 63 | ├─ sparse (COLMAP data from SfM) 64 | │ ├─ cameras.bin (camera parameters) 65 | │ ├─ images.bin (images and camera poses) 66 | │ ├─ points3D.bin (sparse point clouds) 67 | │ ├─ 0 (a directory containing individual SfM models. There could also be 1, 2... etc.) 68 | │ ... 69 | ├─ stereo (COLMAP data for MVS, not used here) 70 | ... 71 | ``` 72 | `{DATA_PATH}/images` will be used as the input images for surface reconstruction. 73 | 74 | 3. Generate JSON file for data loading 75 | 76 | In this step, we define the bounding region for reconstruction and convert the COLMAP data to JSON format following Instant NGP. 77 | It is strongly recommended to [inspect](#inspect-and-adjust-colmap-results) the results to verify and adjust the bounding region for improved performance. 78 | ```bash 79 | python3 projects/neuralangelo/scripts/convert_data_to_json.py --data_dir ${DATA_PATH} --scene_type ${SCENE_TYPE} 80 | ``` 81 | The JSON file will be generated in `{DATA_PATH}/transforms.json`. 82 | 83 | 4. Config files 84 | 85 | Use the following to configure and generate your config files: 86 | ```bash 87 | python3 projects/neuralangelo/scripts/generate_config.py --sequence_name ${SEQUENCE} --data_dir ${DATA_PATH} --scene_type ${SCENE_TYPE} 88 | ``` 89 | The config file will be generated as `projects/neuralangelo/configs/custom/{SEQUENCE}.yaml`. 90 | You can add the `--help` flag to list all arguments; for example, consider adding `--auto_exposure_wb` for modeling varying lighting/appearances in the video. 91 | Alternatively, you can directly modify the hyperparameters in the generated config file. 92 | 93 | ### Inspect and adjust COLMAP results 94 | 95 | In some cases, the camera poses estimated by COLMAP can be erroneous. In addition, the automatically estimated bounding sphere (which should ideally enclose the scene/object of interest) can be inaccurate, so adjusting it is highly recommended. 96 | We offer some tools to inspect and adjust the preprocessing results. Below are some options: 97 | 98 | - Blender: Download [Blender](https://www.blender.org/download/) and follow the instructions in our [add-on repo](https://github.com/mli0603/BlenderNeuralangelo). The add-on will save your adjustment of the bounding sphere. 99 | - This [Jupyter notebook](projects/neuralangelo/scripts/visualize_colmap.ipynb) (using K3D) can be helpful for visualizing the COLMAP results. You can adjust the bounding sphere by manually specifying the refining sphere center and size in the `data.readjust` config. 100 | 101 | In some cases, an exhaustive feature matcher may be able to estimate more accurate camera poses. 102 | This can be done by changing `sequential_matcher` to `exhaustive_matcher` in [run_colmap.sh](https://github.com/NVlabs/neuralangelo/blob/main/projects/neuralangelo/scripts/run_colmap.sh#L24), as shown in the sketch below. 103 | However, this takes more time to process and can sometimes result in "broken trajectories" (from COLMAP failing due to ambiguous matches). 104 | For more details, please refer to the COLMAP [documentation](https://colmap.github.io/).
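For example, assuming the preprocessing script still invokes COLMAP's sequential matcher at that line (as in the default setup), the swap can be made with a one-line edit. This is only a convenience sketch; inspect `run_colmap.sh` first if unsure.

```bash
# Switch COLMAP feature matching from sequential to exhaustive in the preprocessing script.
# Assumes the default script still contains the string "sequential_matcher"; a .bak backup is kept.
sed -i.bak 's/sequential_matcher/exhaustive_matcher/' projects/neuralangelo/scripts/run_colmap.sh
```

After editing, re-run `run_colmap.sh` on the sequence to regenerate the camera poses with exhaustive matching.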
105 | 106 | ## DTU dataset 107 | You can run the following command to download [the DTU dataset](https://roboimagedata.compute.dtu.dk/?page_id=36) that is preprocessed by NeuS authors and generate json files: 108 | ```bash 109 | PATH_TO_DTU=datasets/dtu # Modify this to be the DTU dataset root directory. 110 | bash projects/neuralangelo/scripts/preprocess_dtu.sh ${PATH_TO_DTU} 111 | ``` 112 | 113 | ## Tanks and Temples dataset 114 | Download the data from [Tanks and Temples](https://tanksandtemples.org/download/) website. 115 | You will also need to download additional [COLMAP/camera/alignment](https://drive.google.com/file/d/1jAr3IDvhVmmYeDWi0D_JfgiHcl70rzVE/view?resourcekey=) and the images of each scene. 116 | The file structure should look like (you need to move the downloaded images to folder `images_raw`): 117 | ``` 118 | tanks_and_temples 119 | ├─ Barn 120 | │ ├─ Barn_COLMAP_SfM.log (camera poses) 121 | │ ├─ Barn.json (cropfiles) 122 | │ ├─ Barn.ply (ground-truth point cloud) 123 | │ ├─ Barn_trans.txt (colmap-to-ground-truth transformation) 124 | │ └─ images_raw (raw input images downloaded from Tanks and Temples website) 125 | │ ├─ 000001.png 126 | │ ├─ 000002.png 127 | │ ... 128 | ├─ Caterpillar 129 | │ ├─ ... 130 | ... 131 | ``` 132 | Run the following command to generate json files: 133 | ```bash 134 | PATH_TO_TNT=datasets/tanks_and_temples # Modify this to be the Tanks and Temples root directory. 135 | bash projects/neuralangelo/scripts/preprocess_tnt.sh ${PATH_TO_TNT} 136 | ``` 137 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # NVIDIA Source Code License for Neuralangelo 2 | 3 | ## 1. Definitions 4 | 5 | - “Licensor” means any person or entity that distributes its Work. 6 | 7 | - “Software” means the original work of authorship made available under this License. 8 | 9 | - “Work” means the Software and any additions to or derivative works of the Software that are made available under this License. 10 | 11 | - “NVIDIA Processors” means any central processing unit (CPU), graphics processing unit (GPU), field-programmable gate array (FPGA), application-specific integrated circuit (ASIC) or any combination thereof designed, made, sold, or provided by NVIDIA or its affiliates. 12 | 13 | - The terms “reproduce,” “reproduction,” “derivative works,” and “distribution” have the meaning as provided under U.S. copyright law; provided, however, that for the purposes of this License, derivative works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work. 14 | 15 | - Works, including the Software, are “made available” under this License by including in or with the Work either (a) a copyright notice referencing the applicability of this License to the Work, or (b) a copy of this License. 16 | 17 | ## 2. License Grant 18 | 19 | ### 2.1 Copyright Grant. 20 | 21 | Subject to the terms and conditions of this License, each Licensor grants to you a perpetual, worldwide, non-exclusive, royalty-free, copyright license to reproduce, prepare derivative works of, publicly display, publicly perform, sublicense and distribute its Work and any resulting derivative works in any form. 22 | 23 | ## 3. Limitations 24 | 25 | ### 3.1 Redistribution. 
26 | 27 | You may reproduce or distribute the Work only if (a) you do so under this License, (b) you include a complete copy of this License with your distribution, and (c) you retain without modification any copyright, patent, trademark, or attribution notices that are present in the Work. 28 | 29 | ### 3.2 Derivative Works. 30 | 31 | You may specify that additional or different terms apply to the use, reproduction, and distribution of your derivative works of the Work (“Your Terms”) only if (a) Your Terms provide that the use limitation in Section 3.3 applies to your derivative works, and (b) you identify the specific derivative works that are subject to Your Terms. Notwithstanding Your Terms, this License (including the redistribution requirements in Section 3.1) will continue to apply to the Work itself. 32 | 33 | ### 3.3 Use Limitation. 34 | 35 | The Work and any derivative works thereof only may be used or intended for use non-commercially and with NVIDIA Processors. Notwithstanding the foregoing, NVIDIA and its affiliates may use the Work and any derivative works commercially. As used herein, “non-commercially” means for research or evaluation purposes only. 36 | 37 | ### 3.4 Patent Claims. 38 | 39 | If you bring or threaten to bring a patent claim against any Licensor (including any claim, cross-claim or counterclaim in a lawsuit) to enforce any patents that you allege are infringed by any Work, then your rights under this License from such Licensor (including the grant in Section 2.1) will terminate immediately. 40 | 41 | ### 3.5 Trademarks. 42 | 43 | This License does not grant any rights to use any Licensor’s or its affiliates’ names, logos, or trademarks, except as necessary to reproduce the notices described in this License. 44 | 45 | ### 3.6 Termination. 46 | 47 | If you violate any term of this License, then your rights under this License (including the grant in Section 2.1) will terminate immediately. 48 | 49 | ## 4. Disclaimer of Warranty. 50 | 51 | THE WORK IS PROVIDED “AS IS” WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER THIS LICENSE. 52 | 53 | ## 5. Limitation of Liability. 54 | 55 | EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK (INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION, LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 56 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Neuralangelo 2 | This is the official implementation of **Neuralangelo: High-Fidelity Neural Surface Reconstruction**. 3 | 4 | [Zhaoshuo Li](https://mli0603.github.io/), 5 | [Thomas Müller](https://tom94.net/), 6 | [Alex Evans](https://research.nvidia.com/person/alex-evans), 7 | [Russell H.
Taylor](https://www.cs.jhu.edu/~rht/), 8 | [Mathias Unberath](https://mathiasunberath.github.io/), 9 | [Ming-Yu Liu](https://mingyuliu.net/), 10 | [Chen-Hsuan Lin](https://chenhsuanlin.bitbucket.io/) 11 | IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2023 12 | 13 | ### [Project page](https://research.nvidia.com/labs/dir/neuralangelo/) | [Paper](https://arxiv.org/abs/2306.03092/) | [Colab notebook](https://colab.research.google.com/drive/13u8DX9BNzQwiyPPCB7_4DbSxiQ5-_nGF) 14 | 15 | 16 | 17 | The code is built upon the Imaginaire library from the Deep Imagination Research Group at NVIDIA. 18 | For business inquiries, please submit the [NVIDIA research licensing form](https://www.nvidia.com/en-us/research/inquiries/). 19 | 20 | -------------------------------------- 21 | 22 | ## Installation 23 | We offer two ways to setup the environment: 24 | 1. We provide prebuilt Docker images, where 25 | - `docker.io/chenhsuanlin/colmap:3.8` is for running COLMAP and the data preprocessing scripts. This includes the prebuilt COLMAP library (CUDA-supported). 26 | - `docker.io/chenhsuanlin/neuralangelo:23.04-py3` is for running the main Neuralangelo pipeline. 27 | 28 | The corresponding Dockerfiles can be found in the `docker` directory. 29 | 2. The conda environment for Neuralangelo. Install the dependencies and activate the environment `neuralangelo` with 30 | ```bash 31 | conda env create --file neuralangelo.yaml 32 | conda activate neuralangelo 33 | ``` 34 | For COLMAP, alternative installation options are also available on the [COLMAP website](https://colmap.github.io/). 35 | 36 | -------------------------------------- 37 | 38 | ## Data preparation 39 | Please refer to [Data Preparation](DATA_PROCESSING.md) for step-by-step instructions. 40 | We assume known camera poses for each extracted frame from the video. 41 | The code uses the same json format as [Instant NGP](https://github.com/NVlabs/instant-ngp). 42 | 43 | -------------------------------------- 44 | 45 | ## Run Neuralangelo! 46 | ```bash 47 | EXPERIMENT=toy_example 48 | GROUP=example_group 49 | NAME=example_name 50 | CONFIG=projects/neuralangelo/configs/custom/${EXPERIMENT}.yaml 51 | GPUS=1 # use >1 for multi-GPU training! 52 | torchrun --nproc_per_node=${GPUS} train.py \ 53 | --logdir=logs/${GROUP}/${NAME} \ 54 | --config=${CONFIG} \ 55 | --show_pbar 56 | ``` 57 | Some useful notes: 58 | - This codebase supports logging with [Weights & Biases](https://wandb.ai/site). You should have a W&B account for this. 59 | - Add `--wandb` to the command line argument to enable W&B logging. 60 | - Add `--wandb_name` to specify the W&B project name. 61 | - More detailed control can be found in the `init_wandb()` function in `imaginaire/trainers/base.py`. 62 | - Configs can be overridden through the command line (e.g. `--optim.params.lr=1e-2`). 63 | - Set `--checkpoint={CHECKPOINT_PATH}` to initialize with a certain checkpoint; set `--resume` to resume training. 64 | - If appearance embeddings are enabled, make sure `data.num_images` is set to the number of training images. 
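Putting a few of these options together, an interrupted run can be resumed from its latest checkpoint with W&B logging enabled. This is only a sketch: the checkpoint filename below follows the `latest_checkpoint.pt` naming described in `imaginaire/config_base.yaml`, and the W&B project name is a placeholder; substitute the actual files and names from your own log directory.

```bash
# Sketch: resume an interrupted run from its latest checkpoint, with W&B logging enabled.
# The checkpoint path and W&B project name are placeholders; adjust them to your setup.
torchrun --nproc_per_node=${GPUS} train.py \
    --logdir=logs/${GROUP}/${NAME} \
    --config=${CONFIG} \
    --checkpoint=logs/${GROUP}/${NAME}/latest_checkpoint.pt \
    --resume \
    --wandb \
    --wandb_name=example_project \
    --show_pbar
```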
65 | 66 | -------------------------------------- 67 | 68 | ## Isosurface extraction 69 | Use the following command to run isosurface mesh extraction: 70 | ```bash 71 | CHECKPOINT=logs/${GROUP}/${NAME}/xxx.pt 72 | OUTPUT_MESH=xxx.ply 73 | CONFIG=logs/${GROUP}/${NAME}/config.yaml 74 | RESOLUTION=2048 75 | BLOCK_RES=128 76 | GPUS=1 # use >1 for multi-GPU mesh extraction 77 | torchrun --nproc_per_node=${GPUS} projects/neuralangelo/scripts/extract_mesh.py \ 78 | --config=${CONFIG} \ 79 | --checkpoint=${CHECKPOINT} \ 80 | --output_file=${OUTPUT_MESH} \ 81 | --resolution=${RESOLUTION} \ 82 | --block_res=${BLOCK_RES} 83 | ``` 84 | Some useful notes: 85 | - Add `--textured` to extract meshes with textures. 86 | - Add `--keep_lcc` to remove noise by keeping only the largest connected component; this may also remove thin structures. 87 | - Lower `BLOCK_RES` to reduce GPU memory usage. 88 | - Lower `RESOLUTION` to reduce mesh size. 89 | 90 | -------------------------------------- 91 | 92 | ## Frequently asked questions (FAQ) 93 | 1. **Q:** CUDA out of memory. How do I decrease the memory footprint? 94 | **A:** Neuralangelo requires at least 24GB of GPU memory with our default configuration. If you run out of memory, consider adjusting the following hyperparameters under `model.object.sdf.encoding.hashgrid` (with suggested values): 95 | 96 | | GPU VRAM | Hyperparameter | 97 | | :-----------: | :---------------------: | 98 | | 8GB | `dict_size=20`, `dim=4` | 99 | | 12GB | `dict_size=21`, `dim=4` | 100 | | 16GB | `dict_size=21`, `dim=8` | 101 | 102 | Please note that the above hyperparameter adjustment may sacrifice reconstruction quality. 103 | 104 | If Neuralangelo runs fine during training but runs out of CUDA memory during evaluation, consider adjusting the evaluation parameters under `data.val`: set a smaller `image_size` (e.g., a maximum resolution of 200x200) and set `batch_size=1`, `subset=1` (see the example command at the end of this section). 105 | 106 | 2. **Q:** The reconstruction of my custom dataset is bad. What can I do? 107 | **A:** It is worth looking into the following: 108 | - The camera poses recovered by COLMAP may be off. We have implemented tools (using [Blender](https://github.com/mli0603/BlenderNeuralangelo) or [Jupyter notebook](projects/neuralangelo/scripts/visualize_colmap.ipynb)) to inspect the COLMAP results. 109 | - The computed bounding regions may be off and/or too small/large. Please refer to [data preprocessing](DATA_PROCESSING.md) on how to adjust the bounding regions manually. 110 | - The video capture sequence may contain significant motion blur or out-of-focus frames. A higher shutter speed (reducing motion blur) and a smaller aperture (increasing the focus range) are very helpful.
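As a concrete illustration of the first question above, the memory-related settings can be passed as command-line config overrides (the same dotted syntax described in the training section), shown here with the 16GB row of the table plus the suggested evaluation settings. Treat this as a sketch: the bracketed list syntax for `data.val.image_size` is an assumption, and editing the generated config YAML directly is the safer fallback.

```bash
# Sketch: train with the 16GB-VRAM hashgrid settings from the FAQ table and a lighter validation pass.
# ASSUMPTION: the [H,W] list syntax for data.val.image_size may not parse on the command line;
# if so, set these fields in the config YAML instead.
torchrun --nproc_per_node=${GPUS} train.py \
    --logdir=logs/${GROUP}/${NAME} \
    --config=${CONFIG} \
    --show_pbar \
    --model.object.sdf.encoding.hashgrid.dict_size=21 \
    --model.object.sdf.encoding.hashgrid.dim=8 \
    --data.val.image_size=[200,200] \
    --data.val.batch_size=1 \
    --data.val.subset=1
```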
111 | 112 | -------------------------------------- 113 | 114 | ## Citation 115 | If you find our code useful for your research, please cite 116 | ``` 117 | @inproceedings{li2023neuralangelo, 118 | title={Neuralangelo: High-Fidelity Neural Surface Reconstruction}, 119 | author={Li, Zhaoshuo and M\"uller, Thomas and Evans, Alex and Taylor, Russell H and Unberath, Mathias and Liu, Ming-Yu and Lin, Chen-Hsuan}, 120 | booktitle={IEEE Conference on Computer Vision and Pattern Recognition ({CVPR})}, 121 | year={2023} 122 | } 123 | ``` 124 | -------------------------------------------------------------------------------- /assets/teaser.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/neuralangelo/94390b64683c067c620d9e075224ccfe582647d0/assets/teaser.gif -------------------------------------------------------------------------------- /docker/Dockerfile-colmap: -------------------------------------------------------------------------------- 1 | # docker build -f docker/Dockerfile-colmap -t chenhsuanlin/colmap:3.8 . 2 | # docker push chenhsuanlin/colmap:3.8 3 | 4 | FROM nvcr.io/nvidia/cuda:11.8.0-devel-ubuntu20.04 5 | ARG DEBIAN_FRONTEND=noninteractive 6 | 7 | # colmap dependencies 8 | RUN apt-get update && apt-get install -y \ 9 | git \ 10 | cmake \ 11 | ninja-build \ 12 | build-essential \ 13 | libboost-program-options-dev \ 14 | libboost-filesystem-dev \ 15 | libboost-graph-dev \ 16 | libboost-system-dev \ 17 | libboost-test-dev \ 18 | libeigen3-dev \ 19 | libflann-dev \ 20 | libfreeimage-dev \ 21 | libmetis-dev \ 22 | libgoogle-glog-dev \ 23 | libgflags-dev \ 24 | libsqlite3-dev \ 25 | libglew-dev \ 26 | qtbase5-dev \ 27 | libqt5opengl5-dev \ 28 | libcgal-dev \ 29 | libceres-dev 30 | # headless servers 31 | RUN apt-get update && apt-get install -y \ 32 | xvfb 33 | # Colmap 34 | RUN git clone https://github.com/colmap/colmap.git && cd colmap && git checkout 3.8 35 | RUN cd colmap && mkdir build && cd build && cmake .. -DCUDA_ENABLED=ON -DCMAKE_CUDA_ARCHITECTURES="70;72;75;80;86" -GNinja 36 | RUN cd colmap/build && ninja && ninja install 37 | 38 | # additional python packages 39 | RUN apt-get update && apt-get install -y \ 40 | pip \ 41 | ffmpeg 42 | RUN pip install \ 43 | addict \ 44 | k3d \ 45 | opencv-python-headless \ 46 | pillow \ 47 | plotly \ 48 | pyyaml \ 49 | trimesh 50 | -------------------------------------------------------------------------------- /docker/Dockerfile-neuralangelo: -------------------------------------------------------------------------------- 1 | # docker build -f docker/Dockerfile-neuralangelo -t chenhsuanlin/neuralangelo:23.04-py3 . 
2 | # docker push chenhsuanlin/neuralangelo:23.04-py3 3 | 4 | FROM nvcr.io/nvidia/pytorch:23.04-py3 5 | ARG DEBIAN_FRONTEND=noninteractive 6 | 7 | # Install basics 8 | RUN apt-get update && apt-get install -y --no-install-recommends \ 9 | build-essential \ 10 | bzip2 \ 11 | ca-certificates \ 12 | cmake \ 13 | curl \ 14 | ffmpeg \ 15 | g++ \ 16 | git \ 17 | libx264-dev \ 18 | tmux \ 19 | wget 20 | 21 | # Update pip 22 | RUN pip install --upgrade pip 23 | 24 | # Code formatting 25 | RUN pip install --upgrade \ 26 | flake8 \ 27 | pre-commit 28 | 29 | # Install base Python libraries for Imaginaire 30 | COPY requirements.txt requirements.txt 31 | ARG FORCE_CUDA=1 32 | ARG TCNN_CUDA_ARCHITECTURES=70,72,75,80,86 33 | RUN pip install --upgrade -r requirements.txt 34 | -------------------------------------------------------------------------------- /imaginaire/config_base.yaml: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------------------- 2 | # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # NVIDIA CORPORATION and its licensors retain all intellectual property 5 | # and proprietary rights in and to this software, related documentation 6 | # and any modifications thereto. Any use, reproduction, disclosure or 7 | # distribution of this software and related documentation without an express 8 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 9 | # ----------------------------------------------------------------------------- 10 | 11 | # This is the base configuration file. 12 | 13 | # We often dump images to understand what's going on in the training. 14 | # image_save_iter specifies how often we dump images. 15 | image_save_iter: 9999999999 16 | # metrics_iter and metrics_epoch specify how often we compute the performance metrics 17 | # If these two numbers are not set, they are copied from checkpoint.save_iter and checkpoint.save_epoch respectively. 18 | metrics_iter: 19 | metrics_epoch: 20 | # max_epoch and max_iter specify what is the maximum epoch and iteration that we will train our model. 21 | # min( max_epoch * dataset_size / batch_size, max_iter) will be the total number of iterations that the model will be trained. 22 | max_epoch: 9999999999 23 | max_iter: 9999999999 24 | # logging_iter controls how often we log the training stats. 25 | logging_iter: 100 26 | # If speed_benchmark is True, we will print out time required for forward, backward, and gradient update. 27 | speed_benchmark: False 28 | # Kill the process if `timeout_period` seconds have passed since the last iteration. This usually means the process gets stuck. 29 | timeout_period: 9999999 30 | 31 | # Default local rank 32 | local_rank: 0 33 | # Toggle NVTX profiler 34 | nvtx_profile: False 35 | 36 | # Checkpointer 37 | checkpoint: 38 | # If save_iter is set to M, then we save the checkpoint every M iteration. 39 | # If save_latest_iter is set to M, then we save the checkpoint every M iteration using the name 40 | # 'latest_checkpoint.pt', so that the new checkpoint will overwrite previous ones. 41 | # If save_epoch is set to N, then we save the checkpoint every N epoch. 42 | # Both can be set at the same time. 
43 | save_iter: 9999999999 44 | save_latest_iter: 9999999999 45 | save_epoch: 9999999999 46 | save_period: 9999999999 47 | # If True, load state_dict to the models in strict mode 48 | strict_resume: True 49 | 50 | # Trainer 51 | trainer: 52 | ema_config: 53 | enabled: False 54 | beta: 0.9999 55 | start_iteration: 0 56 | 57 | image_to_tensorboard: False 58 | ddp_config: 59 | find_unused_parameters: False 60 | static_graph: True 61 | init: 62 | type: none 63 | gain: 64 | amp_config: 65 | init_scale: 65536.0 66 | growth_factor: 2.0 67 | backoff_factor: 0.5 68 | growth_interval: 2000 69 | enabled: False 70 | grad_accum_iter: 1 71 | 72 | # Networks 73 | model: 74 | type: dummy 75 | 76 | # Optimizers 77 | optim: 78 | type: Adam 79 | params: 80 | # This defines the parameters for the specified PyTorch optimizer class (e.g. betas, eps). 81 | lr: 0.0001 82 | fused_opt: False 83 | # Default learning rate policy is step with iteration_mode=False (epoch mode), step_size=10^10, and gamma=1. 84 | # This means a constant learning rate 85 | sched: 86 | iteration_mode: False 87 | type: step 88 | step_size: 9999999999 89 | gamma: 1 90 | 91 | # Data 92 | data: 93 | name: dummy 94 | type: imaginaire.datasets.images 95 | use_multi_epoch_loader: False 96 | num_workers: 0 97 | test_data: 98 | name: dummy 99 | type: imaginaire.datasets.images 100 | num_workers: 0 101 | test: 102 | is_lmdb: False 103 | roots: 104 | batch_size: 1 105 | 106 | # cuDNN 107 | # set deterministic to True for better reproducibility of the results. When deterministic is True, it will only use CUDNN functions that are deterministic. 108 | # If benchmark is set to True, cudnn will benchmark several algorithms and pick that which it found to be fastest at the first iteration. 109 | cudnn: 110 | deterministic: False 111 | benchmark: True 112 | 113 | # Others 114 | pretrained_weight: 115 | inference_args: {} 116 | -------------------------------------------------------------------------------- /imaginaire/datasets/utils/dataloader.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import torch 14 | 15 | 16 | class MultiEpochsDataLoader(torch.utils.data.DataLoader): 17 | """ 18 | Relentlessly sample from the dataset. 19 | This eliminates the overhead of prefetching data before each epoch. 
20 | https://github.com/rwightman/pytorch-image-models/blob/master/timm/data/loader.py 21 | """ 22 | 23 | def __init__(self, *args, **kwargs): 24 | super().__init__(*args, **kwargs) 25 | self._DataLoader__initialized = False 26 | self.batch_sampler = _RepeatSampler(self.batch_sampler) 27 | self._DataLoader__initialized = True 28 | self.iterator = super().__iter__() 29 | 30 | def __len__(self): 31 | return len(self.batch_sampler.sampler) 32 | 33 | def __iter__(self): 34 | for i in range(len(self)): 35 | yield next(self.iterator) 36 | 37 | 38 | class _RepeatSampler(object): 39 | """ Sampler that repeats forever. 40 | Args: 41 | sampler (Sampler) 42 | """ 43 | 44 | def __init__(self, sampler): 45 | self.sampler = sampler 46 | 47 | def __iter__(self): 48 | while True: 49 | yield from iter(self.sampler) 50 | -------------------------------------------------------------------------------- /imaginaire/datasets/utils/get_dataloader.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import importlib 14 | 15 | import torch 16 | import torch.distributed as dist 17 | 18 | from imaginaire.utils.distributed import master_only_print as print 19 | 20 | from imaginaire.datasets.utils.sampler import DistributedSamplerPreemptable 21 | from imaginaire.datasets.utils.dataloader import MultiEpochsDataLoader 22 | 23 | 24 | def _get_train_dataset_objects(cfg, subset_indices=None): 25 | r"""Return dataset objects for the training set. 26 | Args: 27 | cfg (obj): Global configuration file. 28 | subset_indices (sequence): Indices of the subset to use. 29 | 30 | Returns: 31 | train_dataset (obj): PyTorch training dataset object. 32 | """ 33 | dataset_module = importlib.import_module(cfg.data.type) 34 | train_dataset = dataset_module.Dataset(cfg, is_inference=False) 35 | if subset_indices is not None: 36 | train_dataset = torch.utils.data.Subset(train_dataset, subset_indices) 37 | print('Train dataset length:', len(train_dataset)) 38 | return train_dataset 39 | 40 | 41 | def _get_val_dataset_objects(cfg, subset_indices=None): 42 | r"""Return dataset objects for the validation set. 43 | Args: 44 | cfg (obj): Global configuration file. 45 | subset_indices (sequence): Indices of the subset to use. 46 | Returns: 47 | val_dataset (obj): PyTorch validation dataset object. 
48 | """ 49 | dataset_module = importlib.import_module(cfg.data.type) 50 | if hasattr(cfg.data.val, 'type'): 51 | for key in ['type', 'input_types', 'input_image']: 52 | setattr(cfg.data, key, getattr(cfg.data.val, key)) 53 | dataset_module = importlib.import_module(cfg.data.type) 54 | val_dataset = dataset_module.Dataset(cfg, is_inference=True) 55 | 56 | if subset_indices is not None: 57 | val_dataset = torch.utils.data.Subset(val_dataset, subset_indices) 58 | print('Val dataset length:', len(val_dataset)) 59 | return val_dataset 60 | 61 | 62 | def _get_test_dataset_object(cfg, subset_indices=None): 63 | r"""Return dataset object for the test set 64 | 65 | Args: 66 | cfg (obj): Global configuration file. 67 | subset_indices (sequence): Indices of the subset to use. 68 | Returns: 69 | (obj): PyTorch dataset object. 70 | """ 71 | dataset_module = importlib.import_module(cfg.test_data.type) 72 | test_dataset = dataset_module.Dataset(cfg, is_inference=True, is_test=True) 73 | if subset_indices is not None: 74 | test_dataset = torch.utils.data.Subset(test_dataset, subset_indices) 75 | return test_dataset 76 | 77 | 78 | def _get_data_loader(cfg, dataset, batch_size, not_distributed=False, 79 | shuffle=True, drop_last=True, seed=0, use_multi_epoch_loader=False, 80 | preemptable=False): 81 | r"""Return data loader . 82 | 83 | Args: 84 | cfg (obj): Global configuration file. 85 | dataset (obj): PyTorch dataset object. 86 | batch_size (int): Batch size. 87 | not_distributed (bool): Do not use distributed samplers. 88 | shuffle (bool): Whether to shuffle the data 89 | drop_last (bool): Whether to drop the last batch is the number of samples is smaller than the batch size 90 | seed (int): random seed. 91 | preemptable (bool): Whether to handle preemptions. 92 | Return: 93 | (obj): Data loader. 94 | """ 95 | not_distributed = not_distributed or not dist.is_initialized() 96 | if not_distributed: 97 | sampler = None 98 | else: 99 | if preemptable: 100 | sampler = DistributedSamplerPreemptable(dataset, shuffle=shuffle, seed=seed) 101 | else: 102 | sampler = torch.utils.data.distributed.DistributedSampler(dataset, shuffle=shuffle, seed=seed) 103 | num_workers = getattr(cfg.data, 'num_workers', 8) 104 | persistent_workers = getattr(cfg.data, 'persistent_workers', False) 105 | data_loader = (MultiEpochsDataLoader if use_multi_epoch_loader else torch.utils.data.DataLoader)( 106 | dataset, 107 | batch_size=batch_size, 108 | shuffle=shuffle and (sampler is None), 109 | sampler=sampler, 110 | pin_memory=True, 111 | num_workers=num_workers, 112 | drop_last=drop_last, 113 | persistent_workers=persistent_workers if num_workers > 0 else False 114 | ) 115 | return data_loader 116 | 117 | 118 | def get_train_dataloader( 119 | cfg, shuffle=True, drop_last=True, subset_indices=None, seed=0, preemptable=False): 120 | r"""Return dataset objects for the training and validation sets. 121 | Args: 122 | cfg (obj): Global configuration file. 123 | shuffle (bool): Whether to shuffle the data 124 | drop_last (bool): Whether to drop the last batch is the number of samples is smaller than the batch size 125 | subset_indices (sequence): Indices of the subset to use. 126 | seed (int): random seed. 127 | preemptable (bool): Flag for preemption handling 128 | Returns: 129 | train_data_loader (obj): Train data loader. 
130 | """ 131 | train_dataset = _get_train_dataset_objects(cfg, subset_indices=subset_indices) 132 | train_data_loader = _get_data_loader( 133 | cfg, train_dataset, cfg.data.train.batch_size, not_distributed=False, 134 | shuffle=shuffle, drop_last=drop_last, seed=seed, 135 | use_multi_epoch_loader=cfg.data.use_multi_epoch_loader, 136 | preemptable=preemptable 137 | ) 138 | return train_data_loader 139 | 140 | 141 | def get_val_dataloader(cfg, subset_indices=None, seed=0): 142 | r"""Return dataset objects for the training and validation sets. 143 | Args: 144 | cfg (obj): Global configuration file. 145 | subset_indices (sequence): Indices of the subset to use. 146 | seed (int): random seed. 147 | Returns: 148 | val_data_loader (obj): Val data loader. 149 | """ 150 | val_dataset = _get_val_dataset_objects(cfg, subset_indices=subset_indices) 151 | not_distributed = getattr(cfg.data, 'val_data_loader_not_distributed', False) 152 | # We often use a folder of images to represent a video. As doing evaluation, we like the images to preserve the 153 | # original order. As a result, we do not want to distribute images from the same video to different GPUs. 154 | not_distributed = 'video' in cfg.data.type or not_distributed 155 | drop_last = getattr(cfg.data.val, 'drop_last', False) 156 | # Validation loader need not have preemption handling. 157 | val_data_loader = _get_data_loader( 158 | cfg, val_dataset, cfg.data.val.batch_size, not_distributed=not_distributed, 159 | shuffle=False, drop_last=drop_last, seed=seed, 160 | preemptable=False 161 | ) 162 | return val_data_loader 163 | 164 | 165 | def get_test_dataloader(cfg, subset_indices=None): 166 | r"""Return dataset objects for testing 167 | 168 | Args: 169 | cfg (obj): Global configuration file. 170 | subset_indices (sequence): Indices of the subset to use. 171 | Returns: 172 | (obj): Test data loader. It may not contain the ground truth. 173 | """ 174 | test_dataset = _get_test_dataset_object(cfg, subset_indices=subset_indices) 175 | not_distributed = getattr( 176 | cfg.test_data, 'val_data_loader_not_distributed', False) 177 | not_distributed = 'video' in cfg.test_data.type or not_distributed 178 | test_data_loader = _get_data_loader( 179 | cfg, test_dataset, cfg.test_data.test.batch_size, not_distributed=not_distributed, 180 | shuffle=False) 181 | return test_data_loader 182 | -------------------------------------------------------------------------------- /imaginaire/datasets/utils/sampler.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import math 14 | import torch.distributed as dist 15 | import torch 16 | 17 | from torch.utils.data import Sampler 18 | from typing import TypeVar 19 | 20 | T_co = TypeVar('T_co', covariant=True) 21 | 22 | 23 | class DistributedSamplerPreemptable(Sampler[T_co]): 24 | r"""Sampler that supports loading from an iteration. 
25 | This is very useful for preemptable jobs. 26 | 27 | Args: 28 | dataset (torch.utils.data.Dataset): Dataset object 29 | num_replicas (int): Number of replicas to distribute the dataloader over. 30 | This is typically the world size in DDP jobs. 31 | rank (int): Rank of the current process. 32 | shuffle (bool): Whether to shuffle the dataloader in each epoch. 33 | seed (int): Random seed used for shuffling the dataloader. 34 | drop_last (bool): Whether to drop the last batch. 35 | """ 36 | 37 | def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True, 38 | seed=0, drop_last=False): 39 | 40 | if num_replicas is None: 41 | if not dist.is_available(): 42 | raise RuntimeError("Requires distributed package to be available") 43 | num_replicas = dist.get_world_size() 44 | if rank is None: 45 | if not dist.is_available(): 46 | raise RuntimeError("Requires distributed package to be available") 47 | rank = dist.get_rank() 48 | if rank >= num_replicas or rank < 0: 49 | raise ValueError( 50 | "Invalid rank {}, rank should be in the interval" 51 | " [0, {}]".format(rank, num_replicas - 1)) 52 | self.dataset = dataset 53 | self.num_replicas = num_replicas 54 | self.rank = rank 55 | self.epoch = 0 56 | 57 | # start_index is the index to begin the dataloader from. 58 | self.start_index = 0 59 | 60 | self.drop_last = drop_last 61 | # If the dataset length is evenly divisible by # of replicas, then there 62 | # is no need to drop any data, since the dataset will be split equally. 63 | if self.drop_last and len(self.dataset) % self.num_replicas != 0: # type: ignore[arg-type] 64 | # Split to nearest available length that is evenly divisible. 65 | # This is to ensure each rank receives the same amount of data when 66 | # using this Sampler. 67 | self.num_samples = math.ceil( 68 | (len(self.dataset) - self.num_replicas) / self.num_replicas # type: ignore[arg-type] 69 | ) 70 | else: 71 | self.num_samples = math.ceil(len(self.dataset) / self.num_replicas) # type: ignore[arg-type] 72 | self.total_size = self.num_samples * self.num_replicas 73 | self.shuffle = shuffle 74 | self.seed = seed 75 | 76 | def __iter__(self): 77 | if self.shuffle: 78 | # deterministically shuffle based on epoch and seed 79 | g = torch.Generator() 80 | g.manual_seed(self.seed + self.epoch) 81 | indices = torch.randperm(len(self.dataset), generator=g).tolist() # type: ignore[arg-type] 82 | else: 83 | indices = list(range(len(self.dataset))) # type: ignore[arg-type] 84 | 85 | if not self.drop_last: 86 | # add extra samples to make it evenly divisible 87 | padding_size = self.total_size - len(indices) 88 | if padding_size <= len(indices): 89 | indices += indices[:padding_size] 90 | else: 91 | indices += (indices * math.ceil(padding_size / len(indices)))[:padding_size] 92 | else: 93 | # remove tail of data to make it evenly divisible. 94 | indices = indices[:self.total_size] 95 | assert len(indices) == self.total_size 96 | 97 | # subsample 98 | indices = indices[self.rank:self.total_size:self.num_replicas] 99 | assert len(indices) == self.num_samples 100 | 101 | # assert self.start_index < len(indices) 102 | if self.start_index >= len(indices): 103 | print('(Warning): Start index is not less than the length of the dataloader. Going to the last batch of the dataset instead') 104 | # This is hardcoded to go one batch before.
105 | self.start_index = len(indices) - 64 106 | indices = indices[self.start_index:] 107 | 108 | return iter(indices) 109 | 110 | def __len__(self): 111 | return self.num_samples 112 | 113 | def set_epoch(self, epoch): 114 | self.epoch = epoch 115 | 116 | def set_iteration(self, start_index): 117 | self.start_index = start_index 118 | -------------------------------------------------------------------------------- /imaginaire/models/base.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import torch 14 | 15 | 16 | class Model(torch.nn.Module): 17 | 18 | def __init__(self, cfg_model, cfg_data): 19 | super().__init__() 20 | 21 | def get_param_groups(self, cfg_optim): 22 | """Allow the network to use different hyperparameters (e.g., learning rate) for different parameters. 23 | Returns: 24 | PyTorch parameter group (list or generator). See the PyTorch documentation for details. 25 | """ 26 | return self.parameters() 27 | 28 | def device(self): 29 | """Return device on which model resides.""" 30 | return next(self.parameters()).device 31 | -------------------------------------------------------------------------------- /imaginaire/models/utils/init_weight.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import torch 14 | from torch.nn import init 15 | 16 | 17 | def weights_init(init_type, gain, bias=None): 18 | r"""Initialize weights in the network. 19 | 20 | Args: 21 | init_type (str): The name of the initialization scheme. 22 | gain (float): The parameter that is required for the initialization 23 | scheme. 24 | bias (object): If not ``None``, specifies the initialization parameter 25 | for bias. 26 | 27 | Returns: 28 | (obj): init function to be applied. 29 | """ 30 | 31 | def init_func(m): 32 | r"""Init function 33 | 34 | Args: 35 | m: module to be weight initialized. 36 | """ 37 | class_name = m.__class__.__name__ 38 | if hasattr(m, 'weight') and ( 39 | class_name.find('Conv') != -1 or 40 | class_name.find('Linear') != -1 or 41 | class_name.find('Embedding') != -1): 42 | lr_mul = getattr(m, 'lr_mul', 1.) 
43 | gain_final = gain / lr_mul 44 | if init_type == 'normal': 45 | init.normal_(m.weight.data, 0.0, gain_final) 46 | elif init_type == 'xavier': 47 | init.xavier_normal_(m.weight.data, gain=gain_final) 48 | elif init_type == 'xavier_uniform': 49 | init.xavier_uniform_(m.weight.data, gain=gain_final) 50 | elif init_type == 'kaiming': 51 | init.kaiming_normal_(m.weight.data, a=0, mode='fan_in') 52 | with torch.no_grad(): 53 | m.weight.data *= gain_final 54 | elif init_type == 'kaiming_linear': 55 | init.kaiming_normal_( 56 | m.weight.data, a=0, mode='fan_in', nonlinearity='linear' 57 | ) 58 | with torch.no_grad(): 59 | m.weight.data *= gain_final 60 | elif init_type == 'orthogonal': 61 | init.orthogonal_(m.weight.data, gain=gain_final) 62 | elif init_type == 'none': 63 | pass 64 | else: 65 | raise NotImplementedError( 66 | 'initialization method [%s] is ' 67 | 'not implemented' % init_type) 68 | if hasattr(m, 'bias') and m.bias is not None: 69 | if init_type == 'none': 70 | pass 71 | elif bias is not None: 72 | bias_type = getattr(bias, 'type', 'normal') 73 | if bias_type == 'normal': 74 | bias_gain = getattr(bias, 'gain', 0.5) 75 | init.normal_(m.bias.data, 0.0, bias_gain) 76 | else: 77 | raise NotImplementedError( 78 | 'initialization method [%s] is ' 79 | 'not implemented' % bias_type) 80 | else: 81 | init.constant_(m.bias.data, 0.0) 82 | return init_func 83 | 84 | 85 | def weights_rescale(): 86 | def init_func(m): 87 | if hasattr(m, 'init_gain'): 88 | for name, p in m.named_parameters(): 89 | if 'output_scale' not in name: 90 | p.data.mul_(m.init_gain) 91 | return init_func 92 | -------------------------------------------------------------------------------- /imaginaire/models/utils/model_average.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import copy 14 | 15 | import torch 16 | from torch import nn 17 | from imaginaire.utils.misc import requires_grad 18 | 19 | 20 | def reset_batch_norm(m): 21 | r"""Reset batch norm statistics 22 | 23 | Args: 24 | m: Pytorch module 25 | """ 26 | if hasattr(m, 'reset_running_stats'): 27 | m.reset_running_stats() 28 | 29 | 30 | def calibrate_batch_norm_momentum(m): 31 | r"""Calibrate batch norm momentum 32 | 33 | Args: 34 | m: Pytorch module 35 | """ 36 | if hasattr(m, 'reset_running_stats'): 37 | # if m._get_name() == 'SyncBatchNorm': 38 | if 'BatchNorm' in m._get_name(): 39 | m.momentum = 1.0 / float(m.num_batches_tracked + 1) 40 | 41 | 42 | class ModelAverage(nn.Module): 43 | r"""In this model average implementation, the spectral layers are 44 | absorbed in the model parameter by default. If such options are 45 | turned on, be careful with how you do the training. Remember to 46 | re-estimate the batch norm parameters before using the model. 47 | 48 | Args: 49 | module (torch nn module): Torch network. 50 | beta (float): Moving average weights. How much we weight the past. 
51 | start_iteration (int): From which iteration, we start the update. 52 | """ 53 | def __init__(self, module, beta=0.9999, start_iteration=0): 54 | super(ModelAverage, self).__init__() 55 | 56 | self.module = module 57 | # A shallow copy creates a new object which stores the reference of 58 | # the original elements. 59 | # A deep copy creates a new object and recursively adds the copies of 60 | # nested objects present in the original elements. 61 | self._averaged_model = copy.deepcopy(self.module).to('cuda') 62 | self.stream = torch.cuda.Stream() 63 | 64 | self.beta = beta 65 | 66 | self.start_iteration = start_iteration 67 | # This buffer is to track how many iterations has the model been 68 | # trained for. We will ignore the first $(start_iterations) and start 69 | # the averaging after. 70 | self.register_buffer('num_updates_tracked', 71 | torch.tensor(0, dtype=torch.long)) 72 | self.num_updates_tracked = self.num_updates_tracked.to('cuda') 73 | self.averaged_model.eval() 74 | 75 | # Averaged model does not require grad. 76 | requires_grad(self.averaged_model, False) 77 | 78 | @property 79 | def averaged_model(self): 80 | self.stream.synchronize() 81 | return self._averaged_model 82 | 83 | def forward(self, *inputs, **kwargs): 84 | r"""PyTorch module forward function overload.""" 85 | return self.module(*inputs, **kwargs) 86 | 87 | @torch.no_grad() 88 | def update_average(self): 89 | r"""Update the moving average.""" 90 | self.stream.wait_stream(torch.cuda.current_stream()) 91 | with torch.cuda.stream(self.stream): 92 | self.num_updates_tracked += 1 93 | if self.num_updates_tracked <= self.start_iteration: 94 | beta = 0. 95 | else: 96 | beta = self.beta 97 | source_dict = self.module.state_dict() 98 | target_dict = self._averaged_model.state_dict() 99 | source_list = [] 100 | target_list = [] 101 | for key in target_dict: 102 | if 'num_batches_tracked' in key: 103 | continue 104 | source_list.append(source_dict[key].data) 105 | target_list.append(target_dict[key].data.float()) 106 | 107 | torch._foreach_mul_(target_list, beta) 108 | torch._foreach_add_(target_list, source_list, alpha=1 - beta) 109 | 110 | def __repr__(self): 111 | r"""Returns a string that holds a printable representation of an 112 | object""" 113 | return self.module.__repr__() 114 | -------------------------------------------------------------------------------- /imaginaire/trainers/utils/get_trainer.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import importlib 14 | import torch 15 | import torch.distributed as dist 16 | import torch.nn as nn 17 | from torch.optim import lr_scheduler 18 | from imaginaire.models.utils.model_average import ModelAverage 19 | 20 | 21 | def get_trainer(cfg, is_inference=True, seed=0): 22 | """Return the trainer object. 23 | 24 | Args: 25 | cfg (Config): Loaded config object. 26 | is_inference (bool): Inference mode. 
27 | 28 | Returns: 29 | (obj): Trainer object. 30 | """ 31 | trainer_lib = importlib.import_module(cfg.trainer.type) 32 | trainer = trainer_lib.Trainer(cfg, is_inference=is_inference, seed=seed) 33 | return trainer 34 | 35 | 36 | def wrap_model(cfg, model): 37 | r"""Wrap the networks with AMP DDP and (optionally) model average. 38 | 39 | Args: 40 | cfg (obj): Global configuration. 41 | model (obj): Model object. 42 | 43 | Returns: 44 | (dict): 45 | - model (obj): Model object. 46 | """ 47 | # Apply model average wrapper. 48 | if cfg.trainer.ema_config.enabled: 49 | model = ModelAverage(model, 50 | cfg.trainer.ema_config.beta, 51 | cfg.trainer.ema_config.start_iteration, 52 | ) 53 | model = _wrap_model(cfg, model) 54 | return model 55 | 56 | 57 | class WrappedModel(nn.Module): 58 | r"""Dummy wrapping the module. 59 | """ 60 | 61 | def __init__(self, module): 62 | super(WrappedModel, self).__init__() 63 | self.module = module 64 | 65 | def forward(self, *args, **kwargs): 66 | r"""PyTorch module forward function overload.""" 67 | return self.module(*args, **kwargs) 68 | 69 | 70 | def _wrap_model(cfg, model): 71 | r"""Wrap a model for distributed data parallel training. 72 | 73 | Args: 74 | model (obj): PyTorch network model. 75 | 76 | Returns: 77 | (obj): Wrapped PyTorch network model. 78 | """ 79 | # Apply DDP wrapper. 80 | if dist.is_available() and dist.is_initialized(): 81 | model = torch.nn.parallel.DistributedDataParallel( 82 | model, 83 | device_ids=[cfg.local_rank], 84 | output_device=cfg.local_rank, 85 | find_unused_parameters=cfg.trainer.ddp_config.find_unused_parameters, 86 | static_graph=cfg.trainer.ddp_config.static_graph, 87 | broadcast_buffers=False, 88 | ) 89 | else: 90 | model = WrappedModel(model) 91 | return model 92 | 93 | 94 | def _calculate_model_size(model): 95 | r"""Calculate number of parameters in a PyTorch network. 96 | 97 | Args: 98 | model (obj): PyTorch network. 99 | 100 | Returns: 101 | (int): Number of parameters. 102 | """ 103 | return sum(p.numel() for p in model.parameters() if p.requires_grad) 104 | 105 | 106 | def get_optimizer(cfg_optim, model): 107 | r"""Return the optimizer object. 108 | 109 | Args: 110 | cfg_optim (obj): Config for the specific optimization module (gen/dis). 111 | model (obj): PyTorch network object. 112 | 113 | Returns: 114 | (obj): Pytorch optimizer 115 | """ 116 | if hasattr(model, 'get_param_groups'): 117 | # Allow the network to use different hyperparameters (e.g., learning rate) for different parameters. 118 | params = model.get_param_groups(cfg_optim) 119 | else: 120 | params = model.parameters() 121 | 122 | try: 123 | # Try the PyTorch optimizer class first. 124 | optimizer_class = getattr(torch.optim, cfg_optim.type) 125 | except AttributeError: 126 | raise NotImplementedError(f"Optimizer {cfg_optim.type} is not yet implemented.") 127 | optimizer_kwargs = cfg_optim.params 128 | 129 | # We will try to use fuse optimizers by default. 
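    # Illustrative config shape (an assumption inferred from the attributes read
    # below and from configs such as projects/nerf/configs/nerf_blender.yaml):
    #   optim:
    #     type: Adam            # any torch.optim class name; Adam/AdamW/SGD may be swapped for apex fused versions
    #     fused_opt: True       # hypothetical flag value; if apex is missing we silently fall back to torch.optim
    #     params:
    #       lr: 0.0005
    #       betas: [0.9, 0.999]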
130 |     try:
131 |         from apex.optimizers import FusedAdam, FusedSGD
132 |         fused_opt = cfg_optim.fused_opt
133 |     except (ImportError, ModuleNotFoundError):
134 |         fused_opt = False
135 | 
136 |     if fused_opt:
137 |         if cfg_optim.type == 'Adam':
138 |             optimizer_class = FusedAdam
139 |             optimizer_kwargs['adam_w_mode'] = False
140 |         elif cfg_optim.type == 'AdamW':
141 |             optimizer_class = FusedAdam
142 |             optimizer_kwargs['adam_w_mode'] = True
143 |         elif cfg_optim.type == 'SGD':
144 |             optimizer_class = FusedSGD
145 |     if cfg_optim.type in ["RAdam", "RMSprop"]:
146 |         optimizer_kwargs["foreach"] = fused_opt
147 | 
148 |     optim = optimizer_class(params, **optimizer_kwargs)
149 | 
150 |     return optim
151 | 
152 | 
153 | def get_scheduler(cfg_optim, optim):
154 |     """Return the scheduler object.
155 | 
156 |     Args:
157 |         cfg_optim (obj): Config for the specific optimization module (gen/dis).
158 |         optim (obj): PyTorch optimizer object.
159 | 
160 |     Returns:
161 |         (obj): Scheduler
162 |     """
163 |     if cfg_optim.sched.type == 'step':
164 |         scheduler = lr_scheduler.StepLR(optim,
165 |                                         step_size=cfg_optim.sched.step_size,
166 |                                         gamma=cfg_optim.sched.gamma)
167 |     elif cfg_optim.sched.type == 'constant':
168 |         scheduler = lr_scheduler.LambdaLR(optim, lambda x: 1)
169 |     elif cfg_optim.sched.type == 'linear_warmup':
170 |         scheduler = lr_scheduler.LambdaLR(
171 |             optim, lambda x: x * 1.0 / cfg_optim.sched.warmup if x < cfg_optim.sched.warmup else 1.0)
172 |     elif cfg_optim.sched.type == 'cosine_warmup':
173 | 
174 |         warmup_scheduler = lr_scheduler.LinearLR(
175 |             optim,
176 |             start_factor=1.0 / cfg_optim.sched.warmup,
177 |             end_factor=1.0,
178 |             total_iters=cfg_optim.sched.warmup
179 |         )
180 |         T_max_val = cfg_optim.sched.decay_steps - cfg_optim.sched.warmup
181 |         cosine_lr_scheduler = lr_scheduler.CosineAnnealingLR(
182 |             optim,
183 |             T_max=T_max_val,
184 |             eta_min=getattr(cfg_optim.sched, 'eta_min', 0),
185 |         )
186 |         scheduler = lr_scheduler.SequentialLR(
187 |             optim,
188 |             schedulers=[warmup_scheduler, cosine_lr_scheduler],
189 |             milestones=[cfg_optim.sched.warmup]
190 |         )
191 | 
192 |     elif cfg_optim.sched.type == 'linear':
193 |         # Start linear decay from here.
194 |         decay_start = cfg_optim.sched.decay_start
195 |         # End linear decay here.
196 |         # Continue to train using the lowest learning rate till the end.
197 |         decay_end = cfg_optim.sched.decay_end
198 |         # Lowest learning rate multiplier.
199 |         decay_target = cfg_optim.sched.decay_target
200 | 
201 |         def sch(x):
202 |             decay = ((x - decay_start) * decay_target + decay_end - x) / (decay_end - decay_start)
203 |             return min(max(decay, decay_target), 1.)
204 | 
205 |         scheduler = lr_scheduler.LambdaLR(optim, lambda x: sch(x))
206 |     elif cfg_optim.sched.type == 'step_with_warmup':
207 |         # The step_size and gamma follow the signature of lr_scheduler.StepLR.
208 |         step_size = cfg_optim.sched.step_size
209 |         gamma = cfg_optim.sched.gamma
210 |         # An additional parameter defines the warmup iteration.
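        # Worked example with illustrative numbers: step_size=1000, gamma=0.1 and
        # warmup_step_size=2000 give lr_after_warmup = 0.1 ** (2000 // 1000) = 0.01,
        # so the multiplier ramps linearly from 0 to 0.01 over the first 2000
        # iterations and then follows the usual staircase 0.1 ** (x // 1000).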
211 |         warmup_step_size = cfg_optim.sched.warmup_step_size
212 | 
213 |         def sch(x):
214 |             lr_after_warmup = gamma ** (warmup_step_size // step_size)
215 |             if x < warmup_step_size:
216 |                 return x / warmup_step_size * lr_after_warmup
217 |             else:
218 |                 return gamma ** (x // step_size)
219 | 
220 |         scheduler = lr_scheduler.LambdaLR(optim, lambda x: sch(x))
221 |     else:
222 |         raise NotImplementedError('Learning rate policy {} not implemented.'.format(cfg_optim.sched.type))
223 |     return scheduler
224 | 
--------------------------------------------------------------------------------
/imaginaire/trainers/utils/logging.py:
--------------------------------------------------------------------------------
1 | '''
2 | -----------------------------------------------------------------------------
3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
4 | 
5 | NVIDIA CORPORATION and its licensors retain all intellectual property
6 | and proprietary rights in and to this software, related documentation
7 | and any modifications thereto. Any use, reproduction, disclosure or
8 | distribution of this software and related documentation without an express
9 | license agreement from NVIDIA CORPORATION is strictly prohibited.
10 | -----------------------------------------------------------------------------
11 | '''
12 | 
13 | import datetime
14 | import os
15 | 
16 | import torch.distributed as dist
17 | 
18 | from imaginaire.utils.distributed import is_master, broadcast_object_list
19 | from imaginaire.utils.distributed import master_only_print as print
20 | 
21 | 
22 | def get_date_uid():
23 |     """Generate a unique id based on date.
24 |     Returns:
25 |         str: Return uid string, e.g. '2019_0125_1047_58'.
26 |     """
27 |     return str(datetime.datetime.now().strftime("%Y_%m%d_%H%M_%S"))
28 | 
29 | 
30 | def init_logging(config_path, logdir, makedir=True):
31 |     r"""Create log directory for storing checkpoints and output images.
32 | 
33 |     Args:
34 |         config_path (str): Path to the configuration file.
35 |         logdir (str or None): Log directory name
36 |         makedir (bool): Make a new dir or not
37 |     Returns:
38 |         str: Return log dir
39 |     """
40 |     def _create_logdir(_config_path, _logdir, _root_dir):
41 |         config_file = os.path.basename(_config_path)
42 |         date_uid = get_date_uid()
43 |         # example: logs/2019_0125_1047_58_spade_cocostuff
44 |         _log_file = '_'.join([date_uid, os.path.splitext(config_file)[0]])
45 |         if _logdir is None:
46 |             _logdir = os.path.join(_root_dir, _log_file)
47 |         if makedir:
48 |             print('Make folder {}'.format(_logdir))
49 |             os.makedirs(_logdir, exist_ok=True)
50 |         return _logdir
51 | 
52 |     root_dir = 'logs'
53 |     if dist.is_available():
54 |         if dist.is_initialized():
55 |             message = [None]
56 |             if is_master():
57 |                 logdir = _create_logdir(config_path, logdir, root_dir)
58 |                 message = [logdir]
59 | 
60 |             # Send logdir from master to all workers.
61 |             message = broadcast_object_list(message=message, src=0)
62 |             logdir = message[0]
63 |         else:
64 |             logdir = _create_logdir(config_path, logdir, root_dir)
65 |     else:
66 |         logdir = _create_logdir(config_path, logdir, root_dir)
67 | 
68 |     return logdir
69 | 
--------------------------------------------------------------------------------
/imaginaire/trainers/utils/meters.py:
--------------------------------------------------------------------------------
1 | '''
2 | -----------------------------------------------------------------------------
3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import math 14 | import torch 15 | import wandb 16 | from torch.utils.tensorboard import SummaryWriter 17 | 18 | from imaginaire.utils.distributed import master_only, dist_all_reduce_tensor, \ 19 | is_master, get_rank 20 | 21 | from imaginaire.utils.distributed import master_only_print as print 22 | 23 | LOG_WRITER = None 24 | LOG_DIR = None 25 | 26 | 27 | @torch.no_grad() 28 | def sn_reshape_weight_to_matrix(weight): 29 | r"""Reshape weight to obtain the matrix form. 30 | 31 | Args: 32 | weight (Parameters): pytorch layer parameter tensor. 33 | """ 34 | weight_mat = weight 35 | height = weight_mat.size(0) 36 | return weight_mat.reshape(height, -1) 37 | 38 | 39 | @torch.no_grad() 40 | def get_weight_stats(mod): 41 | r"""Get weight state 42 | 43 | Args: 44 | mod: Pytorch module 45 | """ 46 | if mod.weight_orig.grad is not None: 47 | grad_norm = mod.weight_orig.grad.data.norm().item() 48 | else: 49 | grad_norm = 0. 50 | weight_norm = mod.weight_orig.data.norm().item() 51 | weight_mat = sn_reshape_weight_to_matrix(mod.weight_orig) 52 | sigma = torch.sum(mod.weight_u * torch.mv(weight_mat, mod.weight_v)) 53 | return grad_norm, weight_norm, sigma 54 | 55 | 56 | @master_only 57 | def set_summary_writer(log_dir): 58 | r"""Set summary writer 59 | 60 | Args: 61 | log_dir (str): Log directory. 62 | """ 63 | global LOG_DIR, LOG_WRITER 64 | LOG_DIR = log_dir 65 | LOG_WRITER = SummaryWriter(log_dir=log_dir) 66 | 67 | 68 | def write_summary(name, summary, step, hist=False): 69 | """Utility function for write summary to log_writer. 70 | """ 71 | global LOG_WRITER 72 | lw = LOG_WRITER 73 | if lw is None: 74 | raise Exception("Log writer not set.") 75 | if hist: 76 | lw.add_histogram(name, summary, step) 77 | else: 78 | lw.add_scalar(name, summary, step) 79 | 80 | 81 | class Meter(object): 82 | """Meter is to keep track of statistics along steps. 83 | Meters write values for purpose like printing average values. 84 | Meters can be flushed to log files (i.e. TensorBoard for now) 85 | regularly. 86 | 87 | Args: 88 | name (str): the name of meter 89 | reduce (bool): If ``True``, perform a distributed reduce for the log 90 | values across all GPUs. 91 | """ 92 | 93 | def __init__(self, name, reduce=True): 94 | self.name = name 95 | self.reduce = reduce 96 | self.values = [] 97 | 98 | def reset(self): 99 | r"""Reset the meter values""" 100 | if not self.reduce and get_rank() != 0: 101 | return 102 | self.values = [] 103 | 104 | def write(self, value): 105 | r"""Record the value""" 106 | if not self.reduce and get_rank() != 0: 107 | return 108 | if value is not None: 109 | self.values.append(value) 110 | 111 | def flush(self, step): 112 | r"""Write the value in the tensorboard. 113 | 114 | Args: 115 | step (int): Epoch or iteration number. 
116 | """ 117 | if not self.reduce and get_rank() != 0: 118 | return 119 | values = torch.tensor(self.values, device="cuda") 120 | if self.reduce: 121 | values = dist_all_reduce_tensor(values) 122 | 123 | if not all(math.isfinite(x) for x in values): 124 | print("meter {} contained a nan or inf.".format(self.name)) 125 | filtered_values = list(filter(lambda x: math.isfinite(x), self.values)) 126 | if float(len(filtered_values)) != 0: 127 | value = float(sum(filtered_values)) / float(len(filtered_values)) 128 | if is_master(): 129 | write_summary(self.name, value, step) 130 | wandb.log({self.name: value}, step=step) 131 | self.reset() 132 | 133 | @master_only 134 | def write_image(self, img_grid, step): 135 | r"""Write the value in the tensorboard. 136 | 137 | Args: 138 | img_grid: 139 | step (int): Epoch or iteration number. 140 | """ 141 | if not self.reduce and get_rank() != 0: 142 | return 143 | global LOG_WRITER 144 | lw = LOG_WRITER 145 | if lw is None: 146 | raise Exception("Log writer not set.") 147 | lw.add_image("Visualizations", img_grid, step) 148 | -------------------------------------------------------------------------------- /imaginaire/utils/cudnn.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import torch.backends.cudnn as cudnn 14 | 15 | from imaginaire.utils.distributed import master_only_print as print 16 | 17 | 18 | def init_cudnn(deterministic, benchmark): 19 | r"""Initialize the cudnn module. The two things to consider is whether to 20 | use cudnn benchmark and whether to use cudnn deterministic. If cudnn 21 | benchmark is set, then the cudnn deterministic is automatically false. 22 | 23 | Args: 24 | deterministic (bool): Whether to use cudnn deterministic. 25 | benchmark (bool): Whether to use cudnn benchmark. 26 | """ 27 | cudnn.deterministic = deterministic 28 | cudnn.benchmark = benchmark 29 | print('cudnn benchmark: {}'.format(benchmark)) 30 | print('cudnn deterministic: {}'.format(deterministic)) 31 | -------------------------------------------------------------------------------- /imaginaire/utils/distributed.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 
10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import functools 14 | import ctypes 15 | 16 | import torch 17 | import torch.distributed as dist 18 | from contextlib import contextmanager 19 | 20 | 21 | def init_dist(local_rank, backend='nccl', **kwargs): 22 | r"""Initialize distributed training""" 23 | if dist.is_available(): 24 | if dist.is_initialized(): 25 | return torch.cuda.current_device() 26 | torch.cuda.set_device(local_rank) 27 | dist.init_process_group(backend=backend, init_method='env://', **kwargs) 28 | 29 | # Increase the L2 fetch granularity for faster speed. 30 | _libcudart = ctypes.CDLL('libcudart.so') 31 | # Set device limit on the current device 32 | # cudaLimitMaxL2FetchGranularity = 0x05 33 | pValue = ctypes.cast((ctypes.c_int * 1)(), ctypes.POINTER(ctypes.c_int)) 34 | _libcudart.cudaDeviceSetLimit(ctypes.c_int(0x05), ctypes.c_int(128)) 35 | _libcudart.cudaDeviceGetLimit(pValue, ctypes.c_int(0x05)) 36 | # assert pValue.contents.value == 128 37 | 38 | 39 | def get_rank(): 40 | r"""Get rank of the thread.""" 41 | rank = 0 42 | if dist.is_available(): 43 | if dist.is_initialized(): 44 | rank = dist.get_rank() 45 | return rank 46 | 47 | 48 | def get_world_size(): 49 | r"""Get world size. How many GPUs are available in this job.""" 50 | world_size = 1 51 | if dist.is_available(): 52 | if dist.is_initialized(): 53 | world_size = dist.get_world_size() 54 | return world_size 55 | 56 | 57 | def broadcast_object_list(message, src=0): 58 | r"""Broadcast object list from the master to the others""" 59 | # Send logdir from master to all workers. 60 | if dist.is_available(): 61 | if dist.is_initialized(): 62 | torch.distributed.broadcast_object_list(message, src=src) 63 | return message 64 | 65 | 66 | def master_only(func): 67 | r"""Apply this function only to the master GPU.""" 68 | @functools.wraps(func) 69 | def wrapper(*args, **kwargs): 70 | r"""Simple function wrapper for the master function""" 71 | if get_rank() == 0: 72 | return func(*args, **kwargs) 73 | else: 74 | return None 75 | return wrapper 76 | 77 | 78 | def is_master(): 79 | r"""check if current process is the master""" 80 | return get_rank() == 0 81 | 82 | 83 | def is_dist(): 84 | return dist.is_initialized() 85 | 86 | 87 | def barrier(): 88 | if is_dist(): 89 | dist.barrier() 90 | 91 | 92 | @contextmanager 93 | def master_first(): 94 | if not is_master(): 95 | barrier() 96 | yield 97 | if dist.is_initialized() and is_master(): 98 | barrier() 99 | 100 | 101 | def is_local_master(): 102 | return torch.cuda.current_device() == 0 103 | 104 | 105 | @master_only 106 | def master_only_print(*args): 107 | r"""master-only print""" 108 | print(*args) 109 | 110 | 111 | def dist_reduce_tensor(tensor, rank=0, reduce='mean'): 112 | r""" Reduce to rank 0 """ 113 | world_size = get_world_size() 114 | if world_size < 2: 115 | return tensor 116 | with torch.no_grad(): 117 | dist.reduce(tensor, dst=rank) 118 | if get_rank() == rank: 119 | if reduce == 'mean': 120 | tensor /= world_size 121 | elif reduce == 'sum': 122 | pass 123 | else: 124 | raise NotImplementedError 125 | return tensor 126 | 127 | 128 | def dist_all_reduce_tensor(tensor, reduce='mean'): 129 | r""" Reduce to all ranks """ 130 | world_size = get_world_size() 131 | if world_size < 2: 132 | return tensor 133 | with torch.no_grad(): 134 | dist.all_reduce(tensor) 135 | if reduce == 'mean': 136 | tensor /= world_size 137 | elif reduce == 'sum': 138 | pass 139 | else: 140 | raise NotImplementedError 141 | return tensor 
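
# Usage sketch (illustrative, not part of the original module): reporting a
# metric once per job by combining the helpers above, assuming `loss` is a
# scalar CUDA tensor present on every rank.
#
#     loss_avg = dist_all_reduce_tensor(loss.detach(), reduce='mean')
#     master_only_print(f"average loss across ranks: {loss_avg.item():.4f}")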
142 | 143 | 144 | def dist_all_gather_tensor(tensor): 145 | r""" gather to all ranks """ 146 | world_size = get_world_size() 147 | if world_size < 2: 148 | return [tensor] 149 | tensor_list = [ 150 | torch.ones_like(tensor) for _ in range(dist.get_world_size())] 151 | with torch.no_grad(): 152 | dist.all_gather(tensor_list, tensor) 153 | return tensor_list 154 | -------------------------------------------------------------------------------- /imaginaire/utils/gpu_affinity.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import math 14 | import os 15 | # pynvml is a python bindings to the NVIDIA Management Library 16 | # https://developer.nvidia.com/nvidia-management-library-nvml 17 | # An API for monitoring and managing various states of the NVIDIA GPU devices. 18 | # It provides direct access to the queries and commands exposed via nvidia-smi. 19 | 20 | import pynvml 21 | 22 | pynvml.nvmlInit() 23 | 24 | 25 | def system_get_driver_version(): 26 | r"""Get Driver Version""" 27 | return pynvml.nvmlSystemGetDriverVersion() 28 | 29 | 30 | def device_get_count(): 31 | r"""Get number of devices""" 32 | return pynvml.nvmlDeviceGetCount() 33 | 34 | 35 | class Device(object): 36 | r"""Device used for nvml.""" 37 | _nvml_affinity_elements = math.ceil(os.cpu_count() / 64) 38 | 39 | def __init__(self, device_idx): 40 | super().__init__() 41 | self.handle = pynvml.nvmlDeviceGetHandleByIndex(device_idx) 42 | 43 | def get_name(self): 44 | r"""Get obect name""" 45 | return pynvml.nvmlDeviceGetName(self.handle) 46 | 47 | def get_cpu_affinity(self): 48 | r"""Get CPU affinity""" 49 | affinity_string = '' 50 | for j in pynvml.nvmlDeviceGetCpuAffinity(self.handle, Device._nvml_affinity_elements): 51 | # assume nvml returns list of 64 bit ints 52 | affinity_string = '{:064b}'.format(j) + affinity_string 53 | affinity_list = [int(x) for x in affinity_string] 54 | affinity_list.reverse() # so core 0 is in 0th element of list 55 | 56 | return [i for i, e in enumerate(affinity_list) if e != 0] 57 | 58 | 59 | def set_affinity(gpu_id=None): 60 | r"""Set GPU affinity 61 | 62 | Args: 63 | gpu_id (int): Which gpu device. 64 | """ 65 | if gpu_id is None: 66 | gpu_id = int(os.getenv('LOCAL_RANK', 0)) 67 | 68 | try: 69 | dev = Device(gpu_id) 70 | # os.sched_setaffinity() method in Python is used to set the CPU affinity mask of a process indicated 71 | # by the specified process id. 72 | # A process’s CPU affinity mask determines the set of CPUs on which it is eligible to run. 
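        # For example, if NVML reported the 64-bit word 0b1010 for this GPU, the
        # decoding in get_cpu_affinity() above would yield cores [1, 3], and those
        # are the only cores this process would be scheduled on afterwards.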
73 | # Syntax: os.sched_setaffinity(pid, mask) 74 | # pid=0 means the current process 75 | os.sched_setaffinity(0, dev.get_cpu_affinity()) 76 | # list of ints 77 | # representing the logical cores this process is now affinitied with 78 | return os.sched_getaffinity(0) 79 | 80 | except pynvml.NVMLError: 81 | print("(Setting affinity with NVML failed, skipping...)") 82 | -------------------------------------------------------------------------------- /imaginaire/utils/set_random_seed.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import random 14 | import numpy as np 15 | import torch 16 | 17 | from imaginaire.utils.distributed import get_rank 18 | from imaginaire.utils.distributed import master_only_print as print 19 | 20 | 21 | def set_random_seed(seed, by_rank=False): 22 | r"""Set random seeds for everything, including random, numpy, torch.manual_seed, torch.cuda_manual_seed. 23 | torch.cuda.manual_seed_all is not necessary (included in torch.manual_seed) 24 | 25 | Args: 26 | seed (int): Random seed. 27 | by_rank (bool): if true, each gpu will use a different random seed. 28 | """ 29 | if by_rank: 30 | seed += get_rank() 31 | print(f"Using random seed {seed}") 32 | random.seed(seed) 33 | np.random.seed(seed) 34 | torch.manual_seed(seed) # sets seed on the current CPU & all GPUs 35 | torch.cuda.manual_seed(seed) # sets seed on current GPU 36 | # torch.cuda.manual_seed_all(seed) # included in torch.manual_seed 37 | -------------------------------------------------------------------------------- /imaginaire/utils/termcolor.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 
10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import pprint 14 | 15 | import termcolor 16 | 17 | 18 | def red(x): return termcolor.colored(str(x), color="red") 19 | def green(x): return termcolor.colored(str(x), color="green") 20 | def blue(x): return termcolor.colored(str(x), color="blue") 21 | def cyan(x): return termcolor.colored(str(x), color="cyan") 22 | def yellow(x): return termcolor.colored(str(x), color="yellow") 23 | def magenta(x): return termcolor.colored(str(x), color="magenta") 24 | def grey(x): return termcolor.colored(str(x), color="grey") 25 | 26 | 27 | COLORS = { 28 | 'red': red, 'green': green, 'blue': blue, 'cyan': cyan, 'yellow': yellow, 'magenta': magenta, 'grey': grey 29 | } 30 | 31 | 32 | def PP(x): 33 | string = pprint.pformat(x, indent=2) 34 | if isinstance(x, dict): 35 | string = '{\n ' + string[1:-1] + '\n}' 36 | return string 37 | 38 | 39 | def alert(x, color='red'): 40 | color = COLORS[color] 41 | print(color('-' * 32)) 42 | print(color(f'* {x}')) 43 | print(color('-' * 32)) 44 | -------------------------------------------------------------------------------- /imaginaire/utils/visualization.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 
10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import wandb 14 | import torch 15 | import torchvision 16 | 17 | from matplotlib import pyplot as plt 18 | from torchvision.transforms import functional as torchvision_F 19 | 20 | 21 | def wandb_image(images, from_range=(0, 1)): 22 | images = preprocess_image(images, from_range=from_range) 23 | image_grid = torchvision.utils.make_grid(images, nrow=1, pad_value=1) 24 | image_grid = torchvision_F.to_pil_image(image_grid) 25 | wandb_image = wandb.Image(image_grid) 26 | return wandb_image 27 | 28 | 29 | def preprocess_image(images, from_range=(0, 1), cmap="gray"): 30 | min, max = from_range 31 | images = (images - min) / (max - min) 32 | images = images.detach().cpu().float().clamp_(min=0, max=1) 33 | if images.shape[1] == 1: 34 | images = get_heatmap(images[:, 0], cmap=cmap) 35 | return images 36 | 37 | 38 | def get_heatmap(gray, cmap): # [N,H,W] 39 | color = plt.get_cmap(cmap)(gray.numpy()) 40 | color = torch.from_numpy(color[..., :3]).permute(0, 3, 1, 2).float() # [N,3,H,W] 41 | return color 42 | -------------------------------------------------------------------------------- /neuralangelo.yaml: -------------------------------------------------------------------------------- 1 | # conda env create --file neuralangelo.yaml 2 | name: neuralangelo 3 | channels: 4 | - conda-forge 5 | - pytorch 6 | dependencies: 7 | # general 8 | - gpustat 9 | - gdown 10 | - cudatoolkit-dev 11 | - cmake 12 | # python general 13 | - python=3.8 14 | - pip 15 | - numpy 16 | - scipy 17 | - ipython 18 | - jupyterlab 19 | - cython 20 | - ninja 21 | - diskcache 22 | # pytorch 23 | - pytorch 24 | - torchvision 25 | - pip: 26 | - -r requirements.txt 27 | -------------------------------------------------------------------------------- /projects/nerf/configs/ingp_blender.yaml: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------------------- 2 | # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # NVIDIA CORPORATION and its licensors retain all intellectual property 5 | # and proprietary rights in and to this software, related documentation 6 | # and any modifications thereto. Any use, reproduction, disclosure or 7 | # distribution of this software and related documentation without an express 8 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 9 | # ----------------------------------------------------------------------------- 10 | 11 | _parent_: projects/nerf/configs/nerf_blender.yaml 12 | 13 | max_iter: 500000 14 | 15 | wandb_scalar_iter: 500 16 | wandb_image_iter: 10000 17 | validation_iter: 10000 18 | 19 | model: 20 | type: projects.nerf.models.ingp 21 | mlp: 22 | layers_feat: [null,64,64] 23 | layers_rgb: [null,64,3] 24 | voxel: 25 | levels: 26 | min: 4 27 | max: 12 28 | num: 16 29 | dict_size: 19 30 | dim: 4 31 | range: [-5,5] 32 | init_scale: 1e-4 33 | sample_intvs: 256 34 | fine_sampling: False 35 | 36 | optim: 37 | type: Adam 38 | params: 39 | lr: 0.01 40 | sched: 41 | gamma: 1 42 | -------------------------------------------------------------------------------- /projects/nerf/configs/nerf_blender.yaml: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------------------- 2 | # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 
3 | # 4 | # NVIDIA CORPORATION and its licensors retain all intellectual property 5 | # and proprietary rights in and to this software, related documentation 6 | # and any modifications thereto. Any use, reproduction, disclosure or 7 | # distribution of this software and related documentation without an express 8 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 9 | # ----------------------------------------------------------------------------- 10 | 11 | aws_credentials_file: scripts/s3/pbss_dir.secret 12 | logging_iter: 9999999999999 # disable the printing logger 13 | 14 | max_iter: 500000 15 | 16 | wandb_scalar_iter: 100 17 | wandb_image_iter: 1000 18 | validation_iter: 2000 19 | 20 | speed_benchmark: False 21 | 22 | checkpoint: 23 | save_to_s3: False 24 | load_from_s3: False 25 | s3_credentials: scripts/s3/pbss_dir.secret 26 | s3_bucket: nerf 27 | save_iter: 10000 28 | 29 | trainer: 30 | type: projects.nerf.trainers.nerf 31 | ema_config: 32 | enabled: False 33 | load_ema_checkpoint: False 34 | loss_weight: 35 | render: 1.0 36 | render_fine: 1.0 37 | init: 38 | type: xavier 39 | amp_config: 40 | enabled: True 41 | 42 | model: 43 | type: projects.nerf.models.nerf 44 | mlp: 45 | layers_feat: [null,256,256,256,256,256,256,256,256] 46 | layers_rgb: [null,128,3] 47 | skip: [4] 48 | posenc: 49 | L_3D: 10 50 | L_view: 4 51 | density_activ: softplus 52 | view_dep: True 53 | dist: 54 | param: metric 55 | range: [2,6] 56 | sample_intvs: 64 57 | sample_stratified: True 58 | fine_sampling: True 59 | sample_intvs_fine: 128 60 | rand_rays: 1024 61 | density_noise_reg: 62 | opaque_background: False 63 | camera_ndc: False 64 | 65 | optim: 66 | type: Adam 67 | params: 68 | lr: 0.0005 69 | betas: [0.9, 0.999] 70 | sched: 71 | iteration_mode: False 72 | type: step 73 | step_size: 20 74 | gamma: 0.97724 75 | 76 | data: 77 | type: projects.nerf.datasets.nerf_blender 78 | use_multi_epoch_loader: True 79 | num_workers: 4 80 | root: datasets/nerf-synthetic/lego 81 | image_size: [400,400] 82 | preload: True 83 | bgcolor: 1 84 | train: 85 | batch_size: 2 86 | subset: 87 | val: 88 | batch_size: 2 89 | subset: 4 90 | max_viz_samples: 16 91 | 92 | test_data: 93 | type: projects.nerf.datasets.nerf_blender 94 | num_workers: 4 95 | root: datasets/nerf-synthetic/lego 96 | image_size: [400,400] 97 | preload: True 98 | bgcolor: 1 99 | test: 100 | batch_size: 2 101 | subset: 102 | -------------------------------------------------------------------------------- /projects/nerf/configs/nerf_llff.yaml: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------------------- 2 | # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # NVIDIA CORPORATION and its licensors retain all intellectual property 5 | # and proprietary rights in and to this software, related documentation 6 | # and any modifications thereto. Any use, reproduction, disclosure or 7 | # distribution of this software and related documentation without an express 8 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 
9 | # ----------------------------------------------------------------------------- 10 | 11 | _parent_: projects/nerf/configs/nerf_blender.yaml 12 | 13 | max_iter: 200000 14 | 15 | wandb_scalar_iter: 50 16 | wandb_image_iter: 500 17 | validation_iter: 1000 18 | 19 | model: 20 | type: projects.nerf.models.nerf 21 | dist: 22 | param: ndc 23 | range: [0,1] 24 | sample_intvs: 64 25 | fine_sampling: True 26 | sample_intvs_fine: 128 27 | rand_rays: 1024 28 | camera_ndc: True 29 | 30 | optim: 31 | type: Adam 32 | params: 33 | lr: 0.0005 34 | betas: [0.9, 0.999] 35 | sched: 36 | iteration_mode: False 37 | type: step 38 | step_size: 100 39 | gamma: 0.97724 40 | 41 | data: 42 | type: projects.nerf.datasets.nerf_llff 43 | use_multi_epoch_loader: True 44 | num_workers: 4 45 | root: datasets/nerf-llff/fern 46 | image_size: [480,640] 47 | preload: True 48 | val_ratio: 0.1 49 | train: 50 | batch_size: 2 51 | subset: 52 | val: 53 | batch_size: 2 54 | subset: 4 55 | 56 | test_data: 57 | type: projects.nerf.datasets.nerf_llff 58 | num_workers: 4 59 | root: datasets/nerf-llff/fern 60 | image_size: [480,640] 61 | preload: True 62 | test: 63 | batch_size: 2 64 | subset: 65 | -------------------------------------------------------------------------------- /projects/nerf/datasets/base.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import torch 14 | import tqdm 15 | import threading 16 | import queue 17 | 18 | 19 | class Dataset(torch.utils.data.Dataset): 20 | 21 | def __init__(self, cfg, is_inference=False, is_test=False): 22 | super().__init__() 23 | self.split = "test" if is_test else "val" if is_inference else "train" 24 | 25 | def _preload_worker(self, data_list, load_func, q, lock, idx_tqdm): 26 | # Keep preloading data in parallel. 27 | while True: 28 | idx = q.get() 29 | data_list[idx] = load_func(idx) 30 | with lock: 31 | idx_tqdm.update() 32 | q.task_done() 33 | 34 | def preload_threading(self, load_func, num_workers, data_str="images"): 35 | # Use threading to preload data in parallel. 
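        # Usage sketch (mirrors the calls in the NeRF dataset subclasses):
        #   self.images = self.preload_threading(self.get_image, cfg_data.num_workers)
        # Each worker thread pops an index from the queue, loads that sample with
        # `load_func` into `data_list[idx]`, and `q.join()` below blocks until every
        # index has been processed.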
36 | data_list = [None] * len(self) 37 | q = queue.Queue(maxsize=len(self)) 38 | idx_tqdm = tqdm.tqdm(range(len(self)), desc=f"preloading {data_str} ({self.split})", leave=False) 39 | for i in range(len(self)): 40 | q.put(i) 41 | lock = threading.Lock() 42 | for ti in range(num_workers): 43 | t = threading.Thread(target=self._preload_worker, 44 | args=(data_list, load_func, q, lock, idx_tqdm), daemon=True) 45 | t.start() 46 | q.join() 47 | idx_tqdm.close() 48 | assert all(map(lambda x: x is not None, data_list)) 49 | return data_list 50 | 51 | def __getitem__(self, idx): 52 | raise NotImplementedError 53 | 54 | def __len__(self): 55 | return len(self.list) 56 | -------------------------------------------------------------------------------- /projects/nerf/datasets/nerf_blender.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import json 14 | import numpy as np 15 | import torch 16 | import torchvision.transforms.functional as torchvision_F 17 | from PIL import Image, ImageFile 18 | 19 | from projects.nerf.datasets import base 20 | from projects.nerf.utils import camera 21 | 22 | ImageFile.LOAD_TRUNCATED_IMAGES = True 23 | 24 | 25 | class Dataset(base.Dataset): 26 | 27 | def __init__(self, cfg, is_inference=False, is_test=False): 28 | super().__init__(cfg, is_inference=is_inference, is_test=is_test) 29 | cfg_data = cfg.test_data if self.split == "test" else cfg.data 30 | data_info = cfg_data[self.split] 31 | self.root = cfg_data.root 32 | self.preload = cfg_data.preload 33 | self.bgcolor = cfg_data.bgcolor 34 | self.raw_H, self.raw_W = 800, 800 35 | self.H, self.W = cfg_data.image_size 36 | meta_fname = f"{cfg_data.root}/transforms_{self.split}.json" 37 | with open(meta_fname) as file: 38 | self.meta = json.load(file) 39 | self.focal = 0.5 * self.raw_W / np.tan(0.5 * self.meta["camera_angle_x"]) 40 | self.list = self.meta["frames"] 41 | # Consider only a subset of data. 42 | if data_info.subset: 43 | self.list = self.list[:data_info.subset] 44 | # Preload dataset if possible. 45 | if cfg_data.preload: 46 | self.images = self.preload_threading(self.get_image, cfg_data.num_workers) 47 | self.cameras = self.preload_threading(self.get_camera, cfg_data.num_workers, data_str="cameras") 48 | 49 | def __getitem__(self, idx): 50 | """Process raw data and return processed data in a dictionary. 51 | 52 | Args: 53 | idx: The index of the sample of the dataset. 54 | Returns: A dictionary containing the data. 55 | idx (scalar): The index of the sample of the dataset. 56 | image (3xHxW tensor): Image with pixel values in [0,1] for supervision. 57 | intr (3x3 tensor): The camera intrinsics of `image`. 58 | pose (3x4 tensor): The camera extrinsics [R,t] of `image`. 59 | """ 60 | # Keep track of sample index for convenience. 61 | sample = dict(idx=idx) 62 | # Get the images. 
63 | image = self.images[idx] if self.preload else self.get_image(idx) 64 | image = self.preprocess_image(image) 65 | # Get the cameras (intrinsics and pose). 66 | intr, pose = self.cameras[idx] if self.preload else self.get_camera(idx) 67 | intr, pose = self.preprocess_camera(intr, pose) 68 | # Update the data sample. 69 | sample.update( 70 | image=image, 71 | intr=intr, 72 | pose=pose, 73 | ) 74 | return sample 75 | 76 | def get_image(self, idx): 77 | fpath = self.list[idx]["file_path"][2:] 78 | image_fname = f"{self.root}/{fpath}.png" 79 | image = Image.open(image_fname) 80 | image.load() 81 | return image 82 | 83 | def preprocess_image(self, image): 84 | # Resize the image. 85 | image = image.resize((self.W, self.H)) 86 | image = torchvision_F.to_tensor(image) 87 | # Background masking. 88 | rgb, mask = image[:3], image[3:] 89 | if self.bgcolor is not None: 90 | rgb = rgb * mask + self.bgcolor * (1 - mask) 91 | return rgb 92 | 93 | def get_camera(self, idx): 94 | # Camera intrinsics. 95 | intr = torch.tensor([[self.focal, 0, self.raw_W / 2], 96 | [0, self.focal, self.raw_H / 2], 97 | [0, 0, 1]]).float() 98 | # Camera pose. 99 | pose_raw = torch.tensor(self.list[idx]["transform_matrix"], dtype=torch.float32) 100 | pose = self.parse_raw_camera(pose_raw) 101 | return intr, pose 102 | 103 | def preprocess_camera(self, intr, pose): 104 | # Adjust the intrinsics according to the resized image. 105 | intr = intr.clone() 106 | intr[0] *= self.W / self.raw_W 107 | intr[1] *= self.H / self.raw_H 108 | return intr, pose 109 | 110 | def parse_raw_camera(self, pose_raw): 111 | pose_flip = camera.pose(R=torch.diag(torch.tensor([1, -1, -1]))) 112 | pose = camera.pose.compose([pose_flip, pose_raw[:3]]) 113 | pose = camera.pose.invert(pose) 114 | return pose 115 | -------------------------------------------------------------------------------- /projects/nerf/datasets/nerf_llff.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 
10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import numpy as np 14 | import torch 15 | import torch.nn.functional as torch_F 16 | import torchvision.transforms.functional as torchvision_F 17 | from PIL import Image, ImageFile 18 | 19 | from projects.nerf.datasets import base 20 | from projects.nerf.utils import camera 21 | 22 | ImageFile.LOAD_TRUNCATED_IMAGES = True 23 | 24 | 25 | class Dataset(base.Dataset): 26 | 27 | def __init__(self, cfg, is_inference=False, is_test=False): 28 | super().__init__(cfg, is_inference=is_inference, is_test=is_test) 29 | cfg_data = cfg.test_data if self.split == "test" else cfg.data 30 | data_info = cfg_data[self.split] 31 | self.root = cfg_data.root 32 | self.preload = cfg_data.preload 33 | self.raw_H, self.raw_W = 3024, 4032 34 | self.H, self.W = cfg_data.image_size 35 | list_fname = f"{cfg_data.root}/images.list" 36 | image_fnames = open(list_fname).read().splitlines() 37 | poses_raw, bounds = self.parse_cameras_and_bounds(cfg_data) 38 | self.list = list(zip(image_fnames, poses_raw, bounds)) 39 | # Manually split train/val subsets. 40 | num_val_split = int(len(self) * cfg_data.val_ratio) 41 | self.list = self.list[:-num_val_split] if self.split == "train" else self.list[-num_val_split:] 42 | # Consider only a subset of data. 43 | if data_info.subset: 44 | self.list = self.list[:data_info.subset] 45 | # Preload dataset if possible. 46 | if cfg_data.preload: 47 | self.images = self.preload_threading(self.get_image, cfg_data.num_workers) 48 | self.cameras = self.preload_threading(self.get_camera, cfg_data.num_workers, data_str="cameras") 49 | 50 | def parse_cameras_and_bounds(self, cfg_data): 51 | fname = f"{cfg_data.root}/poses_bounds.npy" 52 | data = torch.tensor(np.load(fname), dtype=torch.float32) 53 | # Parse cameras (intrinsics and poses). 54 | cam_data = data[:, :-2].view([-1, 3, 5]) # [N,3,5] 55 | poses_raw = cam_data[..., :4] # [N,3,4] 56 | poses_raw[..., 0], poses_raw[..., 1] = poses_raw[..., 1], -poses_raw[..., 0] 57 | raw_H, raw_W, self.focal = cam_data[0, :, -1] 58 | assert self.raw_H == raw_H and self.raw_W == raw_W 59 | # Parse depth bounds. 60 | bounds = data[:, -2:] # [N,2] 61 | scale = 1. / (bounds.min() * 0.75) # Not sure how this was determined? 62 | poses_raw[..., 3] *= scale 63 | bounds *= scale 64 | # Roughly center camera poses. 65 | poses_raw = self.center_camera_poses(poses_raw) 66 | return poses_raw, bounds 67 | 68 | def center_camera_poses(self, poses): 69 | # Compute average pose. 70 | center = poses[..., 3].mean(dim=0) 71 | v1 = torch_F.normalize(poses[..., 1].mean(dim=0), dim=0) 72 | v2 = torch_F.normalize(poses[..., 2].mean(dim=0), dim=0) 73 | v0 = v1.cross(v2) 74 | pose_avg = torch.stack([v0, v1, v2, center], dim=-1)[None] # [1,3,4] 75 | # Apply inverse of averaged pose. 76 | poses = camera.pose.compose([poses, camera.pose.invert(pose_avg)]) 77 | return poses 78 | 79 | def __getitem__(self, idx): 80 | """Process raw data and return processed data in a dictionary. 81 | 82 | Args: 83 | idx: The index of the sample of the dataset. 84 | Returns: A dictionary containing the data. 85 | idx (scalar): The index of the sample of the dataset. 86 | image (3xHxW tensor): Image with pixel values in [0,1] for supervision. 87 | intr (3x3 tensor): The camera intrinsics of `image`. 88 | pose (3x4 tensor): The camera extrinsics [R,t] of `image`. 89 | """ 90 | # Keep track of sample index for convenience. 91 | sample = dict(idx=idx) 92 | # Get the images. 
93 | image = self.images[idx] if self.preload else self.get_image(idx) 94 | image = self.preprocess_image(image) 95 | # Get the cameras (intrinsics and pose). 96 | intr, pose = self.cameras[idx] if self.preload else self.get_camera(idx) 97 | intr, pose = self.preprocess_camera(intr, pose) 98 | # Update the data sample. 99 | sample.update( 100 | image=image, 101 | intr=intr, 102 | pose=pose, 103 | ) 104 | return sample 105 | 106 | def get_image(self, idx): 107 | image_fname = f"{self.root}/images/{self.list[idx][0]}" 108 | image = Image.open(image_fname) 109 | image.load() 110 | return image 111 | 112 | def preprocess_image(self, image): 113 | # Resize the image and convert to Pytorch. 114 | image = image.resize((self.W, self.H)) 115 | image = torchvision_F.to_tensor(image) 116 | return image 117 | 118 | def get_camera(self, idx): 119 | # Camera intrinsics. 120 | intr = torch.tensor([[self.focal, 0, self.raw_W / 2], 121 | [0, self.focal, self.raw_H / 2], 122 | [0, 0, 1]]).float() 123 | # Camera pose. 124 | pose_raw = self.list[idx][1] 125 | pose = self.parse_raw_camera(pose_raw) 126 | return intr, pose 127 | 128 | def preprocess_camera(self, intr, pose): 129 | # Adjust the intrinsics according to the resized image. 130 | intr = intr.clone() 131 | intr[0] *= self.W / self.raw_W 132 | intr[1] *= self.H / self.raw_H 133 | return intr, pose 134 | 135 | def parse_raw_camera(self, pose_raw): 136 | pose_flip = camera.pose(R=torch.diag(torch.tensor([1, -1, -1]))) 137 | pose = camera.pose.compose([pose_flip, pose_raw[:3]]) 138 | pose = camera.pose.invert(pose) 139 | pose = camera.pose.compose([pose_flip, pose]) 140 | return pose 141 | -------------------------------------------------------------------------------- /projects/nerf/models/ingp.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import numpy as np 14 | import torch 15 | import tinycudann as tcnn 16 | 17 | from projects.nerf.models import nerf 18 | 19 | 20 | class Model(nerf.Model): 21 | 22 | def __init__(self, cfg_model, cfg_data): 23 | super().__init__(cfg_model, cfg_data) 24 | self.fine_sampling = False 25 | self.density_reg = cfg_model.density_noise_reg 26 | # Define models. 27 | self.nerf = InstantNGP(cfg_model) 28 | 29 | 30 | class InstantNGP(nerf.NeRF): 31 | 32 | def __init__(self, cfg_model): 33 | self.voxel = cfg_model.voxel 34 | super().__init__(cfg_model) 35 | 36 | def set_input_dims(self, cfg_model): 37 | # Define the input encoding dimensions. 38 | self.input_3D_dim = 3 + cfg_model.voxel.dim * cfg_model.voxel.levels.num 39 | self.input_view_dim = 3 if cfg_model.view_dep else None 40 | 41 | def build_model(self, cfg_model): 42 | super().build_model(cfg_model) 43 | # Build the tcnn hash grid. 
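        # The growth rate below spaces the grid resolutions geometrically between
        # 2 ** levels.min and 2 ** levels.max:
        #   growth_rate = exp((ln r_max - ln r_min) / (num_levels - 1))
        # With the values in projects/nerf/configs/ingp_blender.yaml (min=4, max=12,
        # num=16): r_min=16, r_max=4096, growth_rate ~= 1.447, so the per-level
        # resolutions run roughly 16, 23, 33, ..., 4096 (plus 1 in self.resolutions).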
44 | l_min, l_max = self.voxel.levels.min, self.voxel.levels.max 45 | r_min, r_max = 2 ** l_min, 2 ** l_max 46 | num_levels = self.voxel.levels.num 47 | growth_rate = np.exp((np.log(r_max) - np.log(r_min)) / (num_levels - 1)) 48 | config = dict( 49 | otype="HashGrid", 50 | n_levels=cfg_model.voxel.levels.num, 51 | n_features_per_level=cfg_model.voxel.dim, 52 | log2_hashmap_size=cfg_model.voxel.dict_size, 53 | base_resolution=2 ** cfg_model.voxel.levels.min, 54 | per_level_scale=growth_rate, 55 | ) 56 | self.tiny_cuda_encoding = tcnn.Encoding(3, config) 57 | # Compute resolutions of all levels. 58 | self.resolutions = [] 59 | for lv in range(0, num_levels): 60 | size = np.floor(r_min * growth_rate ** lv).astype(int) + 1 61 | self.resolutions.append(size) 62 | 63 | def forward(self, points_3D, ray_unit, density_reg=None): 64 | return super().forward(points_3D, ray_unit, density_reg) 65 | 66 | def _encode_3D(self, points_3D): 67 | # Tri-linear interpolate the corresponding embeddings from the dictionary. 68 | vol_min, vol_max = self.voxel.range 69 | points_3D_normalized = (points_3D - vol_min) / (vol_max - vol_min) # Normalize to [0,1]. 70 | tcnn_input = points_3D_normalized.view(-1, 3) 71 | tcnn_output = self.tiny_cuda_encoding(tcnn_input) 72 | points_enc = tcnn_output.view(*points_3D_normalized.shape[:-1], tcnn_output.shape[-1]) 73 | points_enc = torch.cat([points_enc, points_3D], dim=-1) # [B,R,N,LD+3] 74 | return points_enc 75 | 76 | def _encode_view(self, ray_unit): 77 | return ray_unit 78 | -------------------------------------------------------------------------------- /projects/nerf/trainers/base.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import torch 14 | import wandb 15 | from imaginaire.trainers.base import BaseTrainer 16 | from imaginaire.utils.distributed import is_master, master_only 17 | from tqdm import tqdm 18 | 19 | from projects.nerf.utils.misc import collate_test_data_batches, get_unique_test_data, trim_test_samples 20 | 21 | 22 | class BaseTrainer(BaseTrainer): 23 | """ 24 | A customized BaseTrainer. 25 | """ 26 | 27 | def __init__(self, cfg, is_inference=True, seed=0): 28 | super().__init__(cfg, is_inference=is_inference, seed=seed) 29 | self.metrics = dict() 30 | # The below configs should be properly overridden. 31 | cfg.setdefault("wandb_scalar_iter", 9999999999999) 32 | cfg.setdefault("wandb_image_iter", 9999999999999) 33 | cfg.setdefault("validation_epoch", 9999999999999) 34 | cfg.setdefault("validation_iter", 9999999999999) 35 | 36 | def init_losses(self, cfg): 37 | super().init_losses(cfg) 38 | self.weights = {key: value for key, value in cfg.trainer.loss_weight.items() if value} 39 | 40 | def _end_of_iteration(self, data, current_epoch, current_iteration): 41 | # Log to wandb. 42 | if current_iteration % self.cfg.wandb_scalar_iter == 0: 43 | # Compute the elapsed time (as in the original base trainer). 
44 | self.timer.time_iteration = self.elapsed_iteration_time / self.cfg.wandb_scalar_iter 45 | self.elapsed_iteration_time = 0 46 | # Log scalars. 47 | self.log_wandb_scalars(data, mode="train") 48 | # Exit if the training loss has gone to NaN/inf. 49 | if is_master() and self.losses["total"].isnan(): 50 | self.finalize(self.cfg) 51 | raise ValueError("Training loss has gone to NaN!!!") 52 | if is_master() and self.losses["total"].isinf(): 53 | self.finalize(self.cfg) 54 | raise ValueError("Training loss has gone to infinity!!!") 55 | if current_iteration % self.cfg.wandb_image_iter == 0: 56 | self.log_wandb_images(data, mode="train") 57 | # Run validation on val set. 58 | if current_iteration % self.cfg.validation_iter == 0: 59 | data_all = self.test(self.eval_data_loader, mode="val") 60 | # Log the results to W&B. 61 | if is_master(): 62 | self.log_wandb_scalars(data_all, mode="val") 63 | self.log_wandb_images(data_all, mode="val", max_samples=self.cfg.data.val.max_viz_samples) 64 | 65 | def _end_of_epoch(self, data, current_epoch, current_iteration): 66 | # Run validation on val set. 67 | if current_epoch % self.cfg.validation_epoch == 0: 68 | data_all = self.test(self.eval_data_loader, mode="val") 69 | # Log the results to W&B. 70 | if is_master(): 71 | self.log_wandb_scalars(data_all, mode="val") 72 | self.log_wandb_images(data_all, mode="val", max_samples=self.cfg.data.val.max_viz_samples) 73 | 74 | @master_only 75 | def log_wandb_scalars(self, data, mode=None): 76 | scalars = dict() 77 | # Log scalars (basic info & losses). 78 | if mode == "train": 79 | scalars.update({"optim/lr": self.sched.get_last_lr()[0]}) 80 | scalars.update({"time/iteration": self.timer.time_iteration}) 81 | scalars.update({"time/epoch": self.timer.time_epoch}) 82 | scalars.update({f"{mode}/loss/{key}": value for key, value in self.losses.items()}) 83 | scalars.update(iteration=self.current_iteration, epoch=self.current_epoch) 84 | wandb.log(scalars, step=self.current_iteration) 85 | 86 | @master_only 87 | def log_wandb_images(self, data, mode=None, max_samples=None): 88 | trim_test_samples(data, max_samples=max_samples) 89 | 90 | def model_forward(self, data): 91 | # Model forward. 92 | output = self.model(data) # data = self.model(data) will not return the same data in the case of DDP. 93 | data.update(output) 94 | # Compute loss. 95 | self.timer._time_before_loss() 96 | self._compute_loss(data, mode="train") 97 | total_loss = self._get_total_loss() 98 | return total_loss 99 | 100 | def _compute_loss(self, data, mode=None): 101 | raise NotImplementedError 102 | 103 | def train(self, cfg, data_loader, single_gpu=False, profile=False, show_pbar=False): 104 | self.current_epoch = self.checkpointer.resume_epoch or self.current_epoch 105 | self.current_iteration = self.checkpointer.resume_iteration or self.current_iteration 106 | if ((self.current_epoch % self.cfg.validation_epoch == 0 or 107 | self.current_iteration % self.cfg.validation_iter == 0)): 108 | # Do an initial validation. 109 | data_all = self.test(self.eval_data_loader, mode="val", show_pbar=show_pbar) 110 | # Log the results to W&B. 111 | if is_master(): 112 | self.log_wandb_scalars(data_all, mode="val") 113 | self.log_wandb_images(data_all, mode="val", max_samples=self.cfg.data.val.max_viz_samples) 114 | # Train. 
115 | super().train(cfg, data_loader, single_gpu, profile, show_pbar) 116 | 117 | @torch.no_grad() 118 | def test(self, data_loader, output_dir=None, inference_args=None, mode="test", show_pbar=False): 119 | """The evaluation/inference engine. 120 | Args: 121 | data_loader: The data loader. 122 | output_dir: Output directory to dump the test results. 123 | inference_args: (unused) 124 | mode: Evaluation mode {"val", "test"}. Can be other modes, but will only gather the data. 125 | Returns: 126 | data_all: A dictionary of all the data. 127 | """ 128 | if self.cfg.trainer.ema_config.enabled: 129 | model = self.model.module.averaged_model 130 | else: 131 | model = self.model.module 132 | model.eval() 133 | if show_pbar: 134 | data_loader = tqdm(data_loader, desc="Evaluating", leave=False) 135 | data_batches = [] 136 | for it, data in enumerate(data_loader): 137 | data = self.start_of_iteration(data, current_iteration=self.current_iteration) 138 | output = model.inference(data) 139 | data.update(output) 140 | data_batches.append(data) 141 | # Aggregate the data from all devices and process the results. 142 | data_gather = collate_test_data_batches(data_batches) 143 | # Only the master process should process the results; slaves will just return. 144 | if is_master(): 145 | data_all = get_unique_test_data(data_gather, data_gather["idx"]) 146 | tqdm.write(f"Evaluating with {len(data_all['idx'])} samples.") 147 | # Validate/test. 148 | if mode == "val": 149 | self._compute_loss(data_all, mode=mode) 150 | _ = self._get_total_loss() 151 | if mode == "test": 152 | # Dump the test results for postprocessing. 153 | self.dump_test_results(data_all, output_dir) 154 | return data_all 155 | else: 156 | return 157 | 158 | def dump_test_results(self, data_all, output_dir): 159 | raise NotImplementedError 160 | -------------------------------------------------------------------------------- /projects/nerf/trainers/nerf.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import torch 14 | import torch.nn.functional as torch_F 15 | import wandb 16 | import skvideo.io 17 | 18 | from imaginaire.utils.distributed import master_only 19 | from projects.nerf.trainers.base import BaseTrainer 20 | from imaginaire.utils.visualization import wandb_image, preprocess_image 21 | 22 | 23 | class Trainer(BaseTrainer): 24 | 25 | def __init__(self, cfg, is_inference=True, seed=0): 26 | super().__init__(cfg, is_inference=is_inference, seed=seed) 27 | self.batch_idx, _ = torch.meshgrid(torch.arange(cfg.data.train.batch_size), 28 | torch.arange(cfg.model.rand_rays), indexing="ij") # [B,R] 29 | self.batch_idx = self.batch_idx.cuda() 30 | 31 | def _init_loss(self, cfg): 32 | self.criteria["render"] = self.criteria["render_fine"] = torch.nn.MSELoss() 33 | 34 | def _compute_loss(self, data, mode=None): 35 | if mode == "train": 36 | # Extract the corresponding sampled rays. 
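            # Shape walk-through with the nerf_blender.yaml defaults (batch_size=2,
            # image_size=[400,400], rand_rays=1024):
            #   image_vec:       [2, 160000, 3]  pixels flattened to H*W
            #   self.batch_idx:  [2, 1024]       row index repeated for each sampled ray
            #   data["ray_idx"]: [2, 1024]       flattened pixel index of each sampled ray
            #   image_sampled:   [2, 1024, 3]    ground-truth colors of the sampled rays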
37 | batch_size = len(data["image"]) 38 | image_vec = data["image"].permute(0, 2, 3, 1).view(batch_size, -1, 3) # [B,HW,3] 39 | image_sampled = image_vec[self.batch_idx, data["ray_idx"]] # [B,R,3] 40 | # Compute loss only on randomly sampled rays. 41 | self.losses["render"] = self.criteria["render"](data["rgb"], image_sampled) 42 | self.metrics["psnr"] = -10 * torch_F.mse_loss(data["rgb"], image_sampled).log10() 43 | if self.cfg.model.fine_sampling: 44 | self.losses["render_fine"] = self.criteria["render_fine"](data["rgb_fine"], image_sampled) 45 | self.metrics["psnr_fine"] = -10 * torch_F.mse_loss(data["rgb_fine"], image_sampled).log10() 46 | else: 47 | # Compute loss on the entire image. 48 | self.losses["render"] = self.criteria["render"](data["rgb_map"], data["image"]) 49 | self.metrics["psnr"] = -10 * torch_F.mse_loss(data["rgb_map"], data["image"]).log10() 50 | if self.cfg.model.fine_sampling: 51 | self.losses["render_fine"] = self.criteria["render_fine"](data["rgb_map_fine"], data["image"]) 52 | self.metrics["psnr_fine"] = -10 * torch_F.mse_loss(data["rgb_map_fine"], data["image"]).log10() 53 | 54 | @master_only 55 | def log_wandb_scalars(self, data, mode=None): 56 | super().log_wandb_scalars(data, mode=mode) 57 | scalars = {f"{mode}/PSNR/nerf": self.metrics["psnr"].detach()} 58 | if "render_fine" in self.losses: 59 | scalars.update({f"{mode}/PSNR/nerf_fine": self.metrics["psnr_fine"].detach()}) 60 | wandb.log(scalars, step=self.current_iteration) 61 | 62 | @master_only 63 | def log_wandb_images(self, data, mode=None, max_samples=None): 64 | super().log_wandb_images(data, mode=mode, max_samples=max_samples) 65 | images = {f"{mode}/image_target": wandb_image(data["image"])} 66 | if mode == "val": 67 | images_error = (data["rgb_map"] - data["image"]).abs() 68 | images.update({ 69 | f"{mode}/images": wandb_image(data["rgb_map"]), 70 | f"{mode}/images_error": wandb_image(images_error), 71 | f"{mode}/inv_depth": wandb_image(data["inv_depth_map"]), 72 | }) 73 | if self.cfg.model.fine_sampling: 74 | images_error_fine = (data["rgb_map_fine"] - data["image"]).abs() 75 | images.update({ 76 | f"{mode}/images_fine": wandb_image(data["rgb_map_fine"]), 77 | f"{mode}/images_error_fine": wandb_image(images_error_fine), 78 | f"{mode}/inv_depth_fine": wandb_image(data["inv_depth_map_fine"]), 79 | }) 80 | images.update({"iteration": self.current_iteration}) 81 | images.update({"epoch": self.current_epoch}) 82 | wandb.log(images, step=self.current_iteration) 83 | 84 | def dump_test_results(self, data_all, output_dir): 85 | results = dict( 86 | images_target=preprocess_image(data_all["images_target"]), 87 | image=preprocess_image(data_all["rgb_map"]), 88 | inv_depth=preprocess_image(data_all["inv_depth_map"]), 89 | ) 90 | if self.cfg.model.fine_sampling: 91 | results.update( 92 | image_fine=preprocess_image(data_all["rgb_map_fine"]), 93 | inv_depth_fine=preprocess_image(data_all["inv_depth_map_fine"]), 94 | ) 95 | # Write results as videos. 
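# The writer below streams uint8 HxWx3 frames through scikit-video's FFmpeg wrapper:
# "-r 30" sets the input frame rate, "-crf 10" requests near-lossless H.264 quality, and
# "-pix_fmt yuv420p" keeps the output playable in common players. A minimal standalone
# sketch (the output path and frame contents are made up):
import numpy as np
import skvideo.io

writer = skvideo.io.FFmpegWriter("demo.mp4", inputdict={"-r": "30"},
                                 outputdict={"-crf": "10", "-pix_fmt": "yuv420p"})
for t in range(30):
    frame = np.full((64, 64, 3), t * 8, dtype=np.uint8)  # a simple fading gray frame
    writer.writeFrame(frame)
writer.close()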
96 | inputdict, outputdict = self._get_ffmpeg_dicts() 97 | for key, image_list in results.items(): 98 | print(f"writing video ({key})...") 99 | video_fname = f"{output_dir}/{key}.mp4" 100 | video_writer = skvideo.io.FFmpegWriter(video_fname, inputdict=inputdict, outputdict=outputdict) 101 | for image in image_list: 102 | image = (image * 255).byte().permute(1, 2, 0).numpy() 103 | video_writer.writeFrame(image) 104 | video_writer.close() 105 | 106 | def _get_ffmpeg_dicts(self): 107 | inputdict = {"-r": str(30)} 108 | outputdict = {"-crf": str(10), "-pix_fmt": "yuv420p"} 109 | return inputdict, outputdict 110 | -------------------------------------------------------------------------------- /projects/nerf/utils/misc.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import torch 14 | 15 | from imaginaire.utils.distributed import dist_all_gather_tensor 16 | 17 | 18 | def collate_test_data_batches(data_batches): 19 | """Aggregate the list of test data from all devices and process the results. 20 | Args: 21 | data_batches (list): List of (hierarchical) dictionaries, where leaf entries are tensors. 22 | Returns: 23 | data_gather (dict): (hierarchical) dictionaries, where leaf entries are concatenated tensors. 24 | """ 25 | data_gather = dict() 26 | for key in data_batches[0].keys(): 27 | data_list = [data[key] for data in data_batches] 28 | if isinstance(data_batches[0][key], dict): 29 | data_gather[key] = collate_test_data_batches(data_list) 30 | elif isinstance(data_batches[0][key], torch.Tensor): 31 | data_gather[key] = torch.cat(data_list, dim=0) 32 | data_gather[key] = torch.cat(dist_all_gather_tensor(data_gather[key].contiguous()), dim=0) 33 | else: 34 | raise TypeError 35 | return data_gather 36 | 37 | 38 | def get_unique_test_data(data_gather, idx): 39 | """Aggregate the list of test data from all devices and process the results. 40 | Args: 41 | data_gather (dict): (hierarchical) dictionaries, where leaf entries are tensors. 42 | idx (tensor): sample indices. 43 | Returns: 44 | data_all (dict): (hierarchical) dictionaries, where leaf entries are tensors ordered by idx. 45 | """ 46 | data_all = dict() 47 | for key, value in data_gather.items(): 48 | if isinstance(value, dict): 49 | data_all[key] = get_unique_test_data(value, idx) 50 | elif isinstance(value, torch.Tensor): 51 | data_all[key] = [] 52 | for i in range(max(idx) + 1): 53 | # If multiple occurrences of the same idx, just choose the first one. If no occurrence, just ignore. 
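# Duplicates of the same idx can appear because distributed evaluation may pad or repeat
# samples so every rank sees the same number of batches. A small illustration of the dedup
# pattern used below (values are made up):
import torch
idx = torch.tensor([0, 1, 1, 3])                 # sample 1 was gathered twice, 2 is missing
matches = (idx == 1).nonzero(as_tuple=True)[0]   # tensor([1, 2]): all positions of idx == 1
first = matches[0]                               # keep only the first occurrence
assert matches.tolist() == [1, 2] and first.item() == 1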
54 | matches = (idx == i).nonzero(as_tuple=True)[0] 55 | if matches.numel() != 0: 56 | data_all[key].append(value[matches[0]]) 57 | data_all[key] = torch.stack(data_all[key], dim=0) 58 | else: 59 | raise TypeError 60 | return data_all 61 | 62 | 63 | def trim_test_samples(data, max_samples=None): 64 | for key, value in data.items(): 65 | if isinstance(value, dict): 66 | data[key] = trim_test_samples(value, max_samples=max_samples) 67 | elif isinstance(value, torch.Tensor): 68 | if max_samples is not None: 69 | data[key] = value[:max_samples] 70 | else: 71 | raise TypeError 72 | -------------------------------------------------------------------------------- /projects/nerf/utils/render.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import torch 14 | from torch.cuda.amp import autocast 15 | 16 | 17 | def volume_rendering_weights(ray, densities, depths, depth_far=None): 18 | """The volume rendering function. Details can be found in the NeRF paper. 19 | Args: 20 | ray (tensor [batch,ray,3]): The ray directions in world space. 21 | densities (tensor [batch,ray,samples]): The predicted volume density samples. 22 | depths (tensor [batch,ray,samples,1]): The corresponding depth samples. 23 | depth_far (tensor [batch,ray,1,1]): The farthest depth for computing the last interval. 24 | Returns: 25 | weights (tensor [batch,ray,samples,1]): The predicted weight of each sampled point along the ray (in [0,1]). 26 | """ 27 | ray_length = ray.norm(dim=-1, keepdim=True) # [B,R,1] 28 | if depth_far is None: 29 | depth_far = torch.empty_like(depths[..., :1, :]).fill_(1e10) # [B,R,1,1] 30 | depths_aug = torch.cat([depths, depth_far], dim=2) # [B,R,N+1,1] 31 | dists = depths_aug * ray_length[..., None] # [B,R,N+1,1] 32 | # Volume rendering: compute rendering weights (using quadrature). 33 | dist_intvs = dists[..., 1:, 0] - dists[..., :-1, 0] # [B,R,N] 34 | sigma_delta = densities * dist_intvs # [B,R,N] 35 | sigma_delta_0 = torch.cat([torch.zeros_like(sigma_delta[..., :1]), 36 | sigma_delta[..., :-1]], dim=2) # [B,R,N] 37 | T = (-sigma_delta_0.cumsum(dim=2)).exp_() # [B,R,N] 38 | alphas = 1 - (-sigma_delta).exp_() # [B,R,N] 39 | # Compute weights for compositing samples. 40 | weights = (T * alphas)[..., None] # [B,R,N,1] 41 | return weights 42 | 43 | 44 | def volume_rendering_weights_dist(densities, dists, dist_far=None): 45 | """The volume rendering function. Details can be found in the NeRF paper. 46 | Args: 47 | densities (tensor [batch,ray,samples]): The predicted volume density samples. 48 | dists (tensor [batch,ray,samples,1]): The corresponding distance samples. 49 | dist_far (tensor [batch,ray,1,1]): The farthest distance for computing the last interval. 50 | Returns: 51 | weights (tensor [batch,ray,samples,1]): The predicted weight of each sampled point along the ray (in [0,1]). 52 | """ 53 | # TODO: re-consolidate!! 
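# The rendering-weight functions in this file implement the standard NeRF quadrature:
#   alpha_i = 1 - exp(-sigma_i * delta_i),  T_i = exp(-sum_{j<i} sigma_j * delta_j),
#   w_i = T_i * alpha_i,  which telescopes to  sum_i w_i = 1 - exp(-sum_i sigma_i * delta_i) <= 1.
# A quick numerical check of that identity with made-up densities and distances:
import torch
densities = torch.rand(1, 1, 8) * 5.0                  # [B,R,N] with B=R=1, N=8
dists = torch.linspace(0.0, 1.0, 9).view(1, 1, 9, 1)   # N+1 sample distances
dist_intvs = dists[..., 1:, 0] - dists[..., :-1, 0]    # [B,R,N]
sigma_delta = densities * dist_intvs
sigma_delta_0 = torch.cat([torch.zeros_like(sigma_delta[..., :1]), sigma_delta[..., :-1]], dim=2)
weights = (-sigma_delta_0.cumsum(dim=2)).exp() * (1 - (-sigma_delta).exp())
assert torch.allclose(weights.sum(dim=2), 1 - (-sigma_delta.sum(dim=2)).exp(), atol=1e-5)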
54 | if dist_far is None: 55 | dist_far = torch.empty_like(dists[..., :1, :]).fill_(1e10) # [B,R,1,1] 56 | dists = torch.cat([dists, dist_far], dim=2) # [B,R,N+1,1] 57 | # Volume rendering: compute rendering weights (using quadrature). 58 | dist_intvs = dists[..., 1:, 0] - dists[..., :-1, 0] # [B,R,N] 59 | sigma_delta = densities * dist_intvs # [B,R,N] 60 | sigma_delta_0 = torch.cat([torch.zeros_like(sigma_delta[..., :1]), sigma_delta[..., :-1]], dim=2) # [B,R,N] 61 | T = (-sigma_delta_0.cumsum(dim=2)).exp_() # [B,R,N] 62 | alphas = 1 - (-sigma_delta).exp_() # [B,R,N] 63 | # Compute weights for compositing samples. 64 | weights = (T * alphas)[..., None] # [B,R,N,1] 65 | return weights 66 | 67 | 68 | def volume_rendering_alphas_dist(densities, dists, dist_far=None): 69 | """The volume rendering function. Details can be found in the NeRF paper. 70 | Args: 71 | densities (tensor [batch,ray,samples]): The predicted volume density samples. 72 | dists (tensor [batch,ray,samples,1]): The corresponding distance samples. 73 | dist_far (tensor [batch,ray,1,1]): The farthest distance for computing the last interval. 74 | Returns: 75 | alphas (tensor [batch,ray,samples,1]): The occupancy of each sampled point along the ray (in [0,1]). 76 | """ 77 | if dist_far is None: 78 | dist_far = torch.empty_like(dists[..., :1, :]).fill_(1e10) # [B,R,1,1] 79 | dists = torch.cat([dists, dist_far], dim=2) # [B,R,N+1,1] 80 | # Volume rendering: compute rendering weights (using quadrature). 81 | dist_intvs = dists[..., 1:, 0] - dists[..., :-1, 0] # [B,R,N] 82 | sigma_delta = densities * dist_intvs # [B,R,N] 83 | alphas = 1 - (-sigma_delta).exp_() # [B,R,N] 84 | return alphas 85 | 86 | 87 | def alpha_compositing_weights(alphas): 88 | """Alpha compositing of (sampled) MPIs given their RGBs and alphas. 89 | Args: 90 | alphas (tensor [batch,ray,samples]): The predicted opacity values. 91 | Returns: 92 | weights (tensor [batch,ray,samples,1]): The predicted weight of each MPI (in [0,1]). 93 | """ 94 | alphas_front = torch.cat([torch.zeros_like(alphas[..., :1]), 95 | alphas[..., :-1]], dim=2) # [B,R,N] 96 | with autocast(enabled=False): # TODO: may be unstable in some cases. 97 | visibility = (1 - alphas_front).cumprod(dim=2) # [B,R,N] 98 | weights = (alphas * visibility)[..., None] # [B,R,N,1] 99 | return weights 100 | 101 | 102 | def composite(quantities, weights): 103 | """Composite the samples to render the RGB/depth/opacity of the corresponding pixels. 104 | Args: 105 | quantities (tensor [batch,ray,samples,k]): The quantity to be weighted summed. 106 | weights (tensor [batch,ray,samples,1]): The predicted weight of each sampled point along the ray. 107 | Returns: 108 | quantity (tensor [batch,ray,k]): The expected (rendered) quantity. 109 | """ 110 | # Integrate RGB and depth weighted by probability. 111 | quantity = (quantities * weights).sum(dim=2) # [B,R,K] 112 | return quantity 113 | -------------------------------------------------------------------------------- /projects/nerf/utils/visualize.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. 
Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import numpy as np 14 | import torch 15 | import matplotlib.pyplot as plt 16 | import plotly.graph_objs as go 17 | import k3d 18 | 19 | from projects.nerf.utils import camera 20 | 21 | 22 | def get_camera_mesh(pose, depth=1): 23 | vertices = torch.tensor([[-0.5, -0.5, 1], 24 | [0.5, -0.5, 1], 25 | [0.5, 0.5, 1], 26 | [-0.5, 0.5, 1], 27 | [0, 0, 0]]) * depth # [6,3] 28 | faces = torch.tensor([[0, 1, 2], 29 | [0, 2, 3], 30 | [0, 1, 4], 31 | [1, 2, 4], 32 | [2, 3, 4], 33 | [3, 0, 4]]) # [6,3] 34 | vertices = camera.cam2world(vertices[None], pose) # [N,6,3] 35 | wireframe = vertices[:, [0, 1, 2, 3, 0, 4, 1, 2, 4, 3]] # [N,10,3] 36 | return vertices, faces, wireframe 37 | 38 | 39 | def merge_meshes(vertices, faces): 40 | mesh_N, vertex_N = vertices.shape[:2] 41 | faces_merged = torch.cat([faces + i * vertex_N for i in range(mesh_N)], dim=0) 42 | vertices_merged = vertices.view(-1, vertices.shape[-1]) 43 | return vertices_merged, faces_merged 44 | 45 | 46 | def merge_wireframes_k3d(wireframe): 47 | wf_first, wf_last, wf_dummy = wireframe[:, :1], wireframe[:, -1:], wireframe[:, :1] * np.nan 48 | wireframe_merged = torch.cat([wf_first, wireframe, wf_last, wf_dummy], dim=1) 49 | return wireframe_merged 50 | 51 | 52 | def merge_wireframes_plotly(wireframe): 53 | wf_dummy = wireframe[:, :1] * np.nan 54 | wireframe_merged = torch.cat([wireframe, wf_dummy], dim=1).view(-1, 3) 55 | return wireframe_merged 56 | 57 | 58 | def get_xyz_indicators(pose, length=0.1): 59 | xyz = torch.eye(4, 3)[None] * length 60 | xyz = camera.cam2world(xyz, pose) 61 | return xyz 62 | 63 | 64 | def merge_xyz_indicators_k3d(xyz): # [N,4,3] 65 | xyz = xyz[:, [[-1, 0], [-1, 1], [-1, 2]]] # [N,3,2,3] 66 | xyz_0, xyz_1 = xyz.unbind(dim=2) # [N,3,3] 67 | xyz_dummy = xyz_0 * np.nan 68 | xyz_merged = torch.stack([xyz_0, xyz_0, xyz_1, xyz_1, xyz_dummy], dim=2) # [N,3,5,3] 69 | return xyz_merged 70 | 71 | 72 | def merge_xyz_indicators_plotly(xyz): # [N,4,3] 73 | xyz = xyz[:, [[-1, 0], [-1, 1], [-1, 2]]] # [N,3,2,3] 74 | xyz_0, xyz_1 = xyz.unbind(dim=2) # [N,3,3] 75 | xyz_dummy = xyz_0 * np.nan 76 | xyz_merged = torch.stack([xyz_0, xyz_1, xyz_dummy], dim=2) # [N,3,3,3] 77 | xyz_merged = xyz_merged.view(-1, 3) 78 | return xyz_merged 79 | 80 | 81 | def k3d_visualize_pose(poses, vis_depth=0.5, xyz_length=0.1, center_size=0.1, xyz_width=0.02, mesh_opacity=0.05): 82 | # poses has shape [N,3,4] potentially in sequential order 83 | N = len(poses) 84 | centers_cam = torch.zeros(N, 1, 3) 85 | centers_world = camera.cam2world(centers_cam, poses) 86 | centers_world = centers_world[:, 0] 87 | # Get the camera wireframes. 88 | vertices, faces, wireframe = get_camera_mesh(poses, depth=vis_depth) 89 | xyz = get_xyz_indicators(poses, length=xyz_length) 90 | vertices_merged, faces_merged = merge_meshes(vertices, faces) 91 | wireframe_merged = merge_wireframes_k3d(wireframe) 92 | xyz_merged = merge_xyz_indicators_k3d(xyz) 93 | # Set the color map for the camera trajectory and the xyz indicators. 
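# k3d expects per-point/vertex colors as packed 24-bit integers (0xRRGGBB), so the loop below
# converts matplotlib's float RGBA output into that form. A tiny sketch of the packing (the
# helper name is illustrative):
def rgb_float_to_hex(r, g, b):
    # r, g, b are floats in [0, 1]; shift each 8-bit channel into place.
    return (int(r * 255) << 16) + (int(g * 255) << 8) + int(b * 255)

assert rgb_float_to_hex(1.0, 0.0, 0.0) == 0xFF0000
assert rgb_float_to_hex(0.0, 0.0, 1.0) == 0x0000FF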
94 | color_map = plt.get_cmap("gist_rainbow") 95 | center_color = [] 96 | vertices_merged_color = [] 97 | wireframe_color = [] 98 | xyz_color = [] 99 | x_hex, y_hex, z_hex = int(255) << 16, int(255) << 8, int(255) 100 | for i in range(N): 101 | # Set the camera pose colors (with a smooth gradient color map). 102 | r, g, b, _ = color_map(i / (N - 1)) 103 | r, g, b = r * 0.8, g * 0.8, b * 0.8 104 | pose_rgb_hex = (int(r * 255) << 16) + (int(g * 255) << 8) + int(b * 255) 105 | center_color += [pose_rgb_hex] 106 | vertices_merged_color += [pose_rgb_hex] * 5 107 | wireframe_color += [pose_rgb_hex] * 13 108 | # Set the xyz indicator colors. 109 | xyz_color += [x_hex] * 5 + [y_hex] * 5 + [z_hex] * 5 110 | # Plot in K3D. 111 | k3d_objects = [ 112 | k3d.points(centers_world, colors=center_color, point_size=center_size, shader="3d"), 113 | k3d.mesh(vertices_merged, faces_merged, colors=vertices_merged_color, side="double", opacity=mesh_opacity), 114 | k3d.line(wireframe_merged, colors=wireframe_color, shader="simple"), 115 | k3d.line(xyz_merged, colors=xyz_color, shader="thick", width=xyz_width), 116 | ] 117 | return k3d_objects 118 | 119 | 120 | def plotly_visualize_pose(poses, vis_depth=0.5, xyz_length=0.5, center_size=2, xyz_width=5, mesh_opacity=0.05): 121 | # poses has shape [N,3,4] potentially in sequential order 122 | N = len(poses) 123 | centers_cam = torch.zeros(N, 1, 3) 124 | centers_world = camera.cam2world(centers_cam, poses) 125 | centers_world = centers_world[:, 0] 126 | # Get the camera wireframes. 127 | vertices, faces, wireframe = get_camera_mesh(poses, depth=vis_depth) 128 | xyz = get_xyz_indicators(poses, length=xyz_length) 129 | vertices_merged, faces_merged = merge_meshes(vertices, faces) 130 | wireframe_merged = merge_wireframes_plotly(wireframe) 131 | xyz_merged = merge_xyz_indicators_plotly(xyz) 132 | # Break up (x,y,z) coordinates. 133 | wireframe_x, wireframe_y, wireframe_z = wireframe_merged.unbind(dim=-1) 134 | xyz_x, xyz_y, xyz_z = xyz_merged.unbind(dim=-1) 135 | centers_x, centers_y, centers_z = centers_world.unbind(dim=-1) 136 | vertices_x, vertices_y, vertices_z = vertices_merged.unbind(dim=-1) 137 | # Set the color map for the camera trajectory and the xyz indicators. 138 | color_map = plt.get_cmap("gist_rainbow") 139 | center_color = [] 140 | faces_merged_color = [] 141 | wireframe_color = [] 142 | xyz_color = [] 143 | x_color, y_color, z_color = *np.eye(3).T, 144 | for i in range(N): 145 | # Set the camera pose colors (with a smooth gradient color map). 146 | r, g, b, _ = color_map(i / (N - 1)) 147 | rgb = np.array([r, g, b]) * 0.8 148 | wireframe_color += [rgb] * 11 149 | center_color += [rgb] 150 | faces_merged_color += [rgb] * 6 151 | xyz_color += [x_color] * 3 + [y_color] * 3 + [z_color] * 3 152 | # Plot in plotly. 
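# Plotly draws a single connected polyline per "lines" trace, so merge_wireframes_plotly and
# merge_xyz_indicators_plotly insert NaN rows as pen-lifts: the line breaks at every NaN,
# letting all camera wireframes share one trace. A minimal illustration with made-up
# coordinates:
import numpy as np
import plotly.graph_objs as go
xs = [0, 1, np.nan, 5, 6]   # the NaN splits the polyline into two separate segments
ys = [0, 0, np.nan, 1, 1]
zs = [0, 0, np.nan, 0, 0]
segments = go.Scatter3d(x=xs, y=ys, z=zs, mode="lines")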
153 | plotly_traces = [ 154 | go.Scatter3d(x=wireframe_x, y=wireframe_y, z=wireframe_z, mode="lines", 155 | line=dict(color=wireframe_color, width=1)), 156 | go.Scatter3d(x=xyz_x, y=xyz_y, z=xyz_z, mode="lines", line=dict(color=xyz_color, width=xyz_width)), 157 | go.Scatter3d(x=centers_x, y=centers_y, z=centers_z, mode="markers", 158 | marker=dict(color=center_color, size=center_size, opacity=1)), 159 | go.Mesh3d(x=vertices_x, y=vertices_y, z=vertices_z, 160 | i=[f[0] for f in faces_merged], j=[f[1] for f in faces_merged], k=[f[2] for f in faces_merged], 161 | facecolor=faces_merged_color, opacity=mesh_opacity), 162 | ] 163 | return plotly_traces 164 | -------------------------------------------------------------------------------- /projects/neuralangelo/configs/base.yaml: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------------------- 2 | # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # NVIDIA CORPORATION and its licensors retain all intellectual property 5 | # and proprietary rights in and to this software, related documentation 6 | # and any modifications thereto. Any use, reproduction, disclosure or 7 | # distribution of this software and related documentation without an express 8 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 9 | # ----------------------------------------------------------------------------- 10 | 11 | logging_iter: 9999999999999 # disable the printing logger 12 | 13 | max_iter: 500000 14 | 15 | wandb_scalar_iter: 100 16 | wandb_image_iter: 10000 17 | validation_iter: 5000 18 | speed_benchmark: False 19 | 20 | checkpoint: 21 | save_iter: 20000 22 | 23 | trainer: 24 | type: projects.neuralangelo.trainer 25 | ema_config: 26 | enabled: False 27 | load_ema_checkpoint: False 28 | loss_weight: 29 | render: 1.0 30 | eikonal: 0.1 31 | curvature: 5e-4 32 | init: 33 | type: none 34 | amp_config: 35 | enabled: False 36 | depth_vis_scale: 0.5 37 | 38 | model: 39 | type: projects.neuralangelo.model 40 | object: 41 | sdf: 42 | mlp: 43 | num_layers: 1 44 | hidden_dim: 256 45 | skip: [] 46 | activ: softplus 47 | activ_params: 48 | beta: 100 49 | geometric_init: True 50 | weight_norm: True 51 | out_bias: 0.5 52 | inside_out: False 53 | encoding: 54 | type: hashgrid 55 | levels: 16 56 | hashgrid: 57 | min_logres: 5 58 | max_logres: 11 59 | dict_size: 22 60 | dim: 8 61 | range: [-2,2] 62 | coarse2fine: 63 | enabled: True 64 | init_active_level: 4 65 | step: 5000 66 | gradient: 67 | mode: numerical 68 | taps: 4 69 | rgb: 70 | mlp: 71 | num_layers: 4 72 | hidden_dim: 256 73 | skip: [] 74 | activ: relu_ 75 | activ_params: {} 76 | weight_norm: True 77 | mode: idr 78 | encoding_view: 79 | type: spherical 80 | levels: 3 81 | s_var: 82 | init_val: 3. 
83 | anneal_end: 0.1 84 | background: 85 | enabled: True 86 | white: False 87 | mlp: 88 | num_layers: 8 89 | hidden_dim: 256 90 | skip: [4] 91 | num_layers_rgb: 2 92 | hidden_dim_rgb: 128 93 | skip_rgb: [] 94 | activ: relu 95 | activ_params: {} 96 | activ_density: softplus 97 | activ_density_params: {} 98 | view_dep: True 99 | encoding: 100 | type: fourier 101 | levels: 10 102 | encoding_view: 103 | type: spherical 104 | levels: 3 105 | render: 106 | rand_rays: 512 107 | num_samples: 108 | coarse: 64 109 | fine: 16 110 | background: 32 111 | num_sample_hierarchy: 4 112 | stratified: True 113 | appear_embed: 114 | enabled: False 115 | dim: 8 116 | 117 | optim: 118 | type: AdamW 119 | params: 120 | lr: 1e-3 121 | weight_decay: 1e-2 122 | sched: 123 | iteration_mode: True 124 | type: two_steps_with_warmup 125 | warm_up_end: 5000 126 | two_steps: [300000,400000] 127 | gamma: 10.0 128 | 129 | data: 130 | type: projects.nerf.datasets.nerf_blender 131 | root: datasets/nerf-synthetic/lego 132 | use_multi_epoch_loader: True 133 | num_workers: 4 134 | preload: True 135 | num_images: # The number of training images. 136 | train: 137 | image_size: [800,800] 138 | batch_size: 2 139 | subset: 140 | val: 141 | image_size: [400,400] 142 | batch_size: 2 143 | subset: 4 144 | max_viz_samples: 16 145 | readjust: 146 | center: [0.,0.,0.] 147 | scale: 1. 148 | -------------------------------------------------------------------------------- /projects/neuralangelo/configs/custom/template.yaml: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------------------- 2 | # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # NVIDIA CORPORATION and its licensors retain all intellectual property 5 | # and proprietary rights in and to this software, related documentation 6 | # and any modifications thereto. Any use, reproduction, disclosure or 7 | # distribution of this software and related documentation without an express 8 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 9 | # ----------------------------------------------------------------------------- 10 | 11 | # NOTE: this template is an example for designing your custom config file. It is not intended to be used directly. 12 | # Please preprocess your custom data and auto-generate the config file before customizing. 13 | 14 | _parent_: projects/neuralangelo/configs/base.yaml 15 | 16 | model: 17 | object: 18 | sdf: 19 | mlp: 20 | inside_out: False 21 | encoding: 22 | coarse2fine: 23 | init_active_level: 8 24 | appear_embed: 25 | enabled: True 26 | dim: 8 27 | 28 | data: 29 | type: projects.neuralangelo.data 30 | root: # The root path of the dataset. 31 | num_images: # The number of training images. 32 | train: 33 | image_size: [1200,1600] 34 | batch_size: 1 35 | subset: 36 | val: 37 | image_size: [300,400] 38 | batch_size: 1 39 | subset: 1 40 | max_viz_samples: 41 | readjust: 42 | center: [0.,0.,0.] 43 | scale: 1. 44 | -------------------------------------------------------------------------------- /projects/neuralangelo/configs/dtu.yaml: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------------------- 2 | # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 
3 | # 4 | # NVIDIA CORPORATION and its licensors retain all intellectual property 5 | # and proprietary rights in and to this software, related documentation 6 | # and any modifications thereto. Any use, reproduction, disclosure or 7 | # distribution of this software and related documentation without an express 8 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 9 | # ----------------------------------------------------------------------------- 10 | 11 | _parent_: projects/neuralangelo/configs/base.yaml 12 | 13 | model: 14 | object: 15 | sdf: 16 | mlp: 17 | inside_out: False 18 | encoding: 19 | coarse2fine: 20 | init_active_level: 4 21 | s_var: 22 | init_val: 1.4 23 | appear_embed: 24 | enabled: False 25 | 26 | data: 27 | type: projects.neuralangelo.data 28 | root: datasets/dtu/dtu_scan24 29 | train: 30 | image_size: [1200,1600] 31 | batch_size: 1 32 | subset: 33 | val: 34 | image_size: [300,400] 35 | batch_size: 1 36 | subset: 1 37 | max_viz_samples: 16 38 | -------------------------------------------------------------------------------- /projects/neuralangelo/configs/tnt.yaml: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------------------- 2 | # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # NVIDIA CORPORATION and its licensors retain all intellectual property 5 | # and proprietary rights in and to this software, related documentation 6 | # and any modifications thereto. Any use, reproduction, disclosure or 7 | # distribution of this software and related documentation without an express 8 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 9 | # ----------------------------------------------------------------------------- 10 | 11 | _parent_: projects/neuralangelo/configs/base.yaml 12 | 13 | model: 14 | object: 15 | sdf: 16 | mlp: 17 | inside_out: False # True for Meetingroom. 18 | encoding: 19 | coarse2fine: 20 | init_active_level: 8 21 | appear_embed: 22 | enabled: True 23 | dim: 8 24 | 25 | data: 26 | type: projects.neuralangelo.data 27 | root: datasets/tanks_and_temples/Barn 28 | num_images: 410 # The number of training images. 29 | train: 30 | image_size: [835,1500] 31 | batch_size: 1 32 | subset: 33 | val: 34 | image_size: [300,540] 35 | batch_size: 1 36 | subset: 1 37 | max_viz_samples: 16 38 | -------------------------------------------------------------------------------- /projects/neuralangelo/data.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 
10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import json 14 | import numpy as np 15 | import torch 16 | import torchvision.transforms.functional as torchvision_F 17 | from PIL import Image, ImageFile 18 | 19 | from projects.nerf.datasets import base 20 | from projects.nerf.utils import camera 21 | 22 | ImageFile.LOAD_TRUNCATED_IMAGES = True 23 | 24 | 25 | class Dataset(base.Dataset): 26 | 27 | def __init__(self, cfg, is_inference=False): 28 | super().__init__(cfg, is_inference=is_inference, is_test=False) 29 | cfg_data = cfg.data 30 | self.root = cfg_data.root 31 | self.preload = cfg_data.preload 32 | self.H, self.W = cfg_data.val.image_size if is_inference else cfg_data.train.image_size 33 | meta_fname = f"{cfg_data.root}/transforms.json" 34 | with open(meta_fname) as file: 35 | self.meta = json.load(file) 36 | self.list = self.meta["frames"] 37 | if cfg_data[self.split].subset: 38 | subset = cfg_data[self.split].subset 39 | subset_idx = np.linspace(0, len(self.list), subset+1)[:-1].astype(int) 40 | self.list = [self.list[i] for i in subset_idx] 41 | self.num_rays = cfg.model.render.rand_rays 42 | self.readjust = getattr(cfg_data, "readjust", None) 43 | # Preload dataset if possible. 44 | if cfg_data.preload: 45 | self.images = self.preload_threading(self.get_image, cfg_data.num_workers) 46 | self.cameras = self.preload_threading(self.get_camera, cfg_data.num_workers, data_str="cameras") 47 | 48 | def __getitem__(self, idx): 49 | """Process raw data and return processed data in a dictionary. 50 | 51 | Args: 52 | idx: The index of the sample of the dataset. 53 | Returns: A dictionary containing the data. 54 | idx (scalar): The index of the sample of the dataset. 55 | image (R tensor): Image idx for per-image embedding. 56 | image (Rx3 tensor): Image with pixel values in [0,1] for supervision. 57 | intr (3x3 tensor): The camera intrinsics of `image`. 58 | pose (3x4 tensor): The camera extrinsics [R,t] of `image`. 59 | """ 60 | # Keep track of sample index for convenience. 61 | sample = dict(idx=idx) 62 | # Get the images. 63 | image, image_size_raw = self.images[idx] if self.preload else self.get_image(idx) 64 | image = self.preprocess_image(image) 65 | # Get the cameras (intrinsics and pose). 66 | intr, pose = self.cameras[idx] if self.preload else self.get_camera(idx) 67 | intr, pose = self.preprocess_camera(intr, pose, image_size_raw) 68 | # Pre-sample ray indices. 69 | if self.split == "train": 70 | ray_idx = torch.randperm(self.H * self.W)[:self.num_rays] # [R] 71 | image_sampled = image.flatten(1, 2)[:, ray_idx].t() # [R,3] 72 | sample.update( 73 | ray_idx=ray_idx, 74 | image_sampled=image_sampled, 75 | intr=intr, 76 | pose=pose, 77 | ) 78 | else: # keep image during inference 79 | sample.update( 80 | image=image, 81 | intr=intr, 82 | pose=pose, 83 | ) 84 | return sample 85 | 86 | def get_image(self, idx): 87 | fpath = self.list[idx]["file_path"] 88 | image_fname = f"{self.root}/{fpath}" 89 | image = Image.open(image_fname) 90 | image.load() 91 | image_size_raw = image.size 92 | return image, image_size_raw 93 | 94 | def preprocess_image(self, image): 95 | # Resize the image. 96 | image = image.resize((self.W, self.H)) 97 | image = torchvision_F.to_tensor(image) 98 | rgb = image[:3] 99 | return rgb 100 | 101 | def get_camera(self, idx): 102 | # Camera intrinsics. 
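# The intrinsics assembled below follow the pinhole layout
#   K = [[fl_x, sk_x, cx],
#        [sk_y, fl_y, cy],
#        [0,    0,    1 ]]
# with the entries read from transforms.json. A hedged sketch of how such a K maps
# camera-space points to pixels (the helper is illustrative, not part of this dataset class):
import torch

def project_pinhole(K, xyz_cam):
    # xyz_cam: [N,3] points in front of the camera (z > 0).
    uvw = xyz_cam @ K.t()            # apply K to each point
    return uvw[:, :2] / uvw[:, 2:]   # perspective divide -> [N,2] pixel coordinates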
103 | intr = torch.tensor([[self.meta["fl_x"], self.meta["sk_x"], self.meta["cx"]], 104 | [self.meta["sk_y"], self.meta["fl_y"], self.meta["cy"]], 105 | [0, 0, 1]]).float() 106 | # Camera pose. 107 | c2w_gl = torch.tensor(self.list[idx]["transform_matrix"], dtype=torch.float32) 108 | c2w = self._gl_to_cv(c2w_gl) 109 | # center scene 110 | center = np.array(self.meta["sphere_center"]) 111 | center += np.array(getattr(self.readjust, "center", [0])) if self.readjust else 0. 112 | c2w[:3, -1] -= center 113 | # scale scene 114 | scale = np.array(self.meta["sphere_radius"]) 115 | scale *= getattr(self.readjust, "scale", 1.) if self.readjust else 1. 116 | c2w[:3, -1] /= scale 117 | w2c = camera.Pose().invert(c2w[:3]) 118 | return intr, w2c 119 | 120 | def preprocess_camera(self, intr, pose, image_size_raw): 121 | # Adjust the intrinsics according to the resized image. 122 | intr = intr.clone() 123 | raw_W, raw_H = image_size_raw 124 | intr[0] *= self.W / raw_W 125 | intr[1] *= self.H / raw_H 126 | return intr, pose 127 | 128 | def _gl_to_cv(self, gl): 129 | # convert to CV convention used in Imaginaire 130 | cv = gl * torch.tensor([1, -1, -1, 1]) 131 | return cv 132 | -------------------------------------------------------------------------------- /projects/neuralangelo/scripts/convert_dtu_to_json.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 
10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import numpy as np 14 | import json 15 | from argparse import ArgumentParser 16 | import os 17 | import cv2 18 | from PIL import Image, ImageFile 19 | from glob import glob 20 | import math 21 | import sys 22 | from pathlib import Path 23 | 24 | 25 | dir_path = Path(os.path.dirname(os.path.realpath(__file__))).parents[2] 26 | sys.path.append(dir_path.__str__()) 27 | from projects.neuralangelo.scripts.convert_data_to_json import _cv_to_gl # noqa: E402 28 | 29 | ImageFile.LOAD_TRUNCATED_IMAGES = True 30 | 31 | 32 | def load_K_Rt_from_P(filename, P=None): 33 | # This function is borrowed from IDR: https://github.com/lioryariv/idr 34 | if P is None: 35 | lines = open(filename).read().splitlines() 36 | if len(lines) == 4: 37 | lines = lines[1:] 38 | lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] 39 | P = np.asarray(lines).astype(np.float32).squeeze() 40 | 41 | out = cv2.decomposeProjectionMatrix(P) 42 | K = out[0] 43 | R = out[1] 44 | t = out[2] 45 | 46 | K = K / K[2, 2] 47 | intrinsics = np.eye(4) 48 | intrinsics[:3, :3] = K 49 | 50 | pose = np.eye(4, dtype=np.float32) 51 | pose[:3, :3] = R.transpose() 52 | pose[:3, 3] = (t[:3] / t[3])[:, 0] 53 | 54 | return intrinsics, pose 55 | 56 | 57 | def dtu_to_json(args): 58 | assert args.dtu_path, "Provide path to DTU dataset" 59 | scene_list = os.listdir(args.dtu_path) 60 | 61 | for scene in scene_list: 62 | scene_path = os.path.join(args.dtu_path, scene) 63 | if not os.path.isdir(scene_path) or 'scan' not in scene: 64 | continue 65 | 66 | out = { 67 | "k1": 0.0, # take undistorted images only 68 | "k2": 0.0, 69 | "k3": 0.0, 70 | "k4": 0.0, 71 | "p1": 0.0, 72 | "p2": 0.0, 73 | "is_fisheye": False, 74 | "frames": [] 75 | } 76 | 77 | camera_param = dict(np.load(os.path.join(scene_path, 'cameras_sphere.npz'))) 78 | images_lis = sorted(glob(os.path.join(scene_path, 'image/*.png'))) 79 | for idx, image in enumerate(images_lis): 80 | image = os.path.basename(image) 81 | 82 | world_mat = camera_param['world_mat_%d' % idx] 83 | scale_mat = camera_param['scale_mat_%d' % idx] 84 | 85 | # scale and decompose 86 | P = world_mat @ scale_mat 87 | P = P[:3, :4] 88 | intrinsic_param, c2w = load_K_Rt_from_P(None, P) 89 | c2w_gl = _cv_to_gl(c2w) 90 | 91 | frame = {"file_path": 'image/' + image, "transform_matrix": c2w_gl.tolist()} 92 | out["frames"].append(frame) 93 | 94 | fl_x = intrinsic_param[0][0] 95 | fl_y = intrinsic_param[1][1] 96 | cx = intrinsic_param[0][2] 97 | cy = intrinsic_param[1][2] 98 | sk_x = intrinsic_param[0][1] 99 | sk_y = intrinsic_param[1][0] 100 | w, h = Image.open(os.path.join(scene_path, 'image', image)).size 101 | 102 | angle_x = math.atan(w / (fl_x * 2)) * 2 103 | angle_y = math.atan(h / (fl_y * 2)) * 2 104 | 105 | scale_mat = scale_mat.astype(float) 106 | 107 | out.update({ 108 | "camera_angle_x": angle_x, 109 | "camera_angle_y": angle_y, 110 | "fl_x": fl_x, 111 | "fl_y": fl_y, 112 | "cx": cx, 113 | "cy": cy, 114 | "sk_x": sk_x, 115 | "sk_y": sk_y, 116 | "w": int(w), 117 | "h": int(h), 118 | "aabb_scale": np.exp2(np.rint(np.log2(scale_mat[0, 0]))), # power of two, for INGP resolution computation 119 | "sphere_center": [0., 0., 0.], 120 | "sphere_radius": 1., 121 | }) 122 | 123 | file_path = os.path.join(scene_path, 'transforms.json') 124 | with open(file_path, "w") as outputfile: 125 | json.dump(out, outputfile, indent=2) 126 | print('Writing data to json file: ', file_path) 127 | 128 | 129 | if __name__ == 
'__main__': 130 | parser = ArgumentParser() 131 | parser.add_argument('--dtu_path', type=str, default=None) 132 | 133 | args = parser.parse_args() 134 | 135 | dtu_to_json(args) 136 | -------------------------------------------------------------------------------- /projects/neuralangelo/scripts/extract_mesh.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import argparse 14 | import json 15 | import os 16 | import sys 17 | import numpy as np 18 | from functools import partial 19 | 20 | sys.path.append(os.getcwd()) 21 | from imaginaire.config import Config, recursive_update_strict, parse_cmdline_arguments # noqa: E402 22 | from imaginaire.utils.distributed import init_dist, get_world_size, is_master, master_only_print as print # noqa: E402 23 | from imaginaire.utils.gpu_affinity import set_affinity # noqa: E402 24 | from imaginaire.trainers.utils.get_trainer import get_trainer # noqa: E402 25 | from projects.neuralangelo.utils.mesh import extract_mesh, extract_texture # noqa: E402 26 | 27 | 28 | def parse_args(): 29 | parser = argparse.ArgumentParser(description="Training") 30 | parser.add_argument("--config", required=True, help="Path to the training config file.") 31 | parser.add_argument("--checkpoint", default="", help="Checkpoint path.") 32 | parser.add_argument('--local_rank', type=int, default=os.getenv('LOCAL_RANK', 0)) 33 | parser.add_argument('--single_gpu', action='store_true') 34 | parser.add_argument("--resolution", default=512, type=int, help="Marching cubes resolution") 35 | parser.add_argument("--block_res", default=64, type=int, help="Block-wise resolution for marching cubes") 36 | parser.add_argument("--output_file", default="mesh.ply", type=str, help="Output file name") 37 | parser.add_argument("--textured", action="store_true", help="Export mesh with texture") 38 | parser.add_argument("--keep_lcc", action="store_true", 39 | help="Keep only largest connected component. May remove thin structures.") 40 | args, cfg_cmd = parser.parse_known_args() 41 | return args, cfg_cmd 42 | 43 | 44 | def main(): 45 | args, cfg_cmd = parse_args() 46 | set_affinity(args.local_rank) 47 | cfg = Config(args.config) 48 | 49 | cfg_cmd = parse_cmdline_arguments(cfg_cmd) 50 | recursive_update_strict(cfg, cfg_cmd) 51 | 52 | # If args.single_gpu is set to True, we will disable distributed data parallel. 53 | if not args.single_gpu: 54 | # this disables nccl timeout 55 | os.environ["NCLL_BLOCKING_WAIT"] = "0" 56 | os.environ["NCCL_ASYNC_ERROR_HANDLING"] = "0" 57 | cfg.local_rank = args.local_rank 58 | init_dist(cfg.local_rank, rank=-1, world_size=-1) 59 | print(f"Running mesh extraction with {get_world_size()} GPUs.") 60 | 61 | cfg.logdir = '' 62 | 63 | # Initialize data loaders and models. 64 | trainer = get_trainer(cfg, is_inference=True, seed=0) 65 | # Load checkpoint. 
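# Further down, the marching-cubes lattice covers the normalized [-1,1]^3 bounds, so the
# voxel size passed to extract_mesh is intv = 2.0 / resolution, processed block by block to
# bound memory. A small arithmetic sketch with example values (2048 and 128 are illustrative,
# not the script's defaults):
resolution, block_res = 2048, 128
intv = 2.0 / resolution                      # voxel size in the normalized frame
num_blocks = (resolution // block_res) ** 3  # 4096 blocks of 128^3 voxels each
assert num_blocks == 4096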
66 | trainer.checkpointer.load(args.checkpoint, load_opt=False, load_sch=False) 67 | trainer.model.eval() 68 | 69 | # Set the coarse-to-fine levels. 70 | trainer.current_iteration = trainer.checkpointer.eval_iteration 71 | if cfg.model.object.sdf.encoding.coarse2fine.enabled: 72 | trainer.model_module.neural_sdf.set_active_levels(trainer.current_iteration) 73 | if cfg.model.object.sdf.gradient.mode == "numerical": 74 | trainer.model_module.neural_sdf.set_normal_epsilon() 75 | 76 | meta_fname = f"{cfg.data.root}/transforms.json" 77 | with open(meta_fname) as file: 78 | meta = json.load(file) 79 | 80 | if "aabb_range" in meta: 81 | bounds = (np.array(meta["aabb_range"]) - np.array(meta["sphere_center"])[..., None]) / meta["sphere_radius"] 82 | else: 83 | bounds = np.array([[-1.0, 1.0], [-1.0, 1.0], [-1.0, 1.0]]) 84 | 85 | sdf_func = lambda x: -trainer.model_module.neural_sdf.sdf(x) # noqa: E731 86 | texture_func = partial(extract_texture, neural_sdf=trainer.model_module.neural_sdf, 87 | neural_rgb=trainer.model_module.neural_rgb, 88 | appear_embed=trainer.model_module.appear_embed) if args.textured else None 89 | mesh = extract_mesh(sdf_func=sdf_func, bounds=bounds, intv=(2.0 / args.resolution), 90 | block_res=args.block_res, texture_func=texture_func, filter_lcc=args.keep_lcc) 91 | 92 | if is_master(): 93 | print(f"vertices: {len(mesh.vertices)}") 94 | print(f"faces: {len(mesh.faces)}") 95 | if args.textured: 96 | print(f"colors: {len(mesh.visual.vertex_colors)}") 97 | # center and scale 98 | mesh.vertices = mesh.vertices * meta["sphere_radius"] + np.array(meta["sphere_center"]) 99 | mesh.update_faces(mesh.nondegenerate_faces()) 100 | os.makedirs(os.path.dirname(args.output_file), exist_ok=True) 101 | mesh.export(args.output_file) 102 | 103 | 104 | if __name__ == "__main__": 105 | main() 106 | -------------------------------------------------------------------------------- /projects/neuralangelo/scripts/generate_config.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 
10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import os 14 | import sys 15 | from argparse import ArgumentParser 16 | from pathlib import Path 17 | import yaml 18 | from addict import Dict 19 | from PIL import Image, ImageFile 20 | 21 | dir_path = Path(os.path.dirname(os.path.realpath(__file__))).parents[2] 22 | sys.path.append(dir_path.__str__()) 23 | 24 | ImageFile.LOAD_TRUNCATED_IMAGES = True 25 | 26 | 27 | def generate_config(args): 28 | cfg = Dict() 29 | cfg._parent_ = "projects/neuralangelo/configs/base.yaml" 30 | num_images = len(os.listdir(os.path.join(args.data_dir, "images"))) 31 | # model cfg 32 | if args.auto_exposure_wb: 33 | cfg.data.num_images = num_images 34 | cfg.model.appear_embed.enabled = True 35 | cfg.model.appear_embed.dim = 8 36 | if num_images < 4: # default is 4 37 | cfg.data.val.subset = num_images 38 | else: 39 | cfg.model.appear_embed.enabled = False 40 | if args.scene_type == "outdoor": 41 | cfg.model.object.sdf.mlp.inside_out = False 42 | cfg.model.object.sdf.encoding.coarse2fine.init_active_level = 8 43 | elif args.scene_type == "indoor": 44 | cfg.model.object.sdf.mlp.inside_out = True 45 | cfg.model.object.sdf.encoding.coarse2fine.init_active_level = 8 46 | cfg.model.background.enabled = False 47 | cfg.model.render.num_samples.background = 0 48 | elif args.scene_type == "object": 49 | cfg.model.object.sdf.mlp.inside_out = False 50 | cfg.model.object.sdf.encoding.coarse2fine.init_active_level = 4 51 | else: 52 | raise TypeError("Unknown scene type") 53 | # data config 54 | cfg.data.type = "projects.neuralangelo.data" 55 | cfg.data.root = args.data_dir 56 | img = Image.open(os.path.join(args.data_dir, "images", os.listdir(os.path.join(args.data_dir, "images"))[0])) 57 | w, h = img.size 58 | cfg.data.train.image_size = [h, w] 59 | short_size = args.val_short_size 60 | cfg.data.val.image_size = [short_size, int(w/h*short_size)] if w > h else [int(h/w*short_size), short_size] 61 | cfg.data.readjust.center = [0., 0., 0.] 62 | cfg.data.readjust.scale = 1. 63 | # export cfg 64 | cfg_fname = os.path.join(dir_path, "projects/neuralangelo/configs", f"custom/{args.sequence_name}.yaml") 65 | with open(cfg_fname, "w") as file: 66 | yaml.safe_dump(cfg.to_dict(), file, default_flow_style=False, indent=4) 67 | print("Config generated to file:", cfg_fname) 68 | return 69 | 70 | 71 | if __name__ == "__main__": 72 | parser = ArgumentParser() 73 | parser.add_argument("--sequence_name", type=str, default="recon", help="Name of sequence") 74 | parser.add_argument("--data_dir", type=str, default=None, help="Path to data") 75 | parser.add_argument("--auto_exposure_wb", action="store_true", 76 | help="Video capture with auto-exposure or white-balance") 77 | parser.add_argument("--scene_type", type=str, default="outdoor", choices=["outdoor", "indoor", "object"], 78 | help="Select scene type. 
Outdoor for building-scale reconstruction; " 79 | "indoor for room-scale reconstruction; object for object-centric scene reconstruction.") 80 | parser.add_argument("--val_short_size", type=int, default=300, 81 | help="Set the short side of validation images (for saving compute when rendering val images)") 82 | args = parser.parse_args() 83 | generate_config(args) 84 | -------------------------------------------------------------------------------- /projects/neuralangelo/scripts/preprocess.sh: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------------------- 2 | # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # NVIDIA CORPORATION and its licensors retain all intellectual property 5 | # and proprietary rights in and to this software, related documentation 6 | # and any modifications thereto. Any use, reproduction, disclosure or 7 | # distribution of this software and related documentation without an express 8 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 9 | # ----------------------------------------------------------------------------- 10 | 11 | # usage: preprocess.sh 12 | 13 | data_path=datasets/${1}_ds${3} 14 | bash projects/neuralangelo/scripts/run_ffmpeg.sh ${1} ${2} ${3} 15 | bash projects/neuralangelo/scripts/run_colmap.sh ${data_path} 16 | python3 projects/neuralangelo/scripts/convert_data_to_json.py --data_dir ${data_path} --scene_type ${4} 17 | python3 projects/neuralangelo/scripts/generate_config.py --sequence_name ${1} --data_dir ${data_path} --scene_type ${4} 18 | -------------------------------------------------------------------------------- /projects/neuralangelo/scripts/preprocess_dtu.sh: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------------------- 2 | # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # NVIDIA CORPORATION and its licensors retain all intellectual property 5 | # and proprietary rights in and to this software, related documentation 6 | # and any modifications thereto. Any use, reproduction, disclosure or 7 | # distribution of this software and related documentation without an express 8 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 9 | # ----------------------------------------------------------------------------- 10 | 11 | # usage: dtu_download.sh 12 | 13 | echo "Download DTU data" 14 | mkdir -p "${1}" 15 | curl -L -o data.zip https://www.dropbox.com/sh/w0y8bbdmxzik3uk/AAAaZffBiJevxQzRskoOYcyja?dl=1 16 | unzip data.zip "data_DTU.zip" 17 | rm data.zip 18 | unzip -q data_DTU.zip -d ${1} 19 | rm data_DTU.zip 20 | echo "Generate json files" 21 | python3 projects/neuralangelo/scripts/convert_dtu_to_json.py --dtu_path ${1} 22 | -------------------------------------------------------------------------------- /projects/neuralangelo/scripts/preprocess_tnt.sh: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------------------- 2 | # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # NVIDIA CORPORATION and its licensors retain all intellectual property 5 | # and proprietary rights in and to this software, related documentation 6 | # and any modifications thereto. 
Any use, reproduction, disclosure or 7 | # distribution of this software and related documentation without an express 8 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 9 | # ----------------------------------------------------------------------------- 10 | 11 | # usage: tnt_download.sh 12 | 13 | echo "Download fixed poses for Courthouse" 14 | gdown 10pcCwaQY6hqyiegJGdgmLp_HMFOnsmgq 15 | gdown 19TT5aTz3z60eUVIDhFJ7EgGqpcqQnJEi 16 | mv Courthouse_COLMAP_SfM.log ${1}/Courthouse/Courthouse_COLMAP_SfM.log 17 | mv Courthouse_trans.txt ${1}/Courthouse/Courthouse_trans.txt 18 | 19 | echo "Compute intrinsics, undistort images and generate json files. This may take a while" 20 | python3 projects/neuralangelo/scripts/convert_tnt_to_json.py --tnt_path ${1} 21 | -------------------------------------------------------------------------------- /projects/neuralangelo/scripts/run_colmap.sh: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------------------- 2 | # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # NVIDIA CORPORATION and its licensors retain all intellectual property 5 | # and proprietary rights in and to this software, related documentation 6 | # and any modifications thereto. Any use, reproduction, disclosure or 7 | # distribution of this software and related documentation without an express 8 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 9 | # ----------------------------------------------------------------------------- 10 | 11 | # usage: run_colmap.sh 12 | 13 | colmap feature_extractor \ 14 | --database_path=${1}/database.db \ 15 | --image_path=${1}/images_raw \ 16 | --ImageReader.camera_model=SIMPLE_RADIAL \ 17 | --ImageReader.single_camera=true \ 18 | --SiftExtraction.use_gpu=true \ 19 | --SiftExtraction.num_threads=32 20 | 21 | colmap sequential_matcher \ 22 | --database_path=${1}/database.db \ 23 | --SiftMatching.use_gpu=true 24 | 25 | mkdir -p ${1}/sparse 26 | colmap mapper \ 27 | --database_path=${1}/database.db \ 28 | --image_path=${1}/images_raw \ 29 | --output_path=${1}/sparse 30 | 31 | cp ${1}/sparse/0/*.bin ${1}/sparse/ 32 | for path in ${1}/sparse/*/; do 33 | m=$(basename ${path}) 34 | if [ ${m} != "0" ]; then 35 | colmap model_merger \ 36 | --input_path1=${1}/sparse \ 37 | --input_path2=${1}/sparse/${m} \ 38 | --output_path=${1}/sparse 39 | colmap bundle_adjuster \ 40 | --input_path=${1}/sparse \ 41 | --output_path=${1}/sparse 42 | fi 43 | done 44 | 45 | colmap image_undistorter \ 46 | --image_path=${1}/images_raw \ 47 | --input_path=${1}/sparse \ 48 | --output_path=${1} \ 49 | --output_type=COLMAP 50 | -------------------------------------------------------------------------------- /projects/neuralangelo/scripts/run_ffmpeg.sh: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------------------- 2 | # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # NVIDIA CORPORATION and its licensors retain all intellectual property 5 | # and proprietary rights in and to this software, related documentation 6 | # and any modifications thereto. Any use, reproduction, disclosure or 7 | # distribution of this software and related documentation without an express 8 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 
9 | # ----------------------------------------------------------------------------- 10 | 11 | # usage: run_ffmpeg.sh 12 | 13 | data_path=datasets/${1}_ds${3} 14 | image_path=${data_path}/images_raw 15 | mkdir -p ${image_path} 16 | ffmpeg -i ${2} -vf "select=not(mod(n\,$3))" -vsync vfr -q:v 2 ${image_path}/%06d.jpg 17 | -------------------------------------------------------------------------------- /projects/neuralangelo/scripts/visualize_colmap.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "8b8d7b17-af50-42cd-b531-ef61c49c9e61", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "# Set the work directory to the imaginaire root.\n", 11 | "import os, sys, time\n", 12 | "import pathlib\n", 13 | "root_dir = pathlib.Path().absolute().parents[2]\n", 14 | "os.chdir(root_dir)\n", 15 | "print(f\"Root Directory Path: {root_dir}\")" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "id": "2b5b9e2f-841c-4815-92e0-0c76ed46da62", 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "# Import Python libraries.\n", 26 | "import numpy as np\n", 27 | "import torch\n", 28 | "import k3d\n", 29 | "import json\n", 30 | "import plotly.graph_objs as go\n", 31 | "from collections import OrderedDict\n", 32 | "# Import imaginaire modules.\n", 33 | "from projects.nerf.utils import camera, visualize\n", 34 | "from third_party.colmap.scripts.python.read_write_model import read_model" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "id": "76033016-2d92-4a5d-9e50-3978553e8df4", 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "# Read the COLMAP data.\n", 45 | "colmap_path = \"datasets/lego_ds2\"\n", 46 | "cameras, images, points_3D = read_model(path=f\"{colmap_path}/sparse\", ext=\".bin\")\n", 47 | "# Convert camera poses.\n", 48 | "images = OrderedDict(sorted(images.items()))\n", 49 | "qvecs = torch.from_numpy(np.stack([image.qvec for image in images.values()]))\n", 50 | "tvecs = torch.from_numpy(np.stack([image.tvec for image in images.values()]))\n", 51 | "Rs = camera.quaternion.q_to_R(qvecs)\n", 52 | "poses = torch.cat([Rs, tvecs[..., None]], dim=-1) # [N,3,4]\n", 53 | "print(f\"# images: {len(poses)}\")\n", 54 | "# Get the sparse 3D points and the colors.\n", 55 | "xyzs = torch.from_numpy(np.stack([point.xyz for point in points_3D.values()]))\n", 56 | "rgbs = np.stack([point.rgb for point in points_3D.values()])\n", 57 | "rgbs_int32 = (rgbs[:, 0] * 2**16 + rgbs[:, 1] * 2**8 + rgbs[:, 2]).astype(np.uint32)\n", 58 | "print(f\"# points: {len(xyzs)}\")" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "id": "47862ee1-286c-4877-a181-4b33b7733719", 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "vis_depth = 0.2" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "id": "b6cf60ec-fe6a-43ba-9aaf-e3c7afd88208", 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "# Visualize the bounding sphere.\n", 79 | "json_fname = f\"{colmap_path}/transforms.json\"\n", 80 | "with open(json_fname) as file:\n", 81 | " meta = json.load(file)\n", 82 | "center = meta[\"sphere_center\"]\n", 83 | "radius = meta[\"sphere_radius\"]\n", 84 | "# ------------------------------------------------------------------------------------\n", 85 | "# These variables can be adjusted to make the bounding sphere fit the region of 
interest.\n", 86 | "# The adjusted values can then be set in the config as data.readjust.center and data.readjust.scale\n", 87 | "readjust_center = np.array([0., 0., 0.])\n", 88 | "readjust_scale = 1.\n", 89 | "# ------------------------------------------------------------------------------------\n", 90 | "center += readjust_center\n", 91 | "radius *= readjust_scale\n", 92 | "# Make some points to hallucinate a bounding sphere.\n", 93 | "sphere_points = np.random.randn(100000, 3)\n", 94 | "sphere_points = sphere_points / np.linalg.norm(sphere_points, axis=-1, keepdims=True)\n", 95 | "sphere_points = sphere_points * radius + center" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "id": "e986aed0-1aaf-4772-937c-136db7f2eaec", 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [ 105 | "# You can choose to visualize with Plotly...\n", 106 | "x, y, z = *xyzs.T,\n", 107 | "colors = rgbs / 255.0\n", 108 | "sphere_x, sphere_y, sphere_z = *sphere_points.T,\n", 109 | "sphere_colors = [\"#4488ff\"] * len(sphere_points)\n", 110 | "traces_poses = visualize.plotly_visualize_pose(poses, vis_depth=vis_depth, xyz_length=0.02, center_size=0.01, xyz_width=0.005, mesh_opacity=0.05)\n", 111 | "trace_points = go.Scatter3d(x=x, y=y, z=z, mode=\"markers\", marker=dict(size=1, color=colors, opacity=1), hoverinfo=\"skip\")\n", 112 | "trace_sphere = go.Scatter3d(x=sphere_x, y=sphere_y, z=sphere_z, mode=\"markers\", marker=dict(size=0.5, color=sphere_colors, opacity=0.7), hoverinfo=\"skip\")\n", 113 | "traces_all = traces_poses + [trace_points, trace_sphere]\n", 114 | "layout = go.Layout(scene=dict(xaxis=dict(showspikes=False, backgroundcolor=\"rgba(0,0,0,0)\", gridcolor=\"rgba(0,0,0,0.1)\"),\n", 115 | " yaxis=dict(showspikes=False, backgroundcolor=\"rgba(0,0,0,0)\", gridcolor=\"rgba(0,0,0,0.1)\"),\n", 116 | " zaxis=dict(showspikes=False, backgroundcolor=\"rgba(0,0,0,0)\", gridcolor=\"rgba(0,0,0,0.1)\"),\n", 117 | " xaxis_title=\"X\", yaxis_title=\"Y\", zaxis_title=\"Z\", dragmode=\"orbit\",\n", 118 | " aspectratio=dict(x=1, y=1, z=1), aspectmode=\"data\"), height=800)\n", 119 | "fig = go.Figure(data=traces_all, layout=layout)\n", 120 | "fig.show()" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "id": "fdde170b-4546-4617-9162-a9fcb936347d", 127 | "metadata": {}, 128 | "outputs": [], 129 | "source": [ 130 | "# ... 
or visualize with K3D.\n", 131 | "plot = k3d.plot(name=\"poses\", height=800, camera_rotate_speed=5.0, camera_zoom_speed=3.0, camera_pan_speed=1.0)\n", 132 | "k3d_objects = visualize.k3d_visualize_pose(poses, vis_depth=vis_depth, xyz_length=0.02, center_size=0.01, xyz_width=0.005, mesh_opacity=0.05)\n", 133 | "for k3d_object in k3d_objects:\n", 134 | " plot += k3d_object\n", 135 | "plot += k3d.points(xyzs, colors=rgbs_int32, point_size=0.02, shader=\"flat\")\n", 136 | "plot += k3d.points(sphere_points, color=0x4488ff, point_size=0.01, shader=\"flat\")\n", 137 | "plot.display()\n", 138 | "plot.camera_fov = 30.0" 139 | ] 140 | } 141 | ], 142 | "metadata": { 143 | "kernelspec": { 144 | "display_name": "Python 3 (ipykernel)", 145 | "language": "python", 146 | "name": "python3" 147 | }, 148 | "language_info": { 149 | "codemirror_mode": { 150 | "name": "ipython", 151 | "version": 3 152 | }, 153 | "file_extension": ".py", 154 | "mimetype": "text/x-python", 155 | "name": "python", 156 | "nbconvert_exporter": "python", 157 | "pygments_lexer": "ipython3", 158 | "version": "3.9.13" 159 | } 160 | }, 161 | "nbformat": 4, 162 | "nbformat_minor": 5 163 | } 164 | -------------------------------------------------------------------------------- /projects/neuralangelo/scripts/visualize_mesh.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "8b8d7b17-af50-42cd-b531-ef61c49c9e61", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "# Set the work directory to the imaginaire root.\n", 11 | "import os, sys, time\n", 12 | "import pathlib\n", 13 | "root_dir = pathlib.Path().absolute().parents[2]\n", 14 | "os.chdir(root_dir)\n", 15 | "print(f\"Root Directory Path: {root_dir}\")" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "id": "2b5b9e2f-841c-4815-92e0-0c76ed46da62", 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "# Import Python libraries.\n", 26 | "import numpy as np\n", 27 | "import trimesh\n", 28 | "import k3d" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "id": "84604c4a-8d95-462a-b7f0-acaa0b9f563d", 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "# Load the 3D mesh.\n", 39 | "ply_fname = \"logs/0_debug/18/mesh.ply\"\n", 40 | "mesh = trimesh.load(ply_fname)\n", 41 | "print(f\"# vertices: {len(mesh.vertices)}\")\n", 42 | "print(f\"# faces: {len(mesh.faces)}\")" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "id": "d1f8df0b-8361-40f3-a801-0cc42b920fed", 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "# Visualize with K3D.\n", 53 | "plot = k3d.plot(name=\"mesh\", height=800, camera_rotate_speed=5.0, camera_zoom_speed=3.0, camera_pan_speed=1.0)\n", 54 | "C = mesh.visual.vertex_colors\n", 55 | "colors = C[:,0]*256**2 + C[:,1]*256 + C[:,2]\n", 56 | "plot += k3d.mesh(mesh.vertices, mesh.faces, colors=colors, side=\"double\", opacity=1)\n", 57 | "plot.display()\n", 58 | "plot.camera_fov = 30.0" 59 | ] 60 | } 61 | ], 62 | "metadata": { 63 | "kernelspec": { 64 | "display_name": "Python 3 (ipykernel)", 65 | "language": "python", 66 | "name": "python3" 67 | }, 68 | "language_info": { 69 | "codemirror_mode": { 70 | "name": "ipython", 71 | "version": 3 72 | }, 73 | "file_extension": ".py", 74 | "mimetype": "text/x-python", 75 | "name": "python", 76 | "nbconvert_exporter": "python", 77 | "pygments_lexer": "ipython3", 78 | "version": 
"3.9.13" 79 | } 80 | }, 81 | "nbformat": 4, 82 | "nbformat_minor": 5 83 | } 84 | -------------------------------------------------------------------------------- /projects/neuralangelo/scripts/visualize_transforms.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "8b8d7b17-af50-42cd-b531-ef61c49c9e61", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "# Set the work directory to the imaginaire root.\n", 11 | "import os, sys, time\n", 12 | "import pathlib\n", 13 | "root_dir = pathlib.Path().absolute().parents[2]\n", 14 | "os.chdir(root_dir)\n", 15 | "print(f\"Root Directory Path: {root_dir}\")" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "id": "2b5b9e2f-841c-4815-92e0-0c76ed46da62", 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "# Import Python libraries.\n", 26 | "import numpy as np\n", 27 | "import torch\n", 28 | "import k3d\n", 29 | "import json\n", 30 | "from collections import OrderedDict\n", 31 | "# Import imaginaire modules.\n", 32 | "from projects.nerf.utils import camera, visualize\n", 33 | "from third_party.colmap.scripts.python.read_write_model import read_model" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "id": "97bedecf-da68-44b1-96cf-580ef7e7f3f0", 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "# Read the COLMAP data.\n", 44 | "colmap_path = \"datasets/lego_ds2\"\n", 45 | "json_fname = f\"{colmap_path}/transforms.json\"\n", 46 | "with open(json_fname) as file:\n", 47 | " meta = json.load(file)\n", 48 | "center = meta[\"sphere_center\"]\n", 49 | "radius = meta[\"sphere_radius\"]\n", 50 | "# Convert camera poses.\n", 51 | "poses = []\n", 52 | "for frame in meta[\"frames\"]:\n", 53 | " c2w = torch.tensor(frame[\"transform_matrix\"])\n", 54 | " c2w[:, 1:3] *= -1\n", 55 | " w2c = c2w.inverse()\n", 56 | " pose = w2c[:3] # [3,4]\n", 57 | " poses.append(pose)\n", 58 | "poses = torch.stack(poses, dim=0)\n", 59 | "print(f\"# images: {len(poses)}\")" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "id": "2016d20c-1e58-407f-9810-cbe76dc5ccec", 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "vis_depth = 0.2\n", 70 | "k3d_textures = []" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "id": "d7168a09-6654-4660-b140-66b9dfd6f1e8", 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "# (optional) visualize the images.\n", 81 | "# This block can be skipped if we don't want to visualize the image observations.\n", 82 | "for i, frame in enumerate(meta[\"frames\"]):\n", 83 | " image_fname = frame[\"file_path\"]\n", 84 | " image_path = f\"{colmap_path}/{image_fname}\"\n", 85 | " with open(image_path, \"rb\") as file:\n", 86 | " binary = file.read()\n", 87 | " # Compute the corresponding image corners in 3D.\n", 88 | " pose = poses[i]\n", 89 | " corners = torch.tensor([[-0.5, 0.5, 1], [0.5, 0.5, 1], [-0.5, -0.5, 1]])\n", 90 | " corners *= vis_depth\n", 91 | " corners = camera.cam2world(corners, pose)\n", 92 | " puv = [corners[0].tolist(), (corners[1]-corners[0]).tolist(), (corners[2]-corners[0]).tolist()]\n", 93 | " k3d_texture = k3d.texture(binary, file_format=\"jpg\", puv=puv)\n", 94 | " k3d_textures.append(k3d_texture)" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "id": "b6cf60ec-fe6a-43ba-9aaf-e3c7afd88208", 101 | 
"metadata": {}, 102 | "outputs": [], 103 | "source": [ 104 | "# Visualize the bounding sphere.\n", 105 | "json_fname = f\"{colmap_path}/transforms.json\"\n", 106 | "with open(json_fname) as file:\n", 107 | " meta = json.load(file)\n", 108 | "center = meta[\"sphere_center\"]\n", 109 | "radius = meta[\"sphere_radius\"]\n", 110 | "# ------------------------------------------------------------------------------------\n", 111 | "# These variables can be adjusted to make the bounding sphere fit the region of interest.\n", 112 | "# The adjusted values can then be set in the config as data.readjust.center and data.readjust.scale\n", 113 | "readjust_center = np.array([0., 0., 0.])\n", 114 | "readjust_scale = 1.\n", 115 | "# ------------------------------------------------------------------------------------\n", 116 | "center += readjust_center\n", 117 | "radius *= readjust_scale\n", 118 | "# Make some points to hallucinate a bounding sphere.\n", 119 | "sphere_points = np.random.randn(100000, 3)\n", 120 | "sphere_points = sphere_points / np.linalg.norm(sphere_points, axis=-1, keepdims=True)\n", 121 | "sphere_points = sphere_points * radius + center" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "id": "fdde170b-4546-4617-9162-a9fcb936347d", 128 | "metadata": {}, 129 | "outputs": [], 130 | "source": [ 131 | "# Visualize with K3D.\n", 132 | "plot = k3d.plot(name=\"poses\", height=800, camera_rotate_speed=5.0, camera_zoom_speed=3.0, camera_pan_speed=1.0)\n", 133 | "k3d_objects = visualize.k3d_visualize_pose(poses, vis_depth=vis_depth, xyz_length=0.02, center_size=0.01, xyz_width=0.005, mesh_opacity=0.)\n", 134 | "for k3d_object in k3d_objects:\n", 135 | " plot += k3d_object\n", 136 | "for k3d_texture in k3d_textures:\n", 137 | " plot += k3d_texture\n", 138 | "plot += k3d.points(sphere_points, color=0x4488ff, point_size=0.01, shader=\"flat\")\n", 139 | "plot.display()\n", 140 | "plot.camera_fov = 30.0" 141 | ] 142 | } 143 | ], 144 | "metadata": { 145 | "kernelspec": { 146 | "display_name": "Python 3 (ipykernel)", 147 | "language": "python", 148 | "name": "python3" 149 | }, 150 | "language_info": { 151 | "codemirror_mode": { 152 | "name": "ipython", 153 | "version": 3 154 | }, 155 | "file_extension": ".py", 156 | "mimetype": "text/x-python", 157 | "name": "python", 158 | "nbconvert_exporter": "python", 159 | "pygments_lexer": "ipython3", 160 | "version": "3.9.13" 161 | } 162 | }, 163 | "nbformat": 4, 164 | "nbformat_minor": 5 165 | } 166 | -------------------------------------------------------------------------------- /projects/neuralangelo/trainer.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 
10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import torch 14 | import torch.nn.functional as torch_F 15 | import wandb 16 | 17 | from imaginaire.utils.distributed import master_only 18 | from imaginaire.utils.visualization import wandb_image 19 | from projects.nerf.trainers.base import BaseTrainer 20 | from projects.neuralangelo.utils.misc import get_scheduler, eikonal_loss, curvature_loss 21 | 22 | 23 | class Trainer(BaseTrainer): 24 | 25 | def __init__(self, cfg, is_inference=True, seed=0): 26 | super().__init__(cfg, is_inference=is_inference, seed=seed) 27 | self.metrics = dict() 28 | self.warm_up_end = cfg.optim.sched.warm_up_end 29 | self.cfg_gradient = cfg.model.object.sdf.gradient 30 | if cfg.model.object.sdf.encoding.type == "hashgrid" and cfg.model.object.sdf.encoding.coarse2fine.enabled: 31 | self.c2f_step = cfg.model.object.sdf.encoding.coarse2fine.step 32 | self.model.module.neural_sdf.warm_up_end = self.warm_up_end 33 | 34 | def _init_loss(self, cfg): 35 | self.criteria["render"] = torch.nn.L1Loss() 36 | 37 | def setup_scheduler(self, cfg, optim): 38 | return get_scheduler(cfg.optim, optim) 39 | 40 | def _compute_loss(self, data, mode=None): 41 | if mode == "train": 42 | # Compute loss only on randomly sampled rays. 43 | self.losses["render"] = self.criteria["render"](data["rgb"], data["image_sampled"]) * 3 # FIXME:sumRGB?! 44 | self.metrics["psnr"] = -10 * torch_F.mse_loss(data["rgb"], data["image_sampled"]).log10() 45 | if "eikonal" in self.weights.keys(): 46 | self.losses["eikonal"] = eikonal_loss(data["gradients"], outside=data["outside"]) 47 | if "curvature" in self.weights: 48 | self.losses["curvature"] = curvature_loss(data["hessians"], outside=data["outside"]) 49 | else: 50 | # Compute loss on the entire image. 
51 | self.losses["render"] = self.criteria["render"](data["rgb_map"], data["image"]) 52 | self.metrics["psnr"] = -10 * torch_F.mse_loss(data["rgb_map"], data["image"]).log10() 53 | 54 | def get_curvature_weight(self, current_iteration, init_weight): 55 | if "curvature" in self.weights: 56 | if current_iteration <= self.warm_up_end: 57 | self.weights["curvature"] = current_iteration / self.warm_up_end * init_weight 58 | else: 59 | model = self.model_module 60 | decay_factor = model.neural_sdf.growth_rate ** (model.neural_sdf.anneal_levels - 1) 61 | self.weights["curvature"] = init_weight / decay_factor 62 | 63 | def _start_of_iteration(self, data, current_iteration): 64 | model = self.model_module 65 | self.progress = model.progress = current_iteration / self.cfg.max_iter 66 | if self.cfg.model.object.sdf.encoding.coarse2fine.enabled: 67 | model.neural_sdf.set_active_levels(current_iteration) 68 | if self.cfg_gradient.mode == "numerical": 69 | model.neural_sdf.set_normal_epsilon() 70 | self.get_curvature_weight(current_iteration, self.cfg.trainer.loss_weight.curvature) 71 | elif self.cfg_gradient.mode == "numerical": 72 | model.neural_sdf.set_normal_epsilon() 73 | 74 | return super()._start_of_iteration(data, current_iteration) 75 | 76 | @master_only 77 | def log_wandb_scalars(self, data, mode=None): 78 | super().log_wandb_scalars(data, mode=mode) 79 | scalars = { 80 | f"{mode}/PSNR": self.metrics["psnr"].detach(), 81 | f"{mode}/s-var": self.model_module.s_var.item(), 82 | } 83 | if "curvature" in self.weights: 84 | scalars[f"{mode}/curvature_weight"] = self.weights["curvature"] 85 | if "eikonal" in self.weights: 86 | scalars[f"{mode}/eikonal_weight"] = self.weights["eikonal"] 87 | if mode == "train" and self.cfg_gradient.mode == "numerical": 88 | scalars[f"{mode}/epsilon"] = self.model.module.neural_sdf.normal_eps 89 | if self.cfg.model.object.sdf.encoding.coarse2fine.enabled: 90 | scalars[f"{mode}/active_levels"] = self.model.module.neural_sdf.active_levels 91 | wandb.log(scalars, step=self.current_iteration) 92 | 93 | @master_only 94 | def log_wandb_images(self, data, mode=None, max_samples=None): 95 | images = {"iteration": self.current_iteration, "epoch": self.current_epoch} 96 | if mode == "val": 97 | images_error = (data["rgb_map"] - data["image"]).abs() 98 | images.update({ 99 | f"{mode}/vis/rgb_target": wandb_image(data["image"]), 100 | f"{mode}/vis/rgb_render": wandb_image(data["rgb_map"]), 101 | f"{mode}/vis/rgb_error": wandb_image(images_error), 102 | f"{mode}/vis/normal": wandb_image(data["normal_map"], from_range=(-1, 1)), 103 | f"{mode}/vis/inv_depth": wandb_image(1 / (data["depth_map"] + 1e-8) * self.cfg.trainer.depth_vis_scale), 104 | f"{mode}/vis/opacity": wandb_image(data["opacity_map"]), 105 | }) 106 | wandb.log(images, step=self.current_iteration) 107 | 108 | def train(self, cfg, data_loader, single_gpu=False, profile=False, show_pbar=False): 109 | self.progress = self.model_module.progress = self.current_iteration / self.cfg.max_iter 110 | super().train(cfg, data_loader, single_gpu, profile, show_pbar) 111 | -------------------------------------------------------------------------------- /projects/neuralangelo/utils/mesh.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 
4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import numpy as np 14 | import trimesh 15 | import mcubes 16 | import torch 17 | import torch.distributed as dist 18 | import torch.nn.functional as torch_F 19 | from tqdm import tqdm 20 | 21 | from imaginaire.utils.distributed import get_world_size, is_master 22 | 23 | 24 | @torch.no_grad() 25 | def extract_mesh(sdf_func, bounds, intv, block_res=64, texture_func=None, filter_lcc=False): 26 | lattice_grid = LatticeGrid(bounds, intv=intv, block_res=block_res) 27 | data_loader = get_lattice_grid_loader(lattice_grid) 28 | mesh_blocks = [] 29 | if is_master(): 30 | data_loader = tqdm(data_loader, leave=False) 31 | for it, data in enumerate(data_loader): 32 | xyz = data["xyz"][0] 33 | xyz_cuda = xyz.cuda() 34 | sdf_cuda = sdf_func(xyz_cuda)[..., 0] 35 | sdf = sdf_cuda.cpu() 36 | mesh = marching_cubes(sdf.numpy(), xyz.numpy(), intv, texture_func, filter_lcc) 37 | mesh_blocks.append(mesh) 38 | mesh_blocks_gather = [None] * get_world_size() 39 | if dist.is_initialized(): 40 | dist.all_gather_object(mesh_blocks_gather, mesh_blocks) 41 | else: 42 | mesh_blocks_gather = [mesh_blocks] 43 | if is_master(): 44 | mesh_blocks_all = [mesh for mesh_blocks in mesh_blocks_gather for mesh in mesh_blocks 45 | if mesh.vertices.shape[0] > 0] 46 | mesh = trimesh.util.concatenate(mesh_blocks_all) 47 | return mesh 48 | else: 49 | return None 50 | 51 | 52 | @torch.no_grad() 53 | def extract_texture(xyz, neural_rgb, neural_sdf, appear_embed): 54 | num_samples, _ = xyz.shape 55 | xyz_cuda = torch.from_numpy(xyz).float().cuda()[None, None] # [N,3] -> [1,1,N,3] 56 | sdfs, feats = neural_sdf(xyz_cuda) 57 | gradients, _ = neural_sdf.compute_gradients(xyz_cuda, training=False, sdf=sdfs) 58 | normals = torch_F.normalize(gradients, dim=-1) 59 | if appear_embed is not None: 60 | feat_dim = appear_embed.embedding_dim # [1,1,N,C] 61 | app = torch.zeros([1, 1, num_samples, feat_dim], device=sdfs.device) # TODO: hard-coded to zero. better way? 62 | else: 63 | app = None 64 | rgbs = neural_rgb.forward(xyz_cuda, normals, -normals, feats, app=app) # [1,1,N,3] 65 | return (rgbs.squeeze().cpu().numpy() * 255).astype(np.uint8) 66 | 67 | 68 | class LatticeGrid(torch.utils.data.Dataset): 69 | 70 | def __init__(self, bounds, intv, block_res=64): 71 | super().__init__() 72 | self.block_res = block_res 73 | ((x_min, x_max), (y_min, y_max), (z_min, z_max)) = bounds 74 | self.x_grid = torch.arange(x_min, x_max, intv) 75 | self.y_grid = torch.arange(y_min, y_max, intv) 76 | self.z_grid = torch.arange(z_min, z_max, intv) 77 | res_x, res_y, res_z = len(self.x_grid), len(self.y_grid), len(self.z_grid) 78 | print("Extracting surface at resolution", res_x, res_y, res_z) 79 | self.num_blocks_x = int(np.ceil(res_x / block_res)) 80 | self.num_blocks_y = int(np.ceil(res_y / block_res)) 81 | self.num_blocks_z = int(np.ceil(res_z / block_res)) 82 | 83 | def __getitem__(self, idx): 84 | # Keep track of sample index for convenience. 
85 | sample = dict(idx=idx) 86 | block_idx_x = idx // (self.num_blocks_y * self.num_blocks_z) 87 | block_idx_y = (idx // self.num_blocks_z) % self.num_blocks_y 88 | block_idx_z = idx % self.num_blocks_z 89 | xi = block_idx_x * self.block_res 90 | yi = block_idx_y * self.block_res 91 | zi = block_idx_z * self.block_res 92 | x, y, z = torch.meshgrid(self.x_grid[xi:xi+self.block_res+1], 93 | self.y_grid[yi:yi+self.block_res+1], 94 | self.z_grid[zi:zi+self.block_res+1], indexing="ij") 95 | xyz = torch.stack([x, y, z], dim=-1) 96 | sample.update(xyz=xyz) 97 | return sample 98 | 99 | def __len__(self): 100 | return self.num_blocks_x * self.num_blocks_y * self.num_blocks_z 101 | 102 | 103 | def get_lattice_grid_loader(dataset, num_workers=8): 104 | if dist.is_initialized(): 105 | sampler = torch.utils.data.distributed.DistributedSampler(dataset, shuffle=False) 106 | else: 107 | sampler = None 108 | return torch.utils.data.DataLoader( 109 | dataset, 110 | batch_size=1, 111 | shuffle=False, 112 | sampler=sampler, 113 | pin_memory=True, 114 | num_workers=num_workers, 115 | drop_last=False 116 | ) 117 | 118 | 119 | def marching_cubes(sdf, xyz, intv, texture_func, filter_lcc): 120 | # marching cubes 121 | V, F = mcubes.marching_cubes(sdf, 0.) 122 | if V.shape[0] > 0: 123 | V = V * intv + xyz[0, 0, 0] 124 | if texture_func is not None: 125 | C = texture_func(V) 126 | mesh = trimesh.Trimesh(V, F, vertex_colors=C) 127 | else: 128 | mesh = trimesh.Trimesh(V, F) 129 | mesh = filter_points_outside_bounding_sphere(mesh) 130 | mesh = filter_largest_cc(mesh) if filter_lcc else mesh 131 | else: 132 | mesh = trimesh.Trimesh() 133 | return mesh 134 | 135 | 136 | def filter_points_outside_bounding_sphere(old_mesh): 137 | mask = np.linalg.norm(old_mesh.vertices, axis=-1) < 1.0 138 | if np.any(mask): 139 | indices = np.ones(len(old_mesh.vertices), dtype=int) * -1 140 | indices[mask] = np.arange(mask.sum()) 141 | faces_mask = mask[old_mesh.faces[:, 0]] & mask[old_mesh.faces[:, 1]] & mask[old_mesh.faces[:, 2]] 142 | new_faces = indices[old_mesh.faces[faces_mask]] 143 | new_vertices = old_mesh.vertices[mask] 144 | new_colors = old_mesh.visual.vertex_colors[mask] 145 | new_mesh = trimesh.Trimesh(new_vertices, new_faces, vertex_colors=new_colors) 146 | else: 147 | new_mesh = trimesh.Trimesh() 148 | return new_mesh 149 | 150 | 151 | def filter_largest_cc(mesh): 152 | components = mesh.split(only_watertight=False) 153 | areas = np.array([c.area for c in components], dtype=float) 154 | if len(areas) > 0 and mesh.vertices.shape[0] > 0: 155 | new_mesh = components[areas.argmax()] 156 | else: 157 | new_mesh = trimesh.Trimesh() 158 | return new_mesh 159 | -------------------------------------------------------------------------------- /projects/neuralangelo/utils/misc.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 
10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | from functools import partial 14 | import numpy as np 15 | import torch 16 | import torch.nn.functional as torch_F 17 | import imaginaire.trainers.utils 18 | from torch.optim import lr_scheduler 19 | 20 | flip_mat = np.array([ 21 | [1, 0, 0, 0], 22 | [0, -1, 0, 0], 23 | [0, 0, -1, 0], 24 | [0, 0, 0, 1] 25 | ]) 26 | 27 | 28 | def get_scheduler(cfg_opt, opt): 29 | """Return the scheduler object. 30 | 31 | Args: 32 | cfg_opt (obj): Config for the specific optimization module (gen/dis). 33 | opt (obj): PyTorch optimizer object. 34 | 35 | Returns: 36 | (obj): Scheduler 37 | """ 38 | if cfg_opt.sched.type == 'two_steps_with_warmup': 39 | warm_up_end = cfg_opt.sched.warm_up_end 40 | two_steps = cfg_opt.sched.two_steps 41 | gamma = cfg_opt.sched.gamma 42 | 43 | def sch(x): 44 | if x < warm_up_end: 45 | return x / warm_up_end 46 | else: 47 | if x > two_steps[1]: 48 | return 1.0 / gamma ** 2 49 | elif x > two_steps[0]: 50 | return 1.0 / gamma 51 | else: 52 | return 1.0 53 | 54 | scheduler = lr_scheduler.LambdaLR(opt, lambda x: sch(x)) 55 | elif cfg_opt.sched.type == 'cos_with_warmup': 56 | alpha = cfg_opt.sched.alpha 57 | max_iter = cfg_opt.sched.max_iter 58 | warm_up_end = cfg_opt.sched.warm_up_end 59 | 60 | def sch(x): 61 | if x < warm_up_end: 62 | return x / warm_up_end 63 | else: 64 | progress = (x - warm_up_end) / (max_iter - warm_up_end) 65 | learning_factor = (np.cos(np.pi * progress) + 1.0) * 0.5 * (1 - alpha) + alpha 66 | return learning_factor 67 | 68 | scheduler = lr_scheduler.LambdaLR(opt, lambda x: sch(x)) 69 | else: 70 | return imaginaire.trainers.utils.get_scheduler() 71 | return scheduler 72 | 73 | 74 | def eikonal_loss(gradients, outside=None): 75 | gradient_error = (gradients.norm(dim=-1) - 1.0) ** 2 # [B,R,N] 76 | gradient_error = gradient_error.nan_to_num(nan=0.0, posinf=0.0, neginf=0.0) # [B,R,N] 77 | if outside is not None: 78 | return (gradient_error * (~outside).float()).mean() 79 | else: 80 | return gradient_error.mean() 81 | 82 | 83 | def curvature_loss(hessian, outside=None): 84 | laplacian = hessian.sum(dim=-1).abs() # [B,R,N] 85 | laplacian = laplacian.nan_to_num(nan=0.0, posinf=0.0, neginf=0.0) # [B,R,N] 86 | if outside is not None: 87 | return (laplacian * (~outside).float()).mean() 88 | else: 89 | return laplacian.mean() 90 | 91 | 92 | def get_activation(activ, **kwargs): 93 | func = dict( 94 | identity=lambda x: x, 95 | relu=torch_F.relu, 96 | relu_=torch_F.relu_, 97 | abs=torch.abs, 98 | abs_=torch.abs_, 99 | sigmoid=torch.sigmoid, 100 | sigmoid_=torch.sigmoid_, 101 | exp=torch.exp, 102 | exp_=torch.exp_, 103 | softplus=torch_F.softplus, 104 | silu=torch_F.silu, 105 | silu_=partial(torch_F.silu, inplace=True), 106 | )[activ] 107 | return partial(func, **kwargs) 108 | 109 | 110 | def to_full_image(image, image_size=None, from_vec=True): 111 | # if from_vec is True: [B,HW,...,K] --> [B,K,H,W,...] 112 | # if from_vec is False: [B,H,W,...,K] --> [B,K,H,W,...] 113 | if from_vec: 114 | assert image_size is not None 115 | image = image.unflatten(dim=1, sizes=image_size) 116 | image = image.moveaxis(-1, 1) 117 | return image 118 | -------------------------------------------------------------------------------- /projects/neuralangelo/utils/mlp.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. 
All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import numpy as np 14 | import torch 15 | import torch.nn.functional as torch_F 16 | 17 | 18 | class MLPforNeuralSDF(torch.nn.Module): 19 | 20 | def __init__(self, layer_dims, skip_connection=[], activ=None, use_layernorm=False, use_weightnorm=False, 21 | geometric_init=False, out_bias=0., invert=False): 22 | """Initialize a multi-layer perceptron with skip connection. 23 | Args: 24 | layer_dims: A list of integers representing the number of channels in each layer. 25 | skip_connection: A list of integers representing the index of layers to add skip connection. 26 | """ 27 | super().__init__() 28 | self.skip_connection = skip_connection 29 | self.use_layernorm = use_layernorm 30 | self.linears = torch.nn.ModuleList() 31 | if use_layernorm: 32 | self.layer_norm = torch.nn.ModuleList() 33 | # Hidden layers 34 | layer_dim_pairs = list(zip(layer_dims[:-1], layer_dims[1:])) 35 | for li, (k_in, k_out) in enumerate(layer_dim_pairs): 36 | if li in self.skip_connection: 37 | k_in += layer_dims[0] 38 | linear = torch.nn.Linear(k_in, k_out) 39 | if geometric_init: 40 | self._geometric_init(linear, k_in, k_out, first=(li == 0), 41 | skip_dim=(layer_dims[0] if li in self.skip_connection else 0)) 42 | if use_weightnorm: 43 | linear = torch.nn.utils.weight_norm(linear) 44 | self.linears.append(linear) 45 | if use_layernorm and li != len(layer_dim_pairs) - 1: 46 | self.layer_norm.append(torch.nn.LayerNorm(k_out)) 47 | if li == len(layer_dim_pairs) - 1: 48 | self.linears[-1].bias.data.fill_(0.0) 49 | # SDF prediction layer 50 | self.linear_sdf = torch.nn.Linear(k_in, 1) 51 | if geometric_init: 52 | self._geometric_init_sdf(self.linear_sdf, k_in, out_bias=out_bias, invert=invert) 53 | self.activ = activ or torch_F.relu_ 54 | 55 | def forward(self, input, with_sdf=True, with_feat=True): 56 | feat = input 57 | for li, linear in enumerate(self.linears): 58 | if li in self.skip_connection: 59 | feat = torch.cat([feat, input], dim=-1) 60 | if li != len(self.linears) - 1 or with_feat: 61 | feat_pre = linear(feat) 62 | if self.use_layernorm: 63 | feat_pre = self.layer_norm[li](feat_pre) 64 | feat_activ = self.activ(feat_pre) 65 | if li == len(self.linears) - 1: 66 | out = [self.linear_sdf(feat) if with_sdf else None, 67 | feat_activ if with_feat else None] 68 | feat = feat_activ 69 | return out 70 | 71 | def _geometric_init(self, linear, k_in, k_out, first=False, skip_dim=0): 72 | torch.nn.init.constant_(linear.bias, 0.0) 73 | torch.nn.init.normal_(linear.weight, 0.0, np.sqrt(2 / k_out)) 74 | if first: 75 | torch.nn.init.constant_(linear.weight[:, 3:], 0.0) # positional encodings 76 | if skip_dim: 77 | torch.nn.init.constant_(linear.weight[:, -skip_dim:], 0.0) # skip connections 78 | 79 | def _geometric_init_sdf(self, linear, k_in, out_bias=0., invert=False): 80 | torch.nn.init.normal_(linear.weight, mean=np.sqrt(np.pi / k_in), std=0.0001) 81 | torch.nn.init.constant_(linear.bias, -out_bias) 82 | if invert: 83 | linear.weight.data *= -1 84 | linear.bias.data *= -1 85 | 
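Note: the snippet below is a minimal, self-contained sketch of how the MLPforNeuralSDF module listed above might be instantiated and queried. The layer widths, skip index, activation, and out_bias value are illustrative placeholders, not the settings shipped in the released configs; run it from the repository root so the projects package is importable.

import torch
import torch.nn.functional as torch_F
from projects.neuralangelo.utils.mlp import MLPforNeuralSDF

# Hypothetical dimensions: a 32-dim encoded input, three 64-wide hidden layers,
# a 16-dim feature head, and a skip connection into the third hidden layer.
mlp = MLPforNeuralSDF(layer_dims=[32, 64, 64, 64, 16], skip_connection=[2],
                      activ=torch_F.softplus, geometric_init=True, out_bias=0.5)
points_enc = torch.randn(1024, 32)  # placeholder encoded 3D points
sdf, feat = mlp(points_enc)         # sdf: [1024, 1], feat: [1024, 16]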
-------------------------------------------------------------------------------- /projects/neuralangelo/utils/spherical_harmonics.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import torch 14 | 15 | 16 | SH_C0 = 0.28209479177387814 17 | SH_C1 = 0.4886025119029199 18 | SH_C2 = [ 19 | 1.0925484305920792, 20 | -1.0925484305920792, 21 | 0.31539156525252005, 22 | -1.0925484305920792, 23 | 0.5462742152960396 24 | ] 25 | SH_C3 = [ 26 | -0.5900435899266435, 27 | 2.890611442640554, 28 | -0.4570457994644658, 29 | 0.3731763325901154, 30 | -0.4570457994644658, 31 | 1.445305721320277, 32 | -0.5900435899266435 33 | ] 34 | SH_C4 = [ 35 | 2.5033429417967046, 36 | -1.7701307697799304, 37 | 0.9461746957575601, 38 | -0.6690465435572892, 39 | 0.10578554691520431, 40 | -0.6690465435572892, 41 | 0.47308734787878004, 42 | -1.7701307697799304, 43 | 0.6258357354491761, 44 | ] 45 | 46 | 47 | def get_spherical_harmonics(dirs, levels): 48 | # Evaluate spherical harmonics bases at unit directions, without taking linear combination. 49 | vals = torch.empty((*dirs.shape[:-1], (levels + 1) ** 2), device=dirs.device) 50 | vals[..., 0] = SH_C0 51 | if levels >= 1: 52 | x, y, z = dirs.unbind(-1) 53 | vals[..., 1] = -SH_C1 * y 54 | vals[..., 2] = SH_C1 * z 55 | vals[..., 3] = -SH_C1 * x 56 | if levels >= 2: 57 | xx, yy, zz = x * x, y * y, z * z 58 | xy, yz, xz = x * y, y * z, x * z 59 | vals[..., 4] = SH_C2[0] * xy 60 | vals[..., 5] = SH_C2[1] * yz 61 | vals[..., 6] = SH_C2[2] * (2.0 * zz - xx - yy) 62 | vals[..., 7] = SH_C2[3] * xz 63 | vals[..., 8] = SH_C2[4] * (xx - yy) 64 | if levels >= 3: 65 | vals[..., 9] = SH_C3[0] * y * (3 * xx - yy) 66 | vals[..., 10] = SH_C3[1] * xy * z 67 | vals[..., 11] = SH_C3[2] * y * (4 * zz - xx - yy) 68 | vals[..., 12] = SH_C3[3] * z * (2 * zz - 3 * xx - 3 * yy) 69 | vals[..., 13] = SH_C3[4] * x * (4 * zz - xx - yy) 70 | vals[..., 14] = SH_C3[5] * z * (xx - yy) 71 | vals[..., 15] = SH_C3[6] * x * (xx - 3 * yy) 72 | if levels >= 4: 73 | vals[..., 16] = SH_C4[0] * xy * (xx - yy) 74 | vals[..., 17] = SH_C4[1] * yz * (3 * xx - yy) 75 | vals[..., 18] = SH_C4[2] * xy * (7 * zz - 1) 76 | vals[..., 19] = SH_C4[3] * yz * (7 * zz - 3) 77 | vals[..., 20] = SH_C4[4] * (zz * (35 * zz - 30) + 3) 78 | vals[..., 21] = SH_C4[5] * xz * (7 * zz - 3) 79 | vals[..., 22] = SH_C4[6] * (xx - yy) * (7 * zz - 1) 80 | vals[..., 23] = SH_C4[7] * xz * (xx - 3 * yy) 81 | vals[..., 24] = SH_C4[8] * (xx * (xx - 3 * yy) - yy * (3 * xx - yy)) 82 | if levels >= 5: 83 | raise NotImplementedError 84 | return vals 85 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | addict 2 | gdown 3 | git+https://github.com/NVlabs/tiny-cuda-nn/#subdirectory=bindings/torch 4 | gpustat 5 | icecream 6 | imageio-ffmpeg 7 | imutils 8 | ipdb 9 | k3d 10 | 
kornia 11 | lpips 12 | matplotlib 13 | mediapy 14 | nvidia-ml-py3 15 | open3d 16 | opencv-python-headless 17 | OpenEXR 18 | pathlib 19 | pillow 20 | plotly 21 | pyequilib 22 | pyexr 23 | PyMCubes 24 | pyquaternion 25 | pyyaml 26 | requests 27 | scikit-image 28 | scikit-video 29 | scipy 30 | seaborn 31 | tensorboard 32 | termcolor 33 | tqdm 34 | trimesh 35 | wandb 36 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import argparse 14 | import os 15 | 16 | import imaginaire.config 17 | from imaginaire.config import Config, recursive_update_strict, parse_cmdline_arguments 18 | from imaginaire.utils.cudnn import init_cudnn 19 | from imaginaire.utils.distributed import init_dist, get_world_size, master_only_print as print, is_master 20 | from imaginaire.utils.gpu_affinity import set_affinity 21 | from imaginaire.trainers.utils.logging import init_logging 22 | from imaginaire.trainers.utils.get_trainer import get_trainer 23 | from imaginaire.utils.set_random_seed import set_random_seed 24 | 25 | 26 | def parse_args(): 27 | parser = argparse.ArgumentParser(description='Training') 28 | parser.add_argument('--config', help='Path to the training config file.', required=True) 29 | parser.add_argument('--logdir', help='Dir for saving logs and models.', default=None) 30 | parser.add_argument('--checkpoint', default=None, help='Checkpoint path.') 31 | parser.add_argument('--seed', type=int, default=0, help='Random seed.') 32 | parser.add_argument('--local_rank', type=int, default=os.getenv('LOCAL_RANK', 0)) 33 | parser.add_argument('--single_gpu', action='store_true') 34 | parser.add_argument('--debug', action='store_true') 35 | parser.add_argument('--profile', action='store_true') 36 | parser.add_argument('--show_pbar', action='store_true') 37 | parser.add_argument('--wandb', action='store_true', help="Enable using Weights & Biases as the logger") 38 | parser.add_argument('--wandb_name', default='default', type=str) 39 | parser.add_argument('--resume', action='store_true') 40 | args, cfg_cmd = parser.parse_known_args() 41 | return args, cfg_cmd 42 | 43 | 44 | def main(): 45 | args, cfg_cmd = parse_args() 46 | set_affinity(args.local_rank) 47 | cfg = Config(args.config) 48 | 49 | cfg_cmd = parse_cmdline_arguments(cfg_cmd) 50 | recursive_update_strict(cfg, cfg_cmd) 51 | 52 | # If args.single_gpu is set to True, we will disable distributed data parallel. 53 | if not args.single_gpu: 54 | # this disables nccl timeout 55 | os.environ["NCCL_BLOCKING_WAIT"] = "0" 56 | os.environ["NCCL_ASYNC_ERROR_HANDLING"] = "0" 57 | cfg.local_rank = args.local_rank 58 | init_dist(cfg.local_rank, rank=-1, world_size=-1) 59 | print(f"Training with {get_world_size()} GPUs.") 60 | 61 | # set random seed by rank 62 | set_random_seed(args.seed, by_rank=True) 63 | 64 | # Global arguments.
65 | imaginaire.config.DEBUG = args.debug 66 | 67 | # Create log directory for storing training results. 68 | cfg.logdir = init_logging(args.config, args.logdir, makedir=True) 69 | 70 | # Print and save final config 71 | if is_master(): 72 | cfg.print_config() 73 | cfg.save_config(cfg.logdir) 74 | 75 | # Initialize cudnn. 76 | init_cudnn(cfg.cudnn.deterministic, cfg.cudnn.benchmark) 77 | 78 | # Initialize data loaders and models. 79 | trainer = get_trainer(cfg, is_inference=False, seed=args.seed) 80 | trainer.set_data_loader(cfg, split="train") 81 | trainer.set_data_loader(cfg, split="val") 82 | trainer.checkpointer.load(args.checkpoint, args.resume, load_sch=True, load_opt=True) 83 | 84 | # Initialize Wandb. 85 | trainer.init_wandb(cfg, 86 | project=args.wandb_name, 87 | mode="disabled" if args.debug or not args.wandb else "online", 88 | resume=args.resume, 89 | use_group=True) 90 | 91 | trainer.mode = 'train' 92 | # Start training. 93 | trainer.train(cfg, 94 | trainer.train_data_loader, 95 | single_gpu=args.single_gpu, 96 | profile=args.profile, 97 | show_pbar=args.show_pbar) 98 | 99 | # Finalize training. 100 | trainer.finalize(cfg) 101 | 102 | 103 | if __name__ == "__main__": 104 | main() 105 | --------------------------------------------------------------------------------
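Note: given the arguments defined in train.py above, launches might look like the following. The commands are illustrative only: config/log paths are placeholders, and the multi-GPU line assumes a torchrun-style launcher that sets LOCAL_RANK, RANK, and WORLD_SIZE for init_dist(); adjust to your own setup.

# Single-GPU debug run (placeholder paths):
python train.py --config path/to/your_config.yaml --logdir logs/my_run --single_gpu --show_pbar

# Multi-GPU run via a torchrun-style launcher (assumed, not mandated by this file):
torchrun --nproc_per_node=8 train.py --config path/to/your_config.yaml --logdir logs/my_run --show_pbar --wandb --wandb_name my_project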