├── .gitignore ├── .gitmodules ├── .pre-commit-config.yaml ├── DATA_PROCESSING.md ├── LICENSE.md ├── README.md ├── assets └── teaser.gif ├── docker ├── Dockerfile-colmap └── Dockerfile-neuralangelo ├── imaginaire ├── config.py ├── config_base.yaml ├── datasets │ ├── base.py │ └── utils │ │ ├── dataloader.py │ │ ├── get_dataloader.py │ │ └── sampler.py ├── models │ ├── base.py │ └── utils │ │ ├── init_weight.py │ │ └── model_average.py ├── trainers │ ├── base.py │ └── utils │ │ ├── get_trainer.py │ │ ├── logging.py │ │ └── meters.py └── utils │ ├── cudnn.py │ ├── distributed.py │ ├── gpu_affinity.py │ ├── misc.py │ ├── set_random_seed.py │ ├── termcolor.py │ └── visualization.py ├── neuralangelo.yaml ├── projects ├── nerf │ ├── configs │ │ ├── ingp_blender.yaml │ │ ├── nerf_blender.yaml │ │ └── nerf_llff.yaml │ ├── datasets │ │ ├── base.py │ │ ├── nerf_blender.py │ │ └── nerf_llff.py │ ├── models │ │ ├── ingp.py │ │ └── nerf.py │ ├── trainers │ │ ├── base.py │ │ └── nerf.py │ └── utils │ │ ├── camera.py │ │ ├── misc.py │ │ ├── nerf_util.py │ │ ├── render.py │ │ └── visualize.py └── neuralangelo │ ├── configs │ ├── base.yaml │ ├── custom │ │ └── template.yaml │ ├── dtu.yaml │ └── tnt.yaml │ ├── data.py │ ├── model.py │ ├── scripts │ ├── convert_data_to_json.py │ ├── convert_dtu_to_json.py │ ├── convert_tnt_to_json.py │ ├── extract_mesh.py │ ├── generate_config.py │ ├── preprocess.sh │ ├── preprocess_dtu.sh │ ├── preprocess_tnt.sh │ ├── run_colmap.sh │ ├── run_ffmpeg.sh │ ├── visualize_colmap.ipynb │ ├── visualize_mesh.ipynb │ └── visualize_transforms.ipynb │ ├── trainer.py │ └── utils │ ├── mesh.py │ ├── misc.py │ ├── mlp.py │ ├── modules.py │ └── spherical_harmonics.py ├── requirements.txt └── train.py /.gitignore: -------------------------------------------------------------------------------- 1 | checkpoints 2 | 3 | # Other uncheckable file types 4 | *.zip 5 | *.exe 6 | *.dll 7 | *.swp 8 | *.vscode 9 | *.ipynb 10 | *.DS_Store 11 | *.pyc 12 | 13 | # Credential information that should never be checked in 14 | *.secret 15 | 16 | # Data types 17 | *.png 18 | *.hdr 19 | *.jpg 20 | *.jpeg 21 | *.pgm 22 | *.tiff 23 | *.tif 24 | *.mp4 25 | *.MOV 26 | *.tar 27 | *.tar.gz 28 | *.pkl 29 | *.pt 30 | *.bin 31 | *.ply 32 | 33 | # log folder 34 | logs/ 35 | 36 | # dataset folder 37 | datasets/ 38 | /datasets/ 39 | 40 | # config folder 41 | !projects/neuralangelo/configs/custom/template.yaml 42 | projects/neuralangelo/configs/custom 43 | 44 | # ------------------------ BELOW IS AUTO-GENERATED FOR PYTHON REPOS ------------------------ 45 | 46 | # Byte-compiled / optimized / DLL files 47 | __pycache__/ 48 | *.py[cod] 49 | *$py.class 50 | 51 | # C extensions 52 | *.so 53 | 54 | # Distribution / packaging 55 | .Python 56 | build/ 57 | develop-eggs/ 58 | dist/ 59 | downloads/ 60 | eggs/ 61 | .eggs/ 62 | lib/ 63 | lib64/ 64 | parts/ 65 | sdist/ 66 | var/ 67 | wheels/ 68 | share/python-wheels/ 69 | *.egg-info/ 70 | .installed.cfg 71 | *.egg 72 | MANIFEST 73 | 74 | # PyInstaller 75 | # Usually these files are written by a python script from a template 76 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
77 | *.manifest 78 | *.spec 79 | 80 | # Installer logs 81 | pip-log.txt 82 | pip-delete-this-directory.txt 83 | 84 | # Unit test / coverage reports 85 | htmlcov/ 86 | .tox/ 87 | .nox/ 88 | .coverage 89 | .coverage.* 90 | .cache 91 | nosetests.xml 92 | coverage.xml 93 | *.cover 94 | *.py,cover 95 | .hypothesis/ 96 | .pytest_cache/ 97 | cover/ 98 | 99 | # Translations 100 | *.mo 101 | *.pot 102 | 103 | # Django stuff: 104 | *.log 105 | local_settings.py 106 | db.sqlite3 107 | db.sqlite3-journal 108 | 109 | # Flask stuff: 110 | instance/ 111 | .webassets-cache 112 | 113 | # Scrapy stuff: 114 | .scrapy 115 | 116 | # Sphinx documentation 117 | docs/_build/ 118 | 119 | # PyBuilder 120 | .pybuilder/ 121 | target/ 122 | 123 | # Jupyter Notebook 124 | .ipynb_checkpoints 125 | 126 | # IPython 127 | profile_default/ 128 | ipython_config.py 129 | 130 | # pyenv 131 | # For a library or package, you might want to ignore these files since the code is 132 | # intended to run in multiple environments; otherwise, check them in: 133 | # .python-version 134 | 135 | # pipenv 136 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 137 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 138 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 139 | # install all needed dependencies. 140 | #Pipfile.lock 141 | 142 | # poetry 143 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 144 | # This is especially recommended for binary packages to ensure reproducibility, and is more 145 | # commonly ignored for libraries. 146 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 147 | #poetry.lock 148 | 149 | # pdm 150 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 151 | #pdm.lock 152 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 153 | # in version control. 154 | # https://pdm.fming.dev/#use-with-ide 155 | .pdm.toml 156 | 157 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 158 | __pypackages__/ 159 | 160 | # Celery stuff 161 | celerybeat-schedule 162 | celerybeat.pid 163 | 164 | # SageMath parsed files 165 | *.sage.py 166 | 167 | # Environments 168 | .env 169 | .venv 170 | env/ 171 | venv/ 172 | ENV/ 173 | env.bak/ 174 | venv.bak/ 175 | 176 | # Spyder project settings 177 | .spyderproject 178 | .spyproject 179 | 180 | # Rope project settings 181 | .ropeproject 182 | 183 | # mkdocs documentation 184 | /site 185 | 186 | # mypy 187 | .mypy_cache/ 188 | .dmypy.json 189 | dmypy.json 190 | 191 | # Pyre type checker 192 | .pyre/ 193 | 194 | # pytype static type analyzer 195 | .pytype/ 196 | 197 | # Cython debug symbols 198 | cython_debug/ 199 | 200 | # PyCharm 201 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 202 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 203 | # and can be added to the global gitignore or merged into this file. For a more nuclear 204 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
205 | #.idea/ 206 | CLIP 207 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "third_party/colmap"] 2 | path = third_party/colmap 3 | url = https://github.com/colmap/colmap.git 4 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pycqa/flake8 3 | rev: 4.0.0 4 | hooks: 5 | - id: flake8 6 | args: [--max-line-length=120] 7 | exclude: third_party 8 | -------------------------------------------------------------------------------- /DATA_PROCESSING.md: -------------------------------------------------------------------------------- 1 | # Data Preparation 2 | 3 | *Note: please use respecting the license terms of each dataset. Each user is responsible for checking the content of datasets and the applicable licenses and determining if suitable for the intended use.* 4 | 5 | The following sections provide a guide on how to preprocess input videos for Neuralangelo. 6 | 7 | ## Prerequisites 8 | Initialize the COLMAP submodule: 9 | ```bash 10 | git submodule update --init --recursive 11 | ``` 12 | 13 | ## Self-captured video sequence 14 | To capture your own data, we recommend using a high shutter speed to avoid motion blur (which is very common when using a phone camera). We provide a synthetic [Lego sequence](https://drive.google.com/file/d/1yWoZ4Hk3FgmV3pd34ZbW7jEqgqyJgzHy/view?usp=drive_link) (from the original [NeRF](https://github.com/bmild/nerf)) as a toy example video for testing the workflow. There are two steps: 15 | 1. [preprocessing](#preprocessing) the data and running COLMAP, 16 | 2. [inspecting](#inspect-and-adjust-colmap-results) and refining the bounding sphere of interest for running Neuralangelo. 17 | 18 | ### Preprocessing 19 | First, set some environment variables: 20 | ```bash 21 | SEQUENCE=lego 22 | PATH_TO_VIDEO=lego.mp4 23 | DOWNSAMPLE_RATE=2 24 | SCENE_TYPE=object 25 | ``` 26 | where 27 | - `SEQUENCE`: your custom name for the video sequence. 28 | - `PATH_TO_VIDEO`: absolute/relative path to your video. 29 | - `DOWNSAMPLE_RATE`: temporal downsampling rate of video sequence (for extracting video frames). 30 | - `SCENE_TYPE`: can be one of ` {outdoor,indoor,object}`. 31 | 32 | To preprocess your data, you can choose to either 33 | 34 | - Run the following end-to-end script: 35 | ```bash 36 | bash projects/neuralangelo/scripts/preprocess.sh ${SEQUENCE} ${PATH_TO_VIDEO} ${DOWNSAMPLE_RATE} ${SCENE_TYPE} 37 | ``` 38 | 39 | - Or you can follow the steps below if you want more fine-grained control: 40 | 41 | 1. Extract images from the input video 42 | 43 | ```bash 44 | bash projects/neuralangelo/scripts/run_ffmpeg.sh ${SEQUENCE} ${PATH_TO_VIDEO} ${DOWNSAMPLE_RATE} 45 | ``` 46 | This will create a directory `datasets/{SEQUENCE}_ds{DOWNSAMPLE_RATE}` (set as `DATA_PATH` onwards), which stores all the processed data. 47 | The extracted images will be stored in `{DATA_PATH}/images_raw`. 48 | 49 | 2. Run COLMAP 50 | 51 | ```bash 52 | DATA_PATH=datasets/${SEQUENCE}_ds${DOWNSAMPLE_RATE} 53 | bash projects/neuralangelo/scripts/run_colmap.sh ${DATA_PATH} 54 | ``` 55 | `DATA_PATH`: path to processed data. 
56 | 57 | After COLMAP finishes, the folder structure will look like the following: 58 | ``` 59 | DATA_PATH 60 | ├─ database.db (COLMAP database) 61 | ├─ images (undistorted input images) 62 | ├─ images_raw (raw input images) 63 | ├─ sparse (COLMAP data from SfM) 64 | │ ├─ cameras.bin (camera parameters) 65 | │ ├─ images.bin (images and camera poses) 66 | │ ├─ points3D.bin (sparse point clouds) 67 | │ ├─ 0 (a directory containing individual SfM models. There could also be 1, 2... etc.) 68 | │ ... 69 | ├─ stereo (COLMAP data for MVS, not used here) 70 | ... 71 | ``` 72 | `{DATA_PATH}/images` will be used as the input images for surface reconstruction. 73 | 74 | 3. Generate JSON file for data loading 75 | 76 | In this step, we define the bounding region for reconstruction and convert the COLMAP data to JSON format following Instant NGP. 77 | It is strongly recommended to [inspect](#inspect-and-adjust-colmap-results) the results to verify and adjust the bounding region for improved performance. 78 | ```bash 79 | python3 projects/neuralangelo/scripts/convert_data_to_json.py --data_dir ${DATA_PATH} --scene_type ${SCENE_TYPE} 80 | ``` 81 | The JSON file will be generated in `{DATA_PATH}/transforms.json`. 82 | 83 | 4. Config files 84 | 85 | Use the following to configure and generate your config files: 86 | ```bash 87 | python3 projects/neuralangelo/scripts/generate_config.py --sequence_name ${SEQUENCE} --data_dir ${DATA_PATH} --scene_type ${SCENE_TYPE} 88 | ``` 89 | The config file will be generated as `projects/neuralangelo/configs/custom/{SEQUENCE}.yaml`. 90 | You can add the `--help` flag to list all arguments; for example, consider adding `--auto_exposure_wb` for modeling varying lighting/appearances in the video. 91 | Alternatively, you can directly modify the hyperparameters in the generated config file. 92 | 93 | ### Inspect and adjust COLMAP results 94 | 95 | In some cases, the camera poses estimated by COLMAP can be erroneous. In addition, the automatically estimated bounding sphere (which should ideally enclose the scene/object of interest) can be inaccurate, so adjusting it is highly recommended. 96 | We offer some tools to inspect and adjust the preprocessing results. Below are some options: 97 | 98 | - Blender: Download [Blender](https://www.blender.org/download/) and follow the instructions in our [add-on repo](https://github.com/mli0603/BlenderNeuralangelo). The add-on will save your adjustment of the bounding sphere. 99 | - This [Jupyter notebook](projects/neuralangelo/scripts/visualize_colmap.ipynb) (using K3D) can be helpful for visualizing the COLMAP results. You can adjust the bounding sphere by manually specifying the refining sphere center and size in the `data.readjust` config. 100 | 101 | In some cases, an exhaustive feature matcher may be able to estimate more accurate camera poses. 102 | This can be done by changing `sequential_matcher` to `exhaustive_matcher` in [run_colmap.sh](https://github.com/NVlabs/neuralangelo/blob/main/projects/neuralangelo/scripts/run_colmap.sh#L24), as shown in the sketch below. 103 | However, this takes more time to process and can sometimes result in "broken trajectories" (from COLMAP failing due to ambiguous matches). 104 | For more details, please refer to the COLMAP [documentation](https://colmap.github.io/).
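For example, assuming the preprocessing script still invokes COLMAP's sequential matcher at that line (as in the default setup), the swap can be made with a one-line edit. This is only a convenience sketch; inspect `run_colmap.sh` first if unsure.

```bash
# Switch COLMAP feature matching from sequential to exhaustive in the preprocessing script.
# Assumes the default script still contains the string "sequential_matcher"; a .bak backup is kept.
sed -i.bak 's/sequential_matcher/exhaustive_matcher/' projects/neuralangelo/scripts/run_colmap.sh
```

After editing, re-run `run_colmap.sh` on the sequence to regenerate the camera poses with exhaustive matching.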
105 | 106 | ## DTU dataset 107 | You can run the following command to download [the DTU dataset](https://roboimagedata.compute.dtu.dk/?page_id=36) that is preprocessed by NeuS authors and generate json files: 108 | ```bash 109 | PATH_TO_DTU=datasets/dtu # Modify this to be the DTU dataset root directory. 110 | bash projects/neuralangelo/scripts/preprocess_dtu.sh ${PATH_TO_DTU} 111 | ``` 112 | 113 | ## Tanks and Temples dataset 114 | Download the data from [Tanks and Temples](https://tanksandtemples.org/download/) website. 115 | You will also need to download additional [COLMAP/camera/alignment](https://drive.google.com/file/d/1jAr3IDvhVmmYeDWi0D_JfgiHcl70rzVE/view?resourcekey=) and the images of each scene. 116 | The file structure should look like (you need to move the downloaded images to folder `images_raw`): 117 | ``` 118 | tanks_and_temples 119 | ├─ Barn 120 | │ ├─ Barn_COLMAP_SfM.log (camera poses) 121 | │ ├─ Barn.json (cropfiles) 122 | │ ├─ Barn.ply (ground-truth point cloud) 123 | │ ├─ Barn_trans.txt (colmap-to-ground-truth transformation) 124 | │ └─ images_raw (raw input images downloaded from Tanks and Temples website) 125 | │ ├─ 000001.png 126 | │ ├─ 000002.png 127 | │ ... 128 | ├─ Caterpillar 129 | │ ├─ ... 130 | ... 131 | ``` 132 | Run the following command to generate json files: 133 | ```bash 134 | PATH_TO_TNT=datasets/tanks_and_temples # Modify this to be the Tanks and Temples root directory. 135 | bash projects/neuralangelo/scripts/preprocess_tnt.sh ${PATH_TO_TNT} 136 | ``` 137 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # NVIDIA Source Code License for Neuralangelo 2 | 3 | ## 1. Definitions 4 | 5 | - “Licensor” means any person or entity that distributes its Work. 6 | 7 | - “Software” means the original work of authorship made available under this License. 8 | 9 | - “Work” means the Software and any additions to or derivative works of the Software that are made available under this License. 10 | 11 | - “NVIDIA Processors” means any central processing unit (CPU), graphics processing unit (GPU), field-programmable gate array (FPGA), application-specific integrated circuit (ASIC) or any combination thereof designed, made, sold, or provided by NVIDIA or its affiliates. 12 | 13 | - The terms “reproduce,” “reproduction,” “derivative works,” and “distribution” have the meaning as provided under U.S. copyright law; provided, however, that for the purposes of this License, derivative works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work. 14 | 15 | - Works, including the Software, are “made available” under this License by including in or with the Work either (a) a copyright notice referencing the applicability of this License to the Work, or (b) a copy of this License. 16 | 17 | ## 2. License Grant 18 | 19 | ### 2.1 Copyright Grant. 20 | 21 | Subject to the terms and conditions of this License, each Licensor grants to you a perpetual, worldwide, non-exclusive, royalty-free, copyright license to reproduce, prepare derivative works of, publicly display, publicly perform, sublicense and distribute its Work and any resulting derivative works in any form. 22 | 23 | ## 3. Limitations 24 | 25 | ### 3.1 Redistribution. 
26 | 27 | You may reproduce or distribute the Work only if (a) you do so under this License, (b) you include a complete copy of this License with your distribution, and (c) you retain without modification any copyright, patent, trademark, or attribution notices that are present in the Work. 28 | 29 | ### 3.2 Derivative Works. 30 | 31 | You may specify that additional or different terms apply to the use, reproduction, and distribution of your derivative works of the Work (“Your Terms”) only if (a) Your Terms provide that the use limitation in Section 3.3 applies to your derivative works, and (b) you identify the specific derivative works that are subject to Your Terms. Notwithstanding Your Terms, this License (including the redistribution requirements in Section 3.1) will continue to apply to the Work itself. 32 | 33 | ### 3.3 Use Limitation. 34 | 35 | The Work and any derivative works thereof only may be used or intended for use non-commercially and with NVIDIA Processors. Notwithstanding the foregoing, NVIDIA and its affiliates may use the Work and any derivative works commercially. As used herein, “non-commercially” means for research or evaluation purposes only. 36 | 37 | ### 3.4 Patent Claims. 38 | 39 | If you bring or threaten to bring a patent claim against any Licensor (including any claim, cross-claim or counterclaim in a lawsuit) to enforce any patents that you allege are infringed by any Work, then your rights under this License from such Licensor (including the grant in Section 2.1) will terminate immediately. 40 | 41 | ### 3.5 Trademarks. 42 | 43 | This License does not grant any rights to use any Licensor’s or its affiliates’ names, logos, or trademarks, except as necessary to reproduce the notices described in this License. 44 | 45 | ### 3.6 Termination. 46 | 47 | If you violate any term of this License, then your rights under this License (including the grant in Section 2.1) will terminate immediately. 48 | 49 | ## 4. Disclaimer of Warranty. 50 | 51 | THE WORK IS PROVIDED “AS IS” WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER THIS LICENSE. 52 | 53 | ## 5. Limitation of Liability. 54 | 55 | EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK (INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION, LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 56 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Neuralangelo 2 | This is the official implementation of **Neuralangelo: High-Fidelity Neural Surface Reconstruction**. 3 | 4 | [Zhaoshuo Li](https://mli0603.github.io/), 5 | [Thomas Müller](https://tom94.net/), 6 | [Alex Evans](https://research.nvidia.com/person/alex-evans), 7 | [Russell H.
Taylor](https://www.cs.jhu.edu/~rht/), 8 | [Mathias Unberath](https://mathiasunberath.github.io/), 9 | [Ming-Yu Liu](https://mingyuliu.net/), 10 | [Chen-Hsuan Lin](https://chenhsuanlin.bitbucket.io/) 11 | IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2023 12 | 13 | ### [Project page](https://research.nvidia.com/labs/dir/neuralangelo/) | [Paper](https://arxiv.org/abs/2306.03092/) | [Colab notebook](https://colab.research.google.com/drive/13u8DX9BNzQwiyPPCB7_4DbSxiQ5-_nGF) 14 | 15 | 16 | 17 | The code is built upon the Imaginaire library from the Deep Imagination Research Group at NVIDIA. 18 | For business inquiries, please submit the [NVIDIA research licensing form](https://www.nvidia.com/en-us/research/inquiries/). 19 | 20 | -------------------------------------- 21 | 22 | ## Installation 23 | We offer two ways to setup the environment: 24 | 1. We provide prebuilt Docker images, where 25 | - `docker.io/chenhsuanlin/colmap:3.8` is for running COLMAP and the data preprocessing scripts. This includes the prebuilt COLMAP library (CUDA-supported). 26 | - `docker.io/chenhsuanlin/neuralangelo:23.04-py3` is for running the main Neuralangelo pipeline. 27 | 28 | The corresponding Dockerfiles can be found in the `docker` directory. 29 | 2. The conda environment for Neuralangelo. Install the dependencies and activate the environment `neuralangelo` with 30 | ```bash 31 | conda env create --file neuralangelo.yaml 32 | conda activate neuralangelo 33 | ``` 34 | For COLMAP, alternative installation options are also available on the [COLMAP website](https://colmap.github.io/). 35 | 36 | -------------------------------------- 37 | 38 | ## Data preparation 39 | Please refer to [Data Preparation](DATA_PROCESSING.md) for step-by-step instructions. 40 | We assume known camera poses for each extracted frame from the video. 41 | The code uses the same json format as [Instant NGP](https://github.com/NVlabs/instant-ngp). 42 | 43 | -------------------------------------- 44 | 45 | ## Run Neuralangelo! 46 | ```bash 47 | EXPERIMENT=toy_example 48 | GROUP=example_group 49 | NAME=example_name 50 | CONFIG=projects/neuralangelo/configs/custom/${EXPERIMENT}.yaml 51 | GPUS=1 # use >1 for multi-GPU training! 52 | torchrun --nproc_per_node=${GPUS} train.py \ 53 | --logdir=logs/${GROUP}/${NAME} \ 54 | --config=${CONFIG} \ 55 | --show_pbar 56 | ``` 57 | Some useful notes: 58 | - This codebase supports logging with [Weights & Biases](https://wandb.ai/site). You should have a W&B account for this. 59 | - Add `--wandb` to the command line argument to enable W&B logging. 60 | - Add `--wandb_name` to specify the W&B project name. 61 | - More detailed control can be found in the `init_wandb()` function in `imaginaire/trainers/base.py`. 62 | - Configs can be overridden through the command line (e.g. `--optim.params.lr=1e-2`). 63 | - Set `--checkpoint={CHECKPOINT_PATH}` to initialize with a certain checkpoint; set `--resume` to resume training. 64 | - If appearance embeddings are enabled, make sure `data.num_images` is set to the number of training images. 
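Putting a few of these options together, an interrupted run can be resumed from its latest checkpoint with W&B logging enabled. This is only a sketch: the checkpoint filename below follows the `latest_checkpoint.pt` naming described in `imaginaire/config_base.yaml`, and the W&B project name is a placeholder; substitute the actual files and names from your own log directory.

```bash
# Sketch: resume an interrupted run from its latest checkpoint, with W&B logging enabled.
# The checkpoint path and W&B project name are placeholders; adjust them to your setup.
torchrun --nproc_per_node=${GPUS} train.py \
    --logdir=logs/${GROUP}/${NAME} \
    --config=${CONFIG} \
    --checkpoint=logs/${GROUP}/${NAME}/latest_checkpoint.pt \
    --resume \
    --wandb \
    --wandb_name=example_project \
    --show_pbar
```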
65 | 66 | -------------------------------------- 67 | 68 | ## Isosurface extraction 69 | Use the following command to run isosurface mesh extraction: 70 | ```bash 71 | CHECKPOINT=logs/${GROUP}/${NAME}/xxx.pt 72 | OUTPUT_MESH=xxx.ply 73 | CONFIG=logs/${GROUP}/${NAME}/config.yaml 74 | RESOLUTION=2048 75 | BLOCK_RES=128 76 | GPUS=1 # use >1 for multi-GPU mesh extraction 77 | torchrun --nproc_per_node=${GPUS} projects/neuralangelo/scripts/extract_mesh.py \ 78 | --config=${CONFIG} \ 79 | --checkpoint=${CHECKPOINT} \ 80 | --output_file=${OUTPUT_MESH} \ 81 | --resolution=${RESOLUTION} \ 82 | --block_res=${BLOCK_RES} 83 | ``` 84 | Some useful notes: 85 | - Add `--textured` to extract meshes with textures. 86 | - Add `--keep_lcc` to remove noise by keeping only the largest connected component; this may also remove thin structures. 87 | - Lower `BLOCK_RES` to reduce GPU memory usage. 88 | - Lower `RESOLUTION` to reduce mesh size. 89 | 90 | -------------------------------------- 91 | 92 | ## Frequently asked questions (FAQ) 93 | 1. **Q:** CUDA out of memory. How do I decrease the memory footprint? 94 | **A:** Neuralangelo requires at least 24GB of GPU memory with our default configuration. If you run out of memory, consider adjusting the following hyperparameters under `model.object.sdf.encoding.hashgrid` (with suggested values): 95 | 96 | | GPU VRAM | Hyperparameter | 97 | | :-----------: | :---------------------: | 98 | | 8GB | `dict_size=20`, `dim=4` | 99 | | 12GB | `dict_size=21`, `dim=4` | 100 | | 16GB | `dict_size=21`, `dim=8` | 101 | 102 | Please note that the above hyperparameter adjustment may sacrifice reconstruction quality. 103 | 104 | If Neuralangelo runs fine during training but runs out of CUDA memory during evaluation, consider adjusting the evaluation parameters under `data.val`: set a smaller `image_size` (e.g., a maximum resolution of 200x200) and set `batch_size=1`, `subset=1` (see the example command at the end of this section). 105 | 106 | 2. **Q:** The reconstruction of my custom dataset is bad. What can I do? 107 | **A:** It is worth looking into the following: 108 | - The camera poses recovered by COLMAP may be off. We have implemented tools (using [Blender](https://github.com/mli0603/BlenderNeuralangelo) or [Jupyter notebook](projects/neuralangelo/scripts/visualize_colmap.ipynb)) to inspect the COLMAP results. 109 | - The computed bounding regions may be off and/or too small/large. Please refer to [data preprocessing](DATA_PROCESSING.md) on how to adjust the bounding regions manually. 110 | - The video capture sequence may contain significant motion blur or out-of-focus frames. A higher shutter speed (reducing motion blur) and a smaller aperture (increasing the focus range) are very helpful.
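As a concrete illustration of the first question above, the memory-related settings can be passed as command-line config overrides (the same dotted syntax described in the training section), shown here with the 16GB row of the table plus the suggested evaluation settings. Treat this as a sketch: the bracketed list syntax for `data.val.image_size` is an assumption, and editing the generated config YAML directly is the safer fallback.

```bash
# Sketch: train with the 16GB-VRAM hashgrid settings from the FAQ table and a lighter validation pass.
# ASSUMPTION: the [H,W] list syntax for data.val.image_size may not parse on the command line;
# if so, set these fields in the config YAML instead.
torchrun --nproc_per_node=${GPUS} train.py \
    --logdir=logs/${GROUP}/${NAME} \
    --config=${CONFIG} \
    --show_pbar \
    --model.object.sdf.encoding.hashgrid.dict_size=21 \
    --model.object.sdf.encoding.hashgrid.dim=8 \
    --data.val.image_size=[200,200] \
    --data.val.batch_size=1 \
    --data.val.subset=1
```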
111 | 112 | -------------------------------------- 113 | 114 | ## Citation 115 | If you find our code useful for your research, please cite 116 | ``` 117 | @inproceedings{li2023neuralangelo, 118 | title={Neuralangelo: High-Fidelity Neural Surface Reconstruction}, 119 | author={Li, Zhaoshuo and M\"uller, Thomas and Evans, Alex and Taylor, Russell H and Unberath, Mathias and Liu, Ming-Yu and Lin, Chen-Hsuan}, 120 | booktitle={IEEE Conference on Computer Vision and Pattern Recognition ({CVPR})}, 121 | year={2023} 122 | } 123 | ``` 124 | -------------------------------------------------------------------------------- /assets/teaser.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/neuralangelo/94390b64683c067c620d9e075224ccfe582647d0/assets/teaser.gif -------------------------------------------------------------------------------- /docker/Dockerfile-colmap: -------------------------------------------------------------------------------- 1 | # docker build -f docker/Dockerfile-colmap -t chenhsuanlin/colmap:3.8 . 2 | # docker push chenhsuanlin/colmap:3.8 3 | 4 | FROM nvcr.io/nvidia/cuda:11.8.0-devel-ubuntu20.04 5 | ARG DEBIAN_FRONTEND=noninteractive 6 | 7 | # colmap dependencies 8 | RUN apt-get update && apt-get install -y \ 9 | git \ 10 | cmake \ 11 | ninja-build \ 12 | build-essential \ 13 | libboost-program-options-dev \ 14 | libboost-filesystem-dev \ 15 | libboost-graph-dev \ 16 | libboost-system-dev \ 17 | libboost-test-dev \ 18 | libeigen3-dev \ 19 | libflann-dev \ 20 | libfreeimage-dev \ 21 | libmetis-dev \ 22 | libgoogle-glog-dev \ 23 | libgflags-dev \ 24 | libsqlite3-dev \ 25 | libglew-dev \ 26 | qtbase5-dev \ 27 | libqt5opengl5-dev \ 28 | libcgal-dev \ 29 | libceres-dev 30 | # headless servers 31 | RUN apt-get update && apt-get install -y \ 32 | xvfb 33 | # Colmap 34 | RUN git clone https://github.com/colmap/colmap.git && cd colmap && git checkout 3.8 35 | RUN cd colmap && mkdir build && cd build && cmake .. -DCUDA_ENABLED=ON -DCMAKE_CUDA_ARCHITECTURES="70;72;75;80;86" -GNinja 36 | RUN cd colmap/build && ninja && ninja install 37 | 38 | # additional python packages 39 | RUN apt-get update && apt-get install -y \ 40 | pip \ 41 | ffmpeg 42 | RUN pip install \ 43 | addict \ 44 | k3d \ 45 | opencv-python-headless \ 46 | pillow \ 47 | plotly \ 48 | pyyaml \ 49 | trimesh 50 | -------------------------------------------------------------------------------- /docker/Dockerfile-neuralangelo: -------------------------------------------------------------------------------- 1 | # docker build -f docker/Dockerfile-neuralangelo -t chenhsuanlin/neuralangelo:23.04-py3 . 
2 | # docker push chenhsuanlin/neuralangelo:23.04-py3 3 | 4 | FROM nvcr.io/nvidia/pytorch:23.04-py3 5 | ARG DEBIAN_FRONTEND=noninteractive 6 | 7 | # Install basics 8 | RUN apt-get update && apt-get install -y --no-install-recommends \ 9 | build-essential \ 10 | bzip2 \ 11 | ca-certificates \ 12 | cmake \ 13 | curl \ 14 | ffmpeg \ 15 | g++ \ 16 | git \ 17 | libx264-dev \ 18 | tmux \ 19 | wget 20 | 21 | # Update pip 22 | RUN pip install --upgrade pip 23 | 24 | # Code formatting 25 | RUN pip install --upgrade \ 26 | flake8 \ 27 | pre-commit 28 | 29 | # Install base Python libraries for Imaginaire 30 | COPY requirements.txt requirements.txt 31 | ARG FORCE_CUDA=1 32 | ARG TCNN_CUDA_ARCHITECTURES=70,72,75,80,86 33 | RUN pip install --upgrade -r requirements.txt 34 | -------------------------------------------------------------------------------- /imaginaire/config_base.yaml: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------------------- 2 | # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # NVIDIA CORPORATION and its licensors retain all intellectual property 5 | # and proprietary rights in and to this software, related documentation 6 | # and any modifications thereto. Any use, reproduction, disclosure or 7 | # distribution of this software and related documentation without an express 8 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 9 | # ----------------------------------------------------------------------------- 10 | 11 | # This is the base configuration file. 12 | 13 | # We often dump images to understand what's going on in the training. 14 | # image_save_iter specifies how often we dump images. 15 | image_save_iter: 9999999999 16 | # metrics_iter and metrics_epoch specify how often we compute the performance metrics 17 | # If these two numbers are not set, they are copied from checkpoint.save_iter and checkpoint.save_epoch respectively. 18 | metrics_iter: 19 | metrics_epoch: 20 | # max_epoch and max_iter specify what is the maximum epoch and iteration that we will train our model. 21 | # min( max_epoch * dataset_size / batch_size, max_iter) will be the total number of iterations that the model will be trained. 22 | max_epoch: 9999999999 23 | max_iter: 9999999999 24 | # logging_iter controls how often we log the training stats. 25 | logging_iter: 100 26 | # If speed_benchmark is True, we will print out time required for forward, backward, and gradient update. 27 | speed_benchmark: False 28 | # Kill the process if `timeout_period` seconds have passed since the last iteration. This usually means the process gets stuck. 29 | timeout_period: 9999999 30 | 31 | # Default local rank 32 | local_rank: 0 33 | # Toggle NVTX profiler 34 | nvtx_profile: False 35 | 36 | # Checkpointer 37 | checkpoint: 38 | # If save_iter is set to M, then we save the checkpoint every M iteration. 39 | # If save_latest_iter is set to M, then we save the checkpoint every M iteration using the name 40 | # 'latest_checkpoint.pt', so that the new checkpoint will overwrite previous ones. 41 | # If save_epoch is set to N, then we save the checkpoint every N epoch. 42 | # Both can be set at the same time. 
43 | save_iter: 9999999999 44 | save_latest_iter: 9999999999 45 | save_epoch: 9999999999 46 | save_period: 9999999999 47 | # If True, load state_dict to the models in strict mode 48 | strict_resume: True 49 | 50 | # Trainer 51 | trainer: 52 | ema_config: 53 | enabled: False 54 | beta: 0.9999 55 | start_iteration: 0 56 | 57 | image_to_tensorboard: False 58 | ddp_config: 59 | find_unused_parameters: False 60 | static_graph: True 61 | init: 62 | type: none 63 | gain: 64 | amp_config: 65 | init_scale: 65536.0 66 | growth_factor: 2.0 67 | backoff_factor: 0.5 68 | growth_interval: 2000 69 | enabled: False 70 | grad_accum_iter: 1 71 | 72 | # Networks 73 | model: 74 | type: dummy 75 | 76 | # Optimizers 77 | optim: 78 | type: Adam 79 | params: 80 | # This defines the parameters for the specified PyTorch optimizer class (e.g. betas, eps). 81 | lr: 0.0001 82 | fused_opt: False 83 | # Default learning rate policy is step with iteration_mode=False (epoch mode), step_size=10^10, and gamma=1. 84 | # This means a constant learning rate 85 | sched: 86 | iteration_mode: False 87 | type: step 88 | step_size: 9999999999 89 | gamma: 1 90 | 91 | # Data 92 | data: 93 | name: dummy 94 | type: imaginaire.datasets.images 95 | use_multi_epoch_loader: False 96 | num_workers: 0 97 | test_data: 98 | name: dummy 99 | type: imaginaire.datasets.images 100 | num_workers: 0 101 | test: 102 | is_lmdb: False 103 | roots: 104 | batch_size: 1 105 | 106 | # cuDNN 107 | # set deterministic to True for better reproducibility of the results. When deterministic is True, it will only use CUDNN functions that are deterministic. 108 | # If benchmark is set to True, cudnn will benchmark several algorithms and pick that which it found to be fastest at the first iteration. 109 | cudnn: 110 | deterministic: False 111 | benchmark: True 112 | 113 | # Others 114 | pretrained_weight: 115 | inference_args: {} 116 | -------------------------------------------------------------------------------- /imaginaire/datasets/utils/dataloader.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import torch 14 | 15 | 16 | class MultiEpochsDataLoader(torch.utils.data.DataLoader): 17 | """ 18 | Relentlessly sample from the dataset. 19 | This eliminates the overhead of prefetching data before each epoch. 
20 | https://github.com/rwightman/pytorch-image-models/blob/master/timm/data/loader.py 21 | """ 22 | 23 | def __init__(self, *args, **kwargs): 24 | super().__init__(*args, **kwargs) 25 | self._DataLoader__initialized = False 26 | self.batch_sampler = _RepeatSampler(self.batch_sampler) 27 | self._DataLoader__initialized = True 28 | self.iterator = super().__iter__() 29 | 30 | def __len__(self): 31 | return len(self.batch_sampler.sampler) 32 | 33 | def __iter__(self): 34 | for i in range(len(self)): 35 | yield next(self.iterator) 36 | 37 | 38 | class _RepeatSampler(object): 39 | """ Sampler that repeats forever. 40 | Args: 41 | sampler (Sampler) 42 | """ 43 | 44 | def __init__(self, sampler): 45 | self.sampler = sampler 46 | 47 | def __iter__(self): 48 | while True: 49 | yield from iter(self.sampler) 50 | -------------------------------------------------------------------------------- /imaginaire/datasets/utils/get_dataloader.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import importlib 14 | 15 | import torch 16 | import torch.distributed as dist 17 | 18 | from imaginaire.utils.distributed import master_only_print as print 19 | 20 | from imaginaire.datasets.utils.sampler import DistributedSamplerPreemptable 21 | from imaginaire.datasets.utils.dataloader import MultiEpochsDataLoader 22 | 23 | 24 | def _get_train_dataset_objects(cfg, subset_indices=None): 25 | r"""Return dataset objects for the training set. 26 | Args: 27 | cfg (obj): Global configuration file. 28 | subset_indices (sequence): Indices of the subset to use. 29 | 30 | Returns: 31 | train_dataset (obj): PyTorch training dataset object. 32 | """ 33 | dataset_module = importlib.import_module(cfg.data.type) 34 | train_dataset = dataset_module.Dataset(cfg, is_inference=False) 35 | if subset_indices is not None: 36 | train_dataset = torch.utils.data.Subset(train_dataset, subset_indices) 37 | print('Train dataset length:', len(train_dataset)) 38 | return train_dataset 39 | 40 | 41 | def _get_val_dataset_objects(cfg, subset_indices=None): 42 | r"""Return dataset objects for the validation set. 43 | Args: 44 | cfg (obj): Global configuration file. 45 | subset_indices (sequence): Indices of the subset to use. 46 | Returns: 47 | val_dataset (obj): PyTorch validation dataset object. 
48 | """ 49 | dataset_module = importlib.import_module(cfg.data.type) 50 | if hasattr(cfg.data.val, 'type'): 51 | for key in ['type', 'input_types', 'input_image']: 52 | setattr(cfg.data, key, getattr(cfg.data.val, key)) 53 | dataset_module = importlib.import_module(cfg.data.type) 54 | val_dataset = dataset_module.Dataset(cfg, is_inference=True) 55 | 56 | if subset_indices is not None: 57 | val_dataset = torch.utils.data.Subset(val_dataset, subset_indices) 58 | print('Val dataset length:', len(val_dataset)) 59 | return val_dataset 60 | 61 | 62 | def _get_test_dataset_object(cfg, subset_indices=None): 63 | r"""Return dataset object for the test set 64 | 65 | Args: 66 | cfg (obj): Global configuration file. 67 | subset_indices (sequence): Indices of the subset to use. 68 | Returns: 69 | (obj): PyTorch dataset object. 70 | """ 71 | dataset_module = importlib.import_module(cfg.test_data.type) 72 | test_dataset = dataset_module.Dataset(cfg, is_inference=True, is_test=True) 73 | if subset_indices is not None: 74 | test_dataset = torch.utils.data.Subset(test_dataset, subset_indices) 75 | return test_dataset 76 | 77 | 78 | def _get_data_loader(cfg, dataset, batch_size, not_distributed=False, 79 | shuffle=True, drop_last=True, seed=0, use_multi_epoch_loader=False, 80 | preemptable=False): 81 | r"""Return data loader . 82 | 83 | Args: 84 | cfg (obj): Global configuration file. 85 | dataset (obj): PyTorch dataset object. 86 | batch_size (int): Batch size. 87 | not_distributed (bool): Do not use distributed samplers. 88 | shuffle (bool): Whether to shuffle the data 89 | drop_last (bool): Whether to drop the last batch is the number of samples is smaller than the batch size 90 | seed (int): random seed. 91 | preemptable (bool): Whether to handle preemptions. 92 | Return: 93 | (obj): Data loader. 94 | """ 95 | not_distributed = not_distributed or not dist.is_initialized() 96 | if not_distributed: 97 | sampler = None 98 | else: 99 | if preemptable: 100 | sampler = DistributedSamplerPreemptable(dataset, shuffle=shuffle, seed=seed) 101 | else: 102 | sampler = torch.utils.data.distributed.DistributedSampler(dataset, shuffle=shuffle, seed=seed) 103 | num_workers = getattr(cfg.data, 'num_workers', 8) 104 | persistent_workers = getattr(cfg.data, 'persistent_workers', False) 105 | data_loader = (MultiEpochsDataLoader if use_multi_epoch_loader else torch.utils.data.DataLoader)( 106 | dataset, 107 | batch_size=batch_size, 108 | shuffle=shuffle and (sampler is None), 109 | sampler=sampler, 110 | pin_memory=True, 111 | num_workers=num_workers, 112 | drop_last=drop_last, 113 | persistent_workers=persistent_workers if num_workers > 0 else False 114 | ) 115 | return data_loader 116 | 117 | 118 | def get_train_dataloader( 119 | cfg, shuffle=True, drop_last=True, subset_indices=None, seed=0, preemptable=False): 120 | r"""Return dataset objects for the training and validation sets. 121 | Args: 122 | cfg (obj): Global configuration file. 123 | shuffle (bool): Whether to shuffle the data 124 | drop_last (bool): Whether to drop the last batch is the number of samples is smaller than the batch size 125 | subset_indices (sequence): Indices of the subset to use. 126 | seed (int): random seed. 127 | preemptable (bool): Flag for preemption handling 128 | Returns: 129 | train_data_loader (obj): Train data loader. 
130 | """ 131 | train_dataset = _get_train_dataset_objects(cfg, subset_indices=subset_indices) 132 | train_data_loader = _get_data_loader( 133 | cfg, train_dataset, cfg.data.train.batch_size, not_distributed=False, 134 | shuffle=shuffle, drop_last=drop_last, seed=seed, 135 | use_multi_epoch_loader=cfg.data.use_multi_epoch_loader, 136 | preemptable=preemptable 137 | ) 138 | return train_data_loader 139 | 140 | 141 | def get_val_dataloader(cfg, subset_indices=None, seed=0): 142 | r"""Return dataset objects for the training and validation sets. 143 | Args: 144 | cfg (obj): Global configuration file. 145 | subset_indices (sequence): Indices of the subset to use. 146 | seed (int): random seed. 147 | Returns: 148 | val_data_loader (obj): Val data loader. 149 | """ 150 | val_dataset = _get_val_dataset_objects(cfg, subset_indices=subset_indices) 151 | not_distributed = getattr(cfg.data, 'val_data_loader_not_distributed', False) 152 | # We often use a folder of images to represent a video. As doing evaluation, we like the images to preserve the 153 | # original order. As a result, we do not want to distribute images from the same video to different GPUs. 154 | not_distributed = 'video' in cfg.data.type or not_distributed 155 | drop_last = getattr(cfg.data.val, 'drop_last', False) 156 | # Validation loader need not have preemption handling. 157 | val_data_loader = _get_data_loader( 158 | cfg, val_dataset, cfg.data.val.batch_size, not_distributed=not_distributed, 159 | shuffle=False, drop_last=drop_last, seed=seed, 160 | preemptable=False 161 | ) 162 | return val_data_loader 163 | 164 | 165 | def get_test_dataloader(cfg, subset_indices=None): 166 | r"""Return dataset objects for testing 167 | 168 | Args: 169 | cfg (obj): Global configuration file. 170 | subset_indices (sequence): Indices of the subset to use. 171 | Returns: 172 | (obj): Test data loader. It may not contain the ground truth. 173 | """ 174 | test_dataset = _get_test_dataset_object(cfg, subset_indices=subset_indices) 175 | not_distributed = getattr( 176 | cfg.test_data, 'val_data_loader_not_distributed', False) 177 | not_distributed = 'video' in cfg.test_data.type or not_distributed 178 | test_data_loader = _get_data_loader( 179 | cfg, test_dataset, cfg.test_data.test.batch_size, not_distributed=not_distributed, 180 | shuffle=False) 181 | return test_data_loader 182 | -------------------------------------------------------------------------------- /imaginaire/datasets/utils/sampler.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import math 14 | import torch.distributed as dist 15 | import torch 16 | 17 | from torch.utils.data import Sampler 18 | from typing import TypeVar 19 | 20 | T_co = TypeVar('T_co', covariant=True) 21 | 22 | 23 | class DistributedSamplerPreemptable(Sampler[T_co]): 24 | r"""Sampler that supports loading from an iteration. 
25 | This is very useful for preemptable jobs. 26 | 27 | Args: 28 | dataset (torch.utils.data.Dataset): Dataset object 29 | num_replicas (int): Number of replicas to distribute the dataloader over. 30 | This is typically the world size in DDP jobs. 31 | rank (int): Rank of the current process. 32 | shuffle (bool): Whether to shuffle the dataloader in each epoch. 33 | seed (int): Random seed used for shuffling the dataloader. 34 | drop_last (bool): Whether to drop the last batch. 35 | """ 36 | 37 | def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True, 38 | seed=0, drop_last=False): 39 | 40 | if num_replicas is None: 41 | if not dist.is_available(): 42 | raise RuntimeError("Requires distributed package to be available") 43 | num_replicas = dist.get_world_size() 44 | if rank is None: 45 | if not dist.is_available(): 46 | raise RuntimeError("Requires distributed package to be available") 47 | rank = dist.get_rank() 48 | if rank >= num_replicas or rank < 0: 49 | raise ValueError( 50 | "Invalid rank {}, rank should be in the interval" 51 | " [0, {}]".format(rank, num_replicas - 1)) 52 | self.dataset = dataset 53 | self.num_replicas = num_replicas 54 | self.rank = rank 55 | self.epoch = 0 56 | 57 | # start_index is the index to begin the dataloader from. 58 | self.start_index = 0 59 | 60 | self.drop_last = drop_last 61 | # If the dataset length is evenly divisible by # of replicas, then there 62 | # is no need to drop any data, since the dataset will be split equally. 63 | if self.drop_last and len(self.dataset) % self.num_replicas != 0: # type: ignore[arg-type] 64 | # Split to nearest available length that is evenly divisible. 65 | # This is to ensure each rank receives the same amount of data when 66 | # using this Sampler. 67 | self.num_samples = math.ceil( 68 | (len(self.dataset) - self.num_replicas) / self.num_replicas # type: ignore[arg-type] 69 | ) 70 | else: 71 | self.num_samples = math.ceil(len(self.dataset) / self.num_replicas) # type: ignore[arg-type] 72 | self.total_size = self.num_samples * self.num_replicas 73 | self.shuffle = shuffle 74 | self.seed = seed 75 | 76 | def __iter__(self): 77 | if self.shuffle: 78 | # deterministically shuffle based on epoch and seed 79 | g = torch.Generator() 80 | g.manual_seed(self.seed + self.epoch) 81 | indices = torch.randperm(len(self.dataset), generator=g).tolist() # type: ignore[arg-type] 82 | else: 83 | indices = list(range(len(self.dataset))) # type: ignore[arg-type] 84 | 85 | if not self.drop_last: 86 | # add extra samples to make it evenly divisible 87 | padding_size = self.total_size - len(indices) 88 | if padding_size <= len(indices): 89 | indices += indices[:padding_size] 90 | else: 91 | indices += (indices * math.ceil(padding_size / len(indices)))[:padding_size] 92 | else: 93 | # remove tail of data to make it evenly divisible. 94 | indices = indices[:self.total_size] 95 | assert len(indices) == self.total_size 96 | 97 | # subsample 98 | indices = indices[self.rank:self.total_size:self.num_replicas] 99 | assert len(indices) == self.num_samples 100 | 101 | # assert self.start_index < len(indices) 102 | if self.start_index >= len(indices): 103 | print('(Warning): Start index is not less than the length of the dataloader. Going to the last batch of the dataset instead') 104 | # This is hardcoded to go one batch before.
105 | self.start_index = len(indices) - 64 106 | indices = indices[self.start_index:] 107 | 108 | return iter(indices) 109 | 110 | def __len__(self): 111 | return self.num_samples 112 | 113 | def set_epoch(self, epoch): 114 | self.epoch = epoch 115 | 116 | def set_iteration(self, start_index): 117 | self.start_index = start_index 118 | -------------------------------------------------------------------------------- /imaginaire/models/base.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import torch 14 | 15 | 16 | class Model(torch.nn.Module): 17 | 18 | def __init__(self, cfg_model, cfg_data): 19 | super().__init__() 20 | 21 | def get_param_groups(self, cfg_optim): 22 | """Allow the network to use different hyperparameters (e.g., learning rate) for different parameters. 23 | Returns: 24 | PyTorch parameter group (list or generator). See the PyTorch documentation for details. 25 | """ 26 | return self.parameters() 27 | 28 | def device(self): 29 | """Return device on which model resides.""" 30 | return next(self.parameters()).device 31 | -------------------------------------------------------------------------------- /imaginaire/models/utils/init_weight.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import torch 14 | from torch.nn import init 15 | 16 | 17 | def weights_init(init_type, gain, bias=None): 18 | r"""Initialize weights in the network. 19 | 20 | Args: 21 | init_type (str): The name of the initialization scheme. 22 | gain (float): The parameter that is required for the initialization 23 | scheme. 24 | bias (object): If not ``None``, specifies the initialization parameter 25 | for bias. 26 | 27 | Returns: 28 | (obj): init function to be applied. 29 | """ 30 | 31 | def init_func(m): 32 | r"""Init function 33 | 34 | Args: 35 | m: module to be weight initialized. 36 | """ 37 | class_name = m.__class__.__name__ 38 | if hasattr(m, 'weight') and ( 39 | class_name.find('Conv') != -1 or 40 | class_name.find('Linear') != -1 or 41 | class_name.find('Embedding') != -1): 42 | lr_mul = getattr(m, 'lr_mul', 1.) 
43 | gain_final = gain / lr_mul 44 | if init_type == 'normal': 45 | init.normal_(m.weight.data, 0.0, gain_final) 46 | elif init_type == 'xavier': 47 | init.xavier_normal_(m.weight.data, gain=gain_final) 48 | elif init_type == 'xavier_uniform': 49 | init.xavier_uniform_(m.weight.data, gain=gain_final) 50 | elif init_type == 'kaiming': 51 | init.kaiming_normal_(m.weight.data, a=0, mode='fan_in') 52 | with torch.no_grad(): 53 | m.weight.data *= gain_final 54 | elif init_type == 'kaiming_linear': 55 | init.kaiming_normal_( 56 | m.weight.data, a=0, mode='fan_in', nonlinearity='linear' 57 | ) 58 | with torch.no_grad(): 59 | m.weight.data *= gain_final 60 | elif init_type == 'orthogonal': 61 | init.orthogonal_(m.weight.data, gain=gain_final) 62 | elif init_type == 'none': 63 | pass 64 | else: 65 | raise NotImplementedError( 66 | 'initialization method [%s] is ' 67 | 'not implemented' % init_type) 68 | if hasattr(m, 'bias') and m.bias is not None: 69 | if init_type == 'none': 70 | pass 71 | elif bias is not None: 72 | bias_type = getattr(bias, 'type', 'normal') 73 | if bias_type == 'normal': 74 | bias_gain = getattr(bias, 'gain', 0.5) 75 | init.normal_(m.bias.data, 0.0, bias_gain) 76 | else: 77 | raise NotImplementedError( 78 | 'initialization method [%s] is ' 79 | 'not implemented' % bias_type) 80 | else: 81 | init.constant_(m.bias.data, 0.0) 82 | return init_func 83 | 84 | 85 | def weights_rescale(): 86 | def init_func(m): 87 | if hasattr(m, 'init_gain'): 88 | for name, p in m.named_parameters(): 89 | if 'output_scale' not in name: 90 | p.data.mul_(m.init_gain) 91 | return init_func 92 | -------------------------------------------------------------------------------- /imaginaire/models/utils/model_average.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import copy 14 | 15 | import torch 16 | from torch import nn 17 | from imaginaire.utils.misc import requires_grad 18 | 19 | 20 | def reset_batch_norm(m): 21 | r"""Reset batch norm statistics 22 | 23 | Args: 24 | m: Pytorch module 25 | """ 26 | if hasattr(m, 'reset_running_stats'): 27 | m.reset_running_stats() 28 | 29 | 30 | def calibrate_batch_norm_momentum(m): 31 | r"""Calibrate batch norm momentum 32 | 33 | Args: 34 | m: Pytorch module 35 | """ 36 | if hasattr(m, 'reset_running_stats'): 37 | # if m._get_name() == 'SyncBatchNorm': 38 | if 'BatchNorm' in m._get_name(): 39 | m.momentum = 1.0 / float(m.num_batches_tracked + 1) 40 | 41 | 42 | class ModelAverage(nn.Module): 43 | r"""In this model average implementation, the spectral layers are 44 | absorbed in the model parameter by default. If such options are 45 | turned on, be careful with how you do the training. Remember to 46 | re-estimate the batch norm parameters before using the model. 47 | 48 | Args: 49 | module (torch nn module): Torch network. 50 | beta (float): Moving average weights. How much we weight the past. 
51 | start_iteration (int): From which iteration, we start the update. 52 | """ 53 | def __init__(self, module, beta=0.9999, start_iteration=0): 54 | super(ModelAverage, self).__init__() 55 | 56 | self.module = module 57 | # A shallow copy creates a new object which stores the reference of 58 | # the original elements. 59 | # A deep copy creates a new object and recursively adds the copies of 60 | # nested objects present in the original elements. 61 | self._averaged_model = copy.deepcopy(self.module).to('cuda') 62 | self.stream = torch.cuda.Stream() 63 | 64 | self.beta = beta 65 | 66 | self.start_iteration = start_iteration 67 | # This buffer is to track how many iterations has the model been 68 | # trained for. We will ignore the first $(start_iterations) and start 69 | # the averaging after. 70 | self.register_buffer('num_updates_tracked', 71 | torch.tensor(0, dtype=torch.long)) 72 | self.num_updates_tracked = self.num_updates_tracked.to('cuda') 73 | self.averaged_model.eval() 74 | 75 | # Averaged model does not require grad. 76 | requires_grad(self.averaged_model, False) 77 | 78 | @property 79 | def averaged_model(self): 80 | self.stream.synchronize() 81 | return self._averaged_model 82 | 83 | def forward(self, *inputs, **kwargs): 84 | r"""PyTorch module forward function overload.""" 85 | return self.module(*inputs, **kwargs) 86 | 87 | @torch.no_grad() 88 | def update_average(self): 89 | r"""Update the moving average.""" 90 | self.stream.wait_stream(torch.cuda.current_stream()) 91 | with torch.cuda.stream(self.stream): 92 | self.num_updates_tracked += 1 93 | if self.num_updates_tracked <= self.start_iteration: 94 | beta = 0. 95 | else: 96 | beta = self.beta 97 | source_dict = self.module.state_dict() 98 | target_dict = self._averaged_model.state_dict() 99 | source_list = [] 100 | target_list = [] 101 | for key in target_dict: 102 | if 'num_batches_tracked' in key: 103 | continue 104 | source_list.append(source_dict[key].data) 105 | target_list.append(target_dict[key].data.float()) 106 | 107 | torch._foreach_mul_(target_list, beta) 108 | torch._foreach_add_(target_list, source_list, alpha=1 - beta) 109 | 110 | def __repr__(self): 111 | r"""Returns a string that holds a printable representation of an 112 | object""" 113 | return self.module.__repr__() 114 | -------------------------------------------------------------------------------- /imaginaire/trainers/utils/get_trainer.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import importlib 14 | import torch 15 | import torch.distributed as dist 16 | import torch.nn as nn 17 | from torch.optim import lr_scheduler 18 | from imaginaire.models.utils.model_average import ModelAverage 19 | 20 | 21 | def get_trainer(cfg, is_inference=True, seed=0): 22 | """Return the trainer object. 23 | 24 | Args: 25 | cfg (Config): Loaded config object. 26 | is_inference (bool): Inference mode. 
27 | 28 | Returns: 29 | (obj): Trainer object. 30 | """ 31 | trainer_lib = importlib.import_module(cfg.trainer.type) 32 | trainer = trainer_lib.Trainer(cfg, is_inference=is_inference, seed=seed) 33 | return trainer 34 | 35 | 36 | def wrap_model(cfg, model): 37 | r"""Wrap the networks with AMP DDP and (optionally) model average. 38 | 39 | Args: 40 | cfg (obj): Global configuration. 41 | model (obj): Model object. 42 | 43 | Returns: 44 | (dict): 45 | - model (obj): Model object. 46 | """ 47 | # Apply model average wrapper. 48 | if cfg.trainer.ema_config.enabled: 49 | model = ModelAverage(model, 50 | cfg.trainer.ema_config.beta, 51 | cfg.trainer.ema_config.start_iteration, 52 | ) 53 | model = _wrap_model(cfg, model) 54 | return model 55 | 56 | 57 | class WrappedModel(nn.Module): 58 | r"""Dummy wrapping the module. 59 | """ 60 | 61 | def __init__(self, module): 62 | super(WrappedModel, self).__init__() 63 | self.module = module 64 | 65 | def forward(self, *args, **kwargs): 66 | r"""PyTorch module forward function overload.""" 67 | return self.module(*args, **kwargs) 68 | 69 | 70 | def _wrap_model(cfg, model): 71 | r"""Wrap a model for distributed data parallel training. 72 | 73 | Args: 74 | model (obj): PyTorch network model. 75 | 76 | Returns: 77 | (obj): Wrapped PyTorch network model. 78 | """ 79 | # Apply DDP wrapper. 80 | if dist.is_available() and dist.is_initialized(): 81 | model = torch.nn.parallel.DistributedDataParallel( 82 | model, 83 | device_ids=[cfg.local_rank], 84 | output_device=cfg.local_rank, 85 | find_unused_parameters=cfg.trainer.ddp_config.find_unused_parameters, 86 | static_graph=cfg.trainer.ddp_config.static_graph, 87 | broadcast_buffers=False, 88 | ) 89 | else: 90 | model = WrappedModel(model) 91 | return model 92 | 93 | 94 | def _calculate_model_size(model): 95 | r"""Calculate number of parameters in a PyTorch network. 96 | 97 | Args: 98 | model (obj): PyTorch network. 99 | 100 | Returns: 101 | (int): Number of parameters. 102 | """ 103 | return sum(p.numel() for p in model.parameters() if p.requires_grad) 104 | 105 | 106 | def get_optimizer(cfg_optim, model): 107 | r"""Return the optimizer object. 108 | 109 | Args: 110 | cfg_optim (obj): Config for the specific optimization module (gen/dis). 111 | model (obj): PyTorch network object. 112 | 113 | Returns: 114 | (obj): Pytorch optimizer 115 | """ 116 | if hasattr(model, 'get_param_groups'): 117 | # Allow the network to use different hyperparameters (e.g., learning rate) for different parameters. 118 | params = model.get_param_groups(cfg_optim) 119 | else: 120 | params = model.parameters() 121 | 122 | try: 123 | # Try the PyTorch optimizer class first. 124 | optimizer_class = getattr(torch.optim, cfg_optim.type) 125 | except AttributeError: 126 | raise NotImplementedError(f"Optimizer {cfg_optim.type} is not yet implemented.") 127 | optimizer_kwargs = cfg_optim.params 128 | 129 | # We will try to use fuse optimizers by default. 
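    # Illustrative config shape (an assumption inferred from the attributes read
    # below and from configs such as projects/nerf/configs/nerf_blender.yaml):
    #   optim:
    #     type: Adam            # any torch.optim class name; Adam/AdamW/SGD may be swapped for apex fused versions
    #     fused_opt: True       # hypothetical flag value; if apex is missing we silently fall back to torch.optim
    #     params:
    #       lr: 0.0005
    #       betas: [0.9, 0.999]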
130 |     try:
131 |         from apex.optimizers import FusedAdam, FusedSGD
132 |         fused_opt = cfg_optim.fused_opt
133 |     except (ImportError, ModuleNotFoundError):
134 |         fused_opt = False
135 | 
136 |     if fused_opt:
137 |         if cfg_optim.type == 'Adam':
138 |             optimizer_class = FusedAdam
139 |             optimizer_kwargs['adam_w_mode'] = False
140 |         elif cfg_optim.type == 'AdamW':
141 |             optimizer_class = FusedAdam
142 |             optimizer_kwargs['adam_w_mode'] = True
143 |         elif cfg_optim.type == 'SGD':
144 |             optimizer_class = FusedSGD
145 |     if cfg_optim.type in ["RAdam", "RMSprop"]:
146 |         optimizer_kwargs["foreach"] = fused_opt
147 | 
148 |     optim = optimizer_class(params, **optimizer_kwargs)
149 | 
150 |     return optim
151 | 
152 | 
153 | def get_scheduler(cfg_optim, optim):
154 |     """Return the scheduler object.
155 | 
156 |     Args:
157 |         cfg_optim (obj): Config for the specific optimization module (gen/dis).
158 |         optim (obj): PyTorch optimizer object.
159 | 
160 |     Returns:
161 |         (obj): Scheduler
162 |     """
163 |     if cfg_optim.sched.type == 'step':
164 |         scheduler = lr_scheduler.StepLR(optim,
165 |                                         step_size=cfg_optim.sched.step_size,
166 |                                         gamma=cfg_optim.sched.gamma)
167 |     elif cfg_optim.sched.type == 'constant':
168 |         scheduler = lr_scheduler.LambdaLR(optim, lambda x: 1)
169 |     elif cfg_optim.sched.type == 'linear_warmup':
170 |         scheduler = lr_scheduler.LambdaLR(
171 |             optim, lambda x: x * 1.0 / cfg_optim.sched.warmup if x < cfg_optim.sched.warmup else 1.0)
172 |     elif cfg_optim.sched.type == 'cosine_warmup':
173 | 
174 |         warmup_scheduler = lr_scheduler.LinearLR(
175 |             optim,
176 |             start_factor=1.0 / cfg_optim.sched.warmup,
177 |             end_factor=1.0,
178 |             total_iters=cfg_optim.sched.warmup
179 |         )
180 |         T_max_val = cfg_optim.sched.decay_steps - cfg_optim.sched.warmup
181 |         cosine_lr_scheduler = lr_scheduler.CosineAnnealingLR(
182 |             optim,
183 |             T_max=T_max_val,
184 |             eta_min=getattr(cfg_optim.sched, 'eta_min', 0),
185 |         )
186 |         scheduler = lr_scheduler.SequentialLR(
187 |             optim,
188 |             schedulers=[warmup_scheduler, cosine_lr_scheduler],
189 |             milestones=[cfg_optim.sched.warmup]
190 |         )
191 | 
192 |     elif cfg_optim.sched.type == 'linear':
193 |         # Start linear decay from here.
194 |         decay_start = cfg_optim.sched.decay_start
195 |         # End linear decay here.
196 |         # Continue to train using the lowest learning rate till the end.
197 |         decay_end = cfg_optim.sched.decay_end
198 |         # Lowest learning rate multiplier.
199 |         decay_target = cfg_optim.sched.decay_target
200 | 
201 |         def sch(x):
202 |             decay = ((x - decay_start) * decay_target + decay_end - x) / (decay_end - decay_start)
203 |             return min(max(decay, decay_target), 1.)
204 | 
205 |         scheduler = lr_scheduler.LambdaLR(optim, lambda x: sch(x))
206 |     elif cfg_optim.sched.type == 'step_with_warmup':
207 |         # The step_size and gamma follow the signature of lr_scheduler.StepLR.
208 |         step_size = cfg_optim.sched.step_size
209 |         gamma = cfg_optim.sched.gamma
210 |         # An additional parameter defines the warmup iteration.
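        # Worked example with illustrative numbers: step_size=1000, gamma=0.1 and
        # warmup_step_size=2000 give lr_after_warmup = 0.1 ** (2000 // 1000) = 0.01,
        # so the multiplier ramps linearly from 0 to 0.01 over the first 2000
        # iterations and then follows the usual staircase 0.1 ** (x // 1000).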
211 |         warmup_step_size = cfg_optim.sched.warmup_step_size
212 | 
213 |         def sch(x):
214 |             lr_after_warmup = gamma ** (warmup_step_size // step_size)
215 |             if x < warmup_step_size:
216 |                 return x / warmup_step_size * lr_after_warmup
217 |             else:
218 |                 return gamma ** (x // step_size)
219 | 
220 |         scheduler = lr_scheduler.LambdaLR(optim, lambda x: sch(x))
221 |     else:
222 |         raise NotImplementedError('Learning rate policy {} not implemented.'.format(cfg_optim.sched.type))
223 |     return scheduler
224 | 
--------------------------------------------------------------------------------
/imaginaire/trainers/utils/logging.py:
--------------------------------------------------------------------------------
1 | '''
2 | -----------------------------------------------------------------------------
3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
4 | 
5 | NVIDIA CORPORATION and its licensors retain all intellectual property
6 | and proprietary rights in and to this software, related documentation
7 | and any modifications thereto. Any use, reproduction, disclosure or
8 | distribution of this software and related documentation without an express
9 | license agreement from NVIDIA CORPORATION is strictly prohibited.
10 | -----------------------------------------------------------------------------
11 | '''
12 | 
13 | import datetime
14 | import os
15 | 
16 | import torch.distributed as dist
17 | 
18 | from imaginaire.utils.distributed import is_master, broadcast_object_list
19 | from imaginaire.utils.distributed import master_only_print as print
20 | 
21 | 
22 | def get_date_uid():
23 |     """Generate a unique id based on date.
24 |     Returns:
25 |         str: Return uid string, e.g. '2019_0125_1047_58'.
26 |     """
27 |     return str(datetime.datetime.now().strftime("%Y_%m%d_%H%M_%S"))
28 | 
29 | 
30 | def init_logging(config_path, logdir, makedir=True):
31 |     r"""Create log directory for storing checkpoints and output images.
32 | 
33 |     Args:
34 |         config_path (str): Path to the configuration file.
35 |         logdir (str or None): Log directory name
36 |         makedir (bool): Make a new dir or not
37 |     Returns:
38 |         str: Return log dir
39 |     """
40 |     def _create_logdir(_config_path, _logdir, _root_dir):
41 |         config_file = os.path.basename(_config_path)
42 |         date_uid = get_date_uid()
43 |         # example: logs/2019_0125_1047_58_spade_cocostuff
44 |         _log_file = '_'.join([date_uid, os.path.splitext(config_file)[0]])
45 |         if _logdir is None:
46 |             _logdir = os.path.join(_root_dir, _log_file)
47 |         if makedir:
48 |             print('Make folder {}'.format(_logdir))
49 |             os.makedirs(_logdir, exist_ok=True)
50 |         return _logdir
51 | 
52 |     root_dir = 'logs'
53 |     if dist.is_available():
54 |         if dist.is_initialized():
55 |             message = [None]
56 |             if is_master():
57 |                 logdir = _create_logdir(config_path, logdir, root_dir)
58 |                 message = [logdir]
59 | 
60 |             # Send logdir from master to all workers.
61 |             message = broadcast_object_list(message=message, src=0)
62 |             logdir = message[0]
63 |         else:
64 |             logdir = _create_logdir(config_path, logdir, root_dir)
65 |     else:
66 |         logdir = _create_logdir(config_path, logdir, root_dir)
67 | 
68 |     return logdir
69 | 
--------------------------------------------------------------------------------
/imaginaire/trainers/utils/meters.py:
--------------------------------------------------------------------------------
1 | '''
2 | -----------------------------------------------------------------------------
3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import math 14 | import torch 15 | import wandb 16 | from torch.utils.tensorboard import SummaryWriter 17 | 18 | from imaginaire.utils.distributed import master_only, dist_all_reduce_tensor, \ 19 | is_master, get_rank 20 | 21 | from imaginaire.utils.distributed import master_only_print as print 22 | 23 | LOG_WRITER = None 24 | LOG_DIR = None 25 | 26 | 27 | @torch.no_grad() 28 | def sn_reshape_weight_to_matrix(weight): 29 | r"""Reshape weight to obtain the matrix form. 30 | 31 | Args: 32 | weight (Parameters): pytorch layer parameter tensor. 33 | """ 34 | weight_mat = weight 35 | height = weight_mat.size(0) 36 | return weight_mat.reshape(height, -1) 37 | 38 | 39 | @torch.no_grad() 40 | def get_weight_stats(mod): 41 | r"""Get weight state 42 | 43 | Args: 44 | mod: Pytorch module 45 | """ 46 | if mod.weight_orig.grad is not None: 47 | grad_norm = mod.weight_orig.grad.data.norm().item() 48 | else: 49 | grad_norm = 0. 50 | weight_norm = mod.weight_orig.data.norm().item() 51 | weight_mat = sn_reshape_weight_to_matrix(mod.weight_orig) 52 | sigma = torch.sum(mod.weight_u * torch.mv(weight_mat, mod.weight_v)) 53 | return grad_norm, weight_norm, sigma 54 | 55 | 56 | @master_only 57 | def set_summary_writer(log_dir): 58 | r"""Set summary writer 59 | 60 | Args: 61 | log_dir (str): Log directory. 62 | """ 63 | global LOG_DIR, LOG_WRITER 64 | LOG_DIR = log_dir 65 | LOG_WRITER = SummaryWriter(log_dir=log_dir) 66 | 67 | 68 | def write_summary(name, summary, step, hist=False): 69 | """Utility function for write summary to log_writer. 70 | """ 71 | global LOG_WRITER 72 | lw = LOG_WRITER 73 | if lw is None: 74 | raise Exception("Log writer not set.") 75 | if hist: 76 | lw.add_histogram(name, summary, step) 77 | else: 78 | lw.add_scalar(name, summary, step) 79 | 80 | 81 | class Meter(object): 82 | """Meter is to keep track of statistics along steps. 83 | Meters write values for purpose like printing average values. 84 | Meters can be flushed to log files (i.e. TensorBoard for now) 85 | regularly. 86 | 87 | Args: 88 | name (str): the name of meter 89 | reduce (bool): If ``True``, perform a distributed reduce for the log 90 | values across all GPUs. 91 | """ 92 | 93 | def __init__(self, name, reduce=True): 94 | self.name = name 95 | self.reduce = reduce 96 | self.values = [] 97 | 98 | def reset(self): 99 | r"""Reset the meter values""" 100 | if not self.reduce and get_rank() != 0: 101 | return 102 | self.values = [] 103 | 104 | def write(self, value): 105 | r"""Record the value""" 106 | if not self.reduce and get_rank() != 0: 107 | return 108 | if value is not None: 109 | self.values.append(value) 110 | 111 | def flush(self, step): 112 | r"""Write the value in the tensorboard. 113 | 114 | Args: 115 | step (int): Epoch or iteration number. 
116 | """ 117 | if not self.reduce and get_rank() != 0: 118 | return 119 | values = torch.tensor(self.values, device="cuda") 120 | if self.reduce: 121 | values = dist_all_reduce_tensor(values) 122 | 123 | if not all(math.isfinite(x) for x in values): 124 | print("meter {} contained a nan or inf.".format(self.name)) 125 | filtered_values = list(filter(lambda x: math.isfinite(x), self.values)) 126 | if float(len(filtered_values)) != 0: 127 | value = float(sum(filtered_values)) / float(len(filtered_values)) 128 | if is_master(): 129 | write_summary(self.name, value, step) 130 | wandb.log({self.name: value}, step=step) 131 | self.reset() 132 | 133 | @master_only 134 | def write_image(self, img_grid, step): 135 | r"""Write the value in the tensorboard. 136 | 137 | Args: 138 | img_grid: 139 | step (int): Epoch or iteration number. 140 | """ 141 | if not self.reduce and get_rank() != 0: 142 | return 143 | global LOG_WRITER 144 | lw = LOG_WRITER 145 | if lw is None: 146 | raise Exception("Log writer not set.") 147 | lw.add_image("Visualizations", img_grid, step) 148 | -------------------------------------------------------------------------------- /imaginaire/utils/cudnn.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import torch.backends.cudnn as cudnn 14 | 15 | from imaginaire.utils.distributed import master_only_print as print 16 | 17 | 18 | def init_cudnn(deterministic, benchmark): 19 | r"""Initialize the cudnn module. The two things to consider is whether to 20 | use cudnn benchmark and whether to use cudnn deterministic. If cudnn 21 | benchmark is set, then the cudnn deterministic is automatically false. 22 | 23 | Args: 24 | deterministic (bool): Whether to use cudnn deterministic. 25 | benchmark (bool): Whether to use cudnn benchmark. 26 | """ 27 | cudnn.deterministic = deterministic 28 | cudnn.benchmark = benchmark 29 | print('cudnn benchmark: {}'.format(benchmark)) 30 | print('cudnn deterministic: {}'.format(deterministic)) 31 | -------------------------------------------------------------------------------- /imaginaire/utils/distributed.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 
10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import functools 14 | import ctypes 15 | 16 | import torch 17 | import torch.distributed as dist 18 | from contextlib import contextmanager 19 | 20 | 21 | def init_dist(local_rank, backend='nccl', **kwargs): 22 | r"""Initialize distributed training""" 23 | if dist.is_available(): 24 | if dist.is_initialized(): 25 | return torch.cuda.current_device() 26 | torch.cuda.set_device(local_rank) 27 | dist.init_process_group(backend=backend, init_method='env://', **kwargs) 28 | 29 | # Increase the L2 fetch granularity for faster speed. 30 | _libcudart = ctypes.CDLL('libcudart.so') 31 | # Set device limit on the current device 32 | # cudaLimitMaxL2FetchGranularity = 0x05 33 | pValue = ctypes.cast((ctypes.c_int * 1)(), ctypes.POINTER(ctypes.c_int)) 34 | _libcudart.cudaDeviceSetLimit(ctypes.c_int(0x05), ctypes.c_int(128)) 35 | _libcudart.cudaDeviceGetLimit(pValue, ctypes.c_int(0x05)) 36 | # assert pValue.contents.value == 128 37 | 38 | 39 | def get_rank(): 40 | r"""Get rank of the thread.""" 41 | rank = 0 42 | if dist.is_available(): 43 | if dist.is_initialized(): 44 | rank = dist.get_rank() 45 | return rank 46 | 47 | 48 | def get_world_size(): 49 | r"""Get world size. How many GPUs are available in this job.""" 50 | world_size = 1 51 | if dist.is_available(): 52 | if dist.is_initialized(): 53 | world_size = dist.get_world_size() 54 | return world_size 55 | 56 | 57 | def broadcast_object_list(message, src=0): 58 | r"""Broadcast object list from the master to the others""" 59 | # Send logdir from master to all workers. 60 | if dist.is_available(): 61 | if dist.is_initialized(): 62 | torch.distributed.broadcast_object_list(message, src=src) 63 | return message 64 | 65 | 66 | def master_only(func): 67 | r"""Apply this function only to the master GPU.""" 68 | @functools.wraps(func) 69 | def wrapper(*args, **kwargs): 70 | r"""Simple function wrapper for the master function""" 71 | if get_rank() == 0: 72 | return func(*args, **kwargs) 73 | else: 74 | return None 75 | return wrapper 76 | 77 | 78 | def is_master(): 79 | r"""check if current process is the master""" 80 | return get_rank() == 0 81 | 82 | 83 | def is_dist(): 84 | return dist.is_initialized() 85 | 86 | 87 | def barrier(): 88 | if is_dist(): 89 | dist.barrier() 90 | 91 | 92 | @contextmanager 93 | def master_first(): 94 | if not is_master(): 95 | barrier() 96 | yield 97 | if dist.is_initialized() and is_master(): 98 | barrier() 99 | 100 | 101 | def is_local_master(): 102 | return torch.cuda.current_device() == 0 103 | 104 | 105 | @master_only 106 | def master_only_print(*args): 107 | r"""master-only print""" 108 | print(*args) 109 | 110 | 111 | def dist_reduce_tensor(tensor, rank=0, reduce='mean'): 112 | r""" Reduce to rank 0 """ 113 | world_size = get_world_size() 114 | if world_size < 2: 115 | return tensor 116 | with torch.no_grad(): 117 | dist.reduce(tensor, dst=rank) 118 | if get_rank() == rank: 119 | if reduce == 'mean': 120 | tensor /= world_size 121 | elif reduce == 'sum': 122 | pass 123 | else: 124 | raise NotImplementedError 125 | return tensor 126 | 127 | 128 | def dist_all_reduce_tensor(tensor, reduce='mean'): 129 | r""" Reduce to all ranks """ 130 | world_size = get_world_size() 131 | if world_size < 2: 132 | return tensor 133 | with torch.no_grad(): 134 | dist.all_reduce(tensor) 135 | if reduce == 'mean': 136 | tensor /= world_size 137 | elif reduce == 'sum': 138 | pass 139 | else: 140 | raise NotImplementedError 141 | return tensor 
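
# Usage sketch (illustrative, not part of the original module): reporting a
# metric once per job by combining the helpers above, assuming `loss` is a
# scalar CUDA tensor present on every rank.
#
#     loss_avg = dist_all_reduce_tensor(loss.detach(), reduce='mean')
#     master_only_print(f"average loss across ranks: {loss_avg.item():.4f}")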
142 | 143 | 144 | def dist_all_gather_tensor(tensor): 145 | r""" gather to all ranks """ 146 | world_size = get_world_size() 147 | if world_size < 2: 148 | return [tensor] 149 | tensor_list = [ 150 | torch.ones_like(tensor) for _ in range(dist.get_world_size())] 151 | with torch.no_grad(): 152 | dist.all_gather(tensor_list, tensor) 153 | return tensor_list 154 | -------------------------------------------------------------------------------- /imaginaire/utils/gpu_affinity.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import math 14 | import os 15 | # pynvml is a python bindings to the NVIDIA Management Library 16 | # https://developer.nvidia.com/nvidia-management-library-nvml 17 | # An API for monitoring and managing various states of the NVIDIA GPU devices. 18 | # It provides direct access to the queries and commands exposed via nvidia-smi. 19 | 20 | import pynvml 21 | 22 | pynvml.nvmlInit() 23 | 24 | 25 | def system_get_driver_version(): 26 | r"""Get Driver Version""" 27 | return pynvml.nvmlSystemGetDriverVersion() 28 | 29 | 30 | def device_get_count(): 31 | r"""Get number of devices""" 32 | return pynvml.nvmlDeviceGetCount() 33 | 34 | 35 | class Device(object): 36 | r"""Device used for nvml.""" 37 | _nvml_affinity_elements = math.ceil(os.cpu_count() / 64) 38 | 39 | def __init__(self, device_idx): 40 | super().__init__() 41 | self.handle = pynvml.nvmlDeviceGetHandleByIndex(device_idx) 42 | 43 | def get_name(self): 44 | r"""Get obect name""" 45 | return pynvml.nvmlDeviceGetName(self.handle) 46 | 47 | def get_cpu_affinity(self): 48 | r"""Get CPU affinity""" 49 | affinity_string = '' 50 | for j in pynvml.nvmlDeviceGetCpuAffinity(self.handle, Device._nvml_affinity_elements): 51 | # assume nvml returns list of 64 bit ints 52 | affinity_string = '{:064b}'.format(j) + affinity_string 53 | affinity_list = [int(x) for x in affinity_string] 54 | affinity_list.reverse() # so core 0 is in 0th element of list 55 | 56 | return [i for i, e in enumerate(affinity_list) if e != 0] 57 | 58 | 59 | def set_affinity(gpu_id=None): 60 | r"""Set GPU affinity 61 | 62 | Args: 63 | gpu_id (int): Which gpu device. 64 | """ 65 | if gpu_id is None: 66 | gpu_id = int(os.getenv('LOCAL_RANK', 0)) 67 | 68 | try: 69 | dev = Device(gpu_id) 70 | # os.sched_setaffinity() method in Python is used to set the CPU affinity mask of a process indicated 71 | # by the specified process id. 72 | # A process’s CPU affinity mask determines the set of CPUs on which it is eligible to run. 
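        # For example, if NVML reported the 64-bit word 0b1010 for this GPU, the
        # decoding in get_cpu_affinity() above would yield cores [1, 3], and those
        # are the only cores this process would be scheduled on afterwards.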
73 | # Syntax: os.sched_setaffinity(pid, mask) 74 | # pid=0 means the current process 75 | os.sched_setaffinity(0, dev.get_cpu_affinity()) 76 | # list of ints 77 | # representing the logical cores this process is now affinitied with 78 | return os.sched_getaffinity(0) 79 | 80 | except pynvml.NVMLError: 81 | print("(Setting affinity with NVML failed, skipping...)") 82 | -------------------------------------------------------------------------------- /imaginaire/utils/set_random_seed.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import random 14 | import numpy as np 15 | import torch 16 | 17 | from imaginaire.utils.distributed import get_rank 18 | from imaginaire.utils.distributed import master_only_print as print 19 | 20 | 21 | def set_random_seed(seed, by_rank=False): 22 | r"""Set random seeds for everything, including random, numpy, torch.manual_seed, torch.cuda_manual_seed. 23 | torch.cuda.manual_seed_all is not necessary (included in torch.manual_seed) 24 | 25 | Args: 26 | seed (int): Random seed. 27 | by_rank (bool): if true, each gpu will use a different random seed. 28 | """ 29 | if by_rank: 30 | seed += get_rank() 31 | print(f"Using random seed {seed}") 32 | random.seed(seed) 33 | np.random.seed(seed) 34 | torch.manual_seed(seed) # sets seed on the current CPU & all GPUs 35 | torch.cuda.manual_seed(seed) # sets seed on current GPU 36 | # torch.cuda.manual_seed_all(seed) # included in torch.manual_seed 37 | -------------------------------------------------------------------------------- /imaginaire/utils/termcolor.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 
10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import pprint 14 | 15 | import termcolor 16 | 17 | 18 | def red(x): return termcolor.colored(str(x), color="red") 19 | def green(x): return termcolor.colored(str(x), color="green") 20 | def blue(x): return termcolor.colored(str(x), color="blue") 21 | def cyan(x): return termcolor.colored(str(x), color="cyan") 22 | def yellow(x): return termcolor.colored(str(x), color="yellow") 23 | def magenta(x): return termcolor.colored(str(x), color="magenta") 24 | def grey(x): return termcolor.colored(str(x), color="grey") 25 | 26 | 27 | COLORS = { 28 | 'red': red, 'green': green, 'blue': blue, 'cyan': cyan, 'yellow': yellow, 'magenta': magenta, 'grey': grey 29 | } 30 | 31 | 32 | def PP(x): 33 | string = pprint.pformat(x, indent=2) 34 | if isinstance(x, dict): 35 | string = '{\n ' + string[1:-1] + '\n}' 36 | return string 37 | 38 | 39 | def alert(x, color='red'): 40 | color = COLORS[color] 41 | print(color('-' * 32)) 42 | print(color(f'* {x}')) 43 | print(color('-' * 32)) 44 | -------------------------------------------------------------------------------- /imaginaire/utils/visualization.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 
10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import wandb 14 | import torch 15 | import torchvision 16 | 17 | from matplotlib import pyplot as plt 18 | from torchvision.transforms import functional as torchvision_F 19 | 20 | 21 | def wandb_image(images, from_range=(0, 1)): 22 | images = preprocess_image(images, from_range=from_range) 23 | image_grid = torchvision.utils.make_grid(images, nrow=1, pad_value=1) 24 | image_grid = torchvision_F.to_pil_image(image_grid) 25 | wandb_image = wandb.Image(image_grid) 26 | return wandb_image 27 | 28 | 29 | def preprocess_image(images, from_range=(0, 1), cmap="gray"): 30 | min, max = from_range 31 | images = (images - min) / (max - min) 32 | images = images.detach().cpu().float().clamp_(min=0, max=1) 33 | if images.shape[1] == 1: 34 | images = get_heatmap(images[:, 0], cmap=cmap) 35 | return images 36 | 37 | 38 | def get_heatmap(gray, cmap): # [N,H,W] 39 | color = plt.get_cmap(cmap)(gray.numpy()) 40 | color = torch.from_numpy(color[..., :3]).permute(0, 3, 1, 2).float() # [N,3,H,W] 41 | return color 42 | -------------------------------------------------------------------------------- /neuralangelo.yaml: -------------------------------------------------------------------------------- 1 | # conda env create --file neuralangelo.yaml 2 | name: neuralangelo 3 | channels: 4 | - conda-forge 5 | - pytorch 6 | dependencies: 7 | # general 8 | - gpustat 9 | - gdown 10 | - cudatoolkit-dev 11 | - cmake 12 | # python general 13 | - python=3.8 14 | - pip 15 | - numpy 16 | - scipy 17 | - ipython 18 | - jupyterlab 19 | - cython 20 | - ninja 21 | - diskcache 22 | # pytorch 23 | - pytorch 24 | - torchvision 25 | - pip: 26 | - -r requirements.txt 27 | -------------------------------------------------------------------------------- /projects/nerf/configs/ingp_blender.yaml: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------------------- 2 | # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # NVIDIA CORPORATION and its licensors retain all intellectual property 5 | # and proprietary rights in and to this software, related documentation 6 | # and any modifications thereto. Any use, reproduction, disclosure or 7 | # distribution of this software and related documentation without an express 8 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 9 | # ----------------------------------------------------------------------------- 10 | 11 | _parent_: projects/nerf/configs/nerf_blender.yaml 12 | 13 | max_iter: 500000 14 | 15 | wandb_scalar_iter: 500 16 | wandb_image_iter: 10000 17 | validation_iter: 10000 18 | 19 | model: 20 | type: projects.nerf.models.ingp 21 | mlp: 22 | layers_feat: [null,64,64] 23 | layers_rgb: [null,64,3] 24 | voxel: 25 | levels: 26 | min: 4 27 | max: 12 28 | num: 16 29 | dict_size: 19 30 | dim: 4 31 | range: [-5,5] 32 | init_scale: 1e-4 33 | sample_intvs: 256 34 | fine_sampling: False 35 | 36 | optim: 37 | type: Adam 38 | params: 39 | lr: 0.01 40 | sched: 41 | gamma: 1 42 | -------------------------------------------------------------------------------- /projects/nerf/configs/nerf_blender.yaml: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------------------- 2 | # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 
3 | # 4 | # NVIDIA CORPORATION and its licensors retain all intellectual property 5 | # and proprietary rights in and to this software, related documentation 6 | # and any modifications thereto. Any use, reproduction, disclosure or 7 | # distribution of this software and related documentation without an express 8 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 9 | # ----------------------------------------------------------------------------- 10 | 11 | aws_credentials_file: scripts/s3/pbss_dir.secret 12 | logging_iter: 9999999999999 # disable the printing logger 13 | 14 | max_iter: 500000 15 | 16 | wandb_scalar_iter: 100 17 | wandb_image_iter: 1000 18 | validation_iter: 2000 19 | 20 | speed_benchmark: False 21 | 22 | checkpoint: 23 | save_to_s3: False 24 | load_from_s3: False 25 | s3_credentials: scripts/s3/pbss_dir.secret 26 | s3_bucket: nerf 27 | save_iter: 10000 28 | 29 | trainer: 30 | type: projects.nerf.trainers.nerf 31 | ema_config: 32 | enabled: False 33 | load_ema_checkpoint: False 34 | loss_weight: 35 | render: 1.0 36 | render_fine: 1.0 37 | init: 38 | type: xavier 39 | amp_config: 40 | enabled: True 41 | 42 | model: 43 | type: projects.nerf.models.nerf 44 | mlp: 45 | layers_feat: [null,256,256,256,256,256,256,256,256] 46 | layers_rgb: [null,128,3] 47 | skip: [4] 48 | posenc: 49 | L_3D: 10 50 | L_view: 4 51 | density_activ: softplus 52 | view_dep: True 53 | dist: 54 | param: metric 55 | range: [2,6] 56 | sample_intvs: 64 57 | sample_stratified: True 58 | fine_sampling: True 59 | sample_intvs_fine: 128 60 | rand_rays: 1024 61 | density_noise_reg: 62 | opaque_background: False 63 | camera_ndc: False 64 | 65 | optim: 66 | type: Adam 67 | params: 68 | lr: 0.0005 69 | betas: [0.9, 0.999] 70 | sched: 71 | iteration_mode: False 72 | type: step 73 | step_size: 20 74 | gamma: 0.97724 75 | 76 | data: 77 | type: projects.nerf.datasets.nerf_blender 78 | use_multi_epoch_loader: True 79 | num_workers: 4 80 | root: datasets/nerf-synthetic/lego 81 | image_size: [400,400] 82 | preload: True 83 | bgcolor: 1 84 | train: 85 | batch_size: 2 86 | subset: 87 | val: 88 | batch_size: 2 89 | subset: 4 90 | max_viz_samples: 16 91 | 92 | test_data: 93 | type: projects.nerf.datasets.nerf_blender 94 | num_workers: 4 95 | root: datasets/nerf-synthetic/lego 96 | image_size: [400,400] 97 | preload: True 98 | bgcolor: 1 99 | test: 100 | batch_size: 2 101 | subset: 102 | -------------------------------------------------------------------------------- /projects/nerf/configs/nerf_llff.yaml: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------------------- 2 | # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # NVIDIA CORPORATION and its licensors retain all intellectual property 5 | # and proprietary rights in and to this software, related documentation 6 | # and any modifications thereto. Any use, reproduction, disclosure or 7 | # distribution of this software and related documentation without an express 8 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 
9 | # ----------------------------------------------------------------------------- 10 | 11 | _parent_: projects/nerf/configs/nerf_blender.yaml 12 | 13 | max_iter: 200000 14 | 15 | wandb_scalar_iter: 50 16 | wandb_image_iter: 500 17 | validation_iter: 1000 18 | 19 | model: 20 | type: projects.nerf.models.nerf 21 | dist: 22 | param: ndc 23 | range: [0,1] 24 | sample_intvs: 64 25 | fine_sampling: True 26 | sample_intvs_fine: 128 27 | rand_rays: 1024 28 | camera_ndc: True 29 | 30 | optim: 31 | type: Adam 32 | params: 33 | lr: 0.0005 34 | betas: [0.9, 0.999] 35 | sched: 36 | iteration_mode: False 37 | type: step 38 | step_size: 100 39 | gamma: 0.97724 40 | 41 | data: 42 | type: projects.nerf.datasets.nerf_llff 43 | use_multi_epoch_loader: True 44 | num_workers: 4 45 | root: datasets/nerf-llff/fern 46 | image_size: [480,640] 47 | preload: True 48 | val_ratio: 0.1 49 | train: 50 | batch_size: 2 51 | subset: 52 | val: 53 | batch_size: 2 54 | subset: 4 55 | 56 | test_data: 57 | type: projects.nerf.datasets.nerf_llff 58 | num_workers: 4 59 | root: datasets/nerf-llff/fern 60 | image_size: [480,640] 61 | preload: True 62 | test: 63 | batch_size: 2 64 | subset: 65 | -------------------------------------------------------------------------------- /projects/nerf/datasets/base.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import torch 14 | import tqdm 15 | import threading 16 | import queue 17 | 18 | 19 | class Dataset(torch.utils.data.Dataset): 20 | 21 | def __init__(self, cfg, is_inference=False, is_test=False): 22 | super().__init__() 23 | self.split = "test" if is_test else "val" if is_inference else "train" 24 | 25 | def _preload_worker(self, data_list, load_func, q, lock, idx_tqdm): 26 | # Keep preloading data in parallel. 27 | while True: 28 | idx = q.get() 29 | data_list[idx] = load_func(idx) 30 | with lock: 31 | idx_tqdm.update() 32 | q.task_done() 33 | 34 | def preload_threading(self, load_func, num_workers, data_str="images"): 35 | # Use threading to preload data in parallel. 
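        # Usage sketch (mirrors the calls in the NeRF dataset subclasses):
        #   self.images = self.preload_threading(self.get_image, cfg_data.num_workers)
        # Each worker thread pops an index from the queue, loads that sample with
        # `load_func` into `data_list[idx]`, and `q.join()` below blocks until every
        # index has been processed.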
36 | data_list = [None] * len(self) 37 | q = queue.Queue(maxsize=len(self)) 38 | idx_tqdm = tqdm.tqdm(range(len(self)), desc=f"preloading {data_str} ({self.split})", leave=False) 39 | for i in range(len(self)): 40 | q.put(i) 41 | lock = threading.Lock() 42 | for ti in range(num_workers): 43 | t = threading.Thread(target=self._preload_worker, 44 | args=(data_list, load_func, q, lock, idx_tqdm), daemon=True) 45 | t.start() 46 | q.join() 47 | idx_tqdm.close() 48 | assert all(map(lambda x: x is not None, data_list)) 49 | return data_list 50 | 51 | def __getitem__(self, idx): 52 | raise NotImplementedError 53 | 54 | def __len__(self): 55 | return len(self.list) 56 | -------------------------------------------------------------------------------- /projects/nerf/datasets/nerf_blender.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import json 14 | import numpy as np 15 | import torch 16 | import torchvision.transforms.functional as torchvision_F 17 | from PIL import Image, ImageFile 18 | 19 | from projects.nerf.datasets import base 20 | from projects.nerf.utils import camera 21 | 22 | ImageFile.LOAD_TRUNCATED_IMAGES = True 23 | 24 | 25 | class Dataset(base.Dataset): 26 | 27 | def __init__(self, cfg, is_inference=False, is_test=False): 28 | super().__init__(cfg, is_inference=is_inference, is_test=is_test) 29 | cfg_data = cfg.test_data if self.split == "test" else cfg.data 30 | data_info = cfg_data[self.split] 31 | self.root = cfg_data.root 32 | self.preload = cfg_data.preload 33 | self.bgcolor = cfg_data.bgcolor 34 | self.raw_H, self.raw_W = 800, 800 35 | self.H, self.W = cfg_data.image_size 36 | meta_fname = f"{cfg_data.root}/transforms_{self.split}.json" 37 | with open(meta_fname) as file: 38 | self.meta = json.load(file) 39 | self.focal = 0.5 * self.raw_W / np.tan(0.5 * self.meta["camera_angle_x"]) 40 | self.list = self.meta["frames"] 41 | # Consider only a subset of data. 42 | if data_info.subset: 43 | self.list = self.list[:data_info.subset] 44 | # Preload dataset if possible. 45 | if cfg_data.preload: 46 | self.images = self.preload_threading(self.get_image, cfg_data.num_workers) 47 | self.cameras = self.preload_threading(self.get_camera, cfg_data.num_workers, data_str="cameras") 48 | 49 | def __getitem__(self, idx): 50 | """Process raw data and return processed data in a dictionary. 51 | 52 | Args: 53 | idx: The index of the sample of the dataset. 54 | Returns: A dictionary containing the data. 55 | idx (scalar): The index of the sample of the dataset. 56 | image (3xHxW tensor): Image with pixel values in [0,1] for supervision. 57 | intr (3x3 tensor): The camera intrinsics of `image`. 58 | pose (3x4 tensor): The camera extrinsics [R,t] of `image`. 59 | """ 60 | # Keep track of sample index for convenience. 61 | sample = dict(idx=idx) 62 | # Get the images. 
63 | image = self.images[idx] if self.preload else self.get_image(idx) 64 | image = self.preprocess_image(image) 65 | # Get the cameras (intrinsics and pose). 66 | intr, pose = self.cameras[idx] if self.preload else self.get_camera(idx) 67 | intr, pose = self.preprocess_camera(intr, pose) 68 | # Update the data sample. 69 | sample.update( 70 | image=image, 71 | intr=intr, 72 | pose=pose, 73 | ) 74 | return sample 75 | 76 | def get_image(self, idx): 77 | fpath = self.list[idx]["file_path"][2:] 78 | image_fname = f"{self.root}/{fpath}.png" 79 | image = Image.open(image_fname) 80 | image.load() 81 | return image 82 | 83 | def preprocess_image(self, image): 84 | # Resize the image. 85 | image = image.resize((self.W, self.H)) 86 | image = torchvision_F.to_tensor(image) 87 | # Background masking. 88 | rgb, mask = image[:3], image[3:] 89 | if self.bgcolor is not None: 90 | rgb = rgb * mask + self.bgcolor * (1 - mask) 91 | return rgb 92 | 93 | def get_camera(self, idx): 94 | # Camera intrinsics. 95 | intr = torch.tensor([[self.focal, 0, self.raw_W / 2], 96 | [0, self.focal, self.raw_H / 2], 97 | [0, 0, 1]]).float() 98 | # Camera pose. 99 | pose_raw = torch.tensor(self.list[idx]["transform_matrix"], dtype=torch.float32) 100 | pose = self.parse_raw_camera(pose_raw) 101 | return intr, pose 102 | 103 | def preprocess_camera(self, intr, pose): 104 | # Adjust the intrinsics according to the resized image. 105 | intr = intr.clone() 106 | intr[0] *= self.W / self.raw_W 107 | intr[1] *= self.H / self.raw_H 108 | return intr, pose 109 | 110 | def parse_raw_camera(self, pose_raw): 111 | pose_flip = camera.pose(R=torch.diag(torch.tensor([1, -1, -1]))) 112 | pose = camera.pose.compose([pose_flip, pose_raw[:3]]) 113 | pose = camera.pose.invert(pose) 114 | return pose 115 | -------------------------------------------------------------------------------- /projects/nerf/datasets/nerf_llff.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 
10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import numpy as np 14 | import torch 15 | import torch.nn.functional as torch_F 16 | import torchvision.transforms.functional as torchvision_F 17 | from PIL import Image, ImageFile 18 | 19 | from projects.nerf.datasets import base 20 | from projects.nerf.utils import camera 21 | 22 | ImageFile.LOAD_TRUNCATED_IMAGES = True 23 | 24 | 25 | class Dataset(base.Dataset): 26 | 27 | def __init__(self, cfg, is_inference=False, is_test=False): 28 | super().__init__(cfg, is_inference=is_inference, is_test=is_test) 29 | cfg_data = cfg.test_data if self.split == "test" else cfg.data 30 | data_info = cfg_data[self.split] 31 | self.root = cfg_data.root 32 | self.preload = cfg_data.preload 33 | self.raw_H, self.raw_W = 3024, 4032 34 | self.H, self.W = cfg_data.image_size 35 | list_fname = f"{cfg_data.root}/images.list" 36 | image_fnames = open(list_fname).read().splitlines() 37 | poses_raw, bounds = self.parse_cameras_and_bounds(cfg_data) 38 | self.list = list(zip(image_fnames, poses_raw, bounds)) 39 | # Manually split train/val subsets. 40 | num_val_split = int(len(self) * cfg_data.val_ratio) 41 | self.list = self.list[:-num_val_split] if self.split == "train" else self.list[-num_val_split:] 42 | # Consider only a subset of data. 43 | if data_info.subset: 44 | self.list = self.list[:data_info.subset] 45 | # Preload dataset if possible. 46 | if cfg_data.preload: 47 | self.images = self.preload_threading(self.get_image, cfg_data.num_workers) 48 | self.cameras = self.preload_threading(self.get_camera, cfg_data.num_workers, data_str="cameras") 49 | 50 | def parse_cameras_and_bounds(self, cfg_data): 51 | fname = f"{cfg_data.root}/poses_bounds.npy" 52 | data = torch.tensor(np.load(fname), dtype=torch.float32) 53 | # Parse cameras (intrinsics and poses). 54 | cam_data = data[:, :-2].view([-1, 3, 5]) # [N,3,5] 55 | poses_raw = cam_data[..., :4] # [N,3,4] 56 | poses_raw[..., 0], poses_raw[..., 1] = poses_raw[..., 1], -poses_raw[..., 0] 57 | raw_H, raw_W, self.focal = cam_data[0, :, -1] 58 | assert self.raw_H == raw_H and self.raw_W == raw_W 59 | # Parse depth bounds. 60 | bounds = data[:, -2:] # [N,2] 61 | scale = 1. / (bounds.min() * 0.75) # Not sure how this was determined? 62 | poses_raw[..., 3] *= scale 63 | bounds *= scale 64 | # Roughly center camera poses. 65 | poses_raw = self.center_camera_poses(poses_raw) 66 | return poses_raw, bounds 67 | 68 | def center_camera_poses(self, poses): 69 | # Compute average pose. 70 | center = poses[..., 3].mean(dim=0) 71 | v1 = torch_F.normalize(poses[..., 1].mean(dim=0), dim=0) 72 | v2 = torch_F.normalize(poses[..., 2].mean(dim=0), dim=0) 73 | v0 = v1.cross(v2) 74 | pose_avg = torch.stack([v0, v1, v2, center], dim=-1)[None] # [1,3,4] 75 | # Apply inverse of averaged pose. 76 | poses = camera.pose.compose([poses, camera.pose.invert(pose_avg)]) 77 | return poses 78 | 79 | def __getitem__(self, idx): 80 | """Process raw data and return processed data in a dictionary. 81 | 82 | Args: 83 | idx: The index of the sample of the dataset. 84 | Returns: A dictionary containing the data. 85 | idx (scalar): The index of the sample of the dataset. 86 | image (3xHxW tensor): Image with pixel values in [0,1] for supervision. 87 | intr (3x3 tensor): The camera intrinsics of `image`. 88 | pose (3x4 tensor): The camera extrinsics [R,t] of `image`. 89 | """ 90 | # Keep track of sample index for convenience. 91 | sample = dict(idx=idx) 92 | # Get the images. 
93 | image = self.images[idx] if self.preload else self.get_image(idx) 94 | image = self.preprocess_image(image) 95 | # Get the cameras (intrinsics and pose). 96 | intr, pose = self.cameras[idx] if self.preload else self.get_camera(idx) 97 | intr, pose = self.preprocess_camera(intr, pose) 98 | # Update the data sample. 99 | sample.update( 100 | image=image, 101 | intr=intr, 102 | pose=pose, 103 | ) 104 | return sample 105 | 106 | def get_image(self, idx): 107 | image_fname = f"{self.root}/images/{self.list[idx][0]}" 108 | image = Image.open(image_fname) 109 | image.load() 110 | return image 111 | 112 | def preprocess_image(self, image): 113 | # Resize the image and convert to Pytorch. 114 | image = image.resize((self.W, self.H)) 115 | image = torchvision_F.to_tensor(image) 116 | return image 117 | 118 | def get_camera(self, idx): 119 | # Camera intrinsics. 120 | intr = torch.tensor([[self.focal, 0, self.raw_W / 2], 121 | [0, self.focal, self.raw_H / 2], 122 | [0, 0, 1]]).float() 123 | # Camera pose. 124 | pose_raw = self.list[idx][1] 125 | pose = self.parse_raw_camera(pose_raw) 126 | return intr, pose 127 | 128 | def preprocess_camera(self, intr, pose): 129 | # Adjust the intrinsics according to the resized image. 130 | intr = intr.clone() 131 | intr[0] *= self.W / self.raw_W 132 | intr[1] *= self.H / self.raw_H 133 | return intr, pose 134 | 135 | def parse_raw_camera(self, pose_raw): 136 | pose_flip = camera.pose(R=torch.diag(torch.tensor([1, -1, -1]))) 137 | pose = camera.pose.compose([pose_flip, pose_raw[:3]]) 138 | pose = camera.pose.invert(pose) 139 | pose = camera.pose.compose([pose_flip, pose]) 140 | return pose 141 | -------------------------------------------------------------------------------- /projects/nerf/models/ingp.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import numpy as np 14 | import torch 15 | import tinycudann as tcnn 16 | 17 | from projects.nerf.models import nerf 18 | 19 | 20 | class Model(nerf.Model): 21 | 22 | def __init__(self, cfg_model, cfg_data): 23 | super().__init__(cfg_model, cfg_data) 24 | self.fine_sampling = False 25 | self.density_reg = cfg_model.density_noise_reg 26 | # Define models. 27 | self.nerf = InstantNGP(cfg_model) 28 | 29 | 30 | class InstantNGP(nerf.NeRF): 31 | 32 | def __init__(self, cfg_model): 33 | self.voxel = cfg_model.voxel 34 | super().__init__(cfg_model) 35 | 36 | def set_input_dims(self, cfg_model): 37 | # Define the input encoding dimensions. 38 | self.input_3D_dim = 3 + cfg_model.voxel.dim * cfg_model.voxel.levels.num 39 | self.input_view_dim = 3 if cfg_model.view_dep else None 40 | 41 | def build_model(self, cfg_model): 42 | super().build_model(cfg_model) 43 | # Build the tcnn hash grid. 
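        # The growth rate below spaces the grid resolutions geometrically between
        # 2 ** levels.min and 2 ** levels.max:
        #   growth_rate = exp((ln r_max - ln r_min) / (num_levels - 1))
        # With the values in projects/nerf/configs/ingp_blender.yaml (min=4, max=12,
        # num=16): r_min=16, r_max=4096, growth_rate ~= 1.447, so the per-level
        # resolutions run roughly 16, 23, 33, ..., 4096 (plus 1 in self.resolutions).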
44 | l_min, l_max = self.voxel.levels.min, self.voxel.levels.max 45 | r_min, r_max = 2 ** l_min, 2 ** l_max 46 | num_levels = self.voxel.levels.num 47 | growth_rate = np.exp((np.log(r_max) - np.log(r_min)) / (num_levels - 1)) 48 | config = dict( 49 | otype="HashGrid", 50 | n_levels=cfg_model.voxel.levels.num, 51 | n_features_per_level=cfg_model.voxel.dim, 52 | log2_hashmap_size=cfg_model.voxel.dict_size, 53 | base_resolution=2 ** cfg_model.voxel.levels.min, 54 | per_level_scale=growth_rate, 55 | ) 56 | self.tiny_cuda_encoding = tcnn.Encoding(3, config) 57 | # Compute resolutions of all levels. 58 | self.resolutions = [] 59 | for lv in range(0, num_levels): 60 | size = np.floor(r_min * growth_rate ** lv).astype(int) + 1 61 | self.resolutions.append(size) 62 | 63 | def forward(self, points_3D, ray_unit, density_reg=None): 64 | return super().forward(points_3D, ray_unit, density_reg) 65 | 66 | def _encode_3D(self, points_3D): 67 | # Tri-linear interpolate the corresponding embeddings from the dictionary. 68 | vol_min, vol_max = self.voxel.range 69 | points_3D_normalized = (points_3D - vol_min) / (vol_max - vol_min) # Normalize to [0,1]. 70 | tcnn_input = points_3D_normalized.view(-1, 3) 71 | tcnn_output = self.tiny_cuda_encoding(tcnn_input) 72 | points_enc = tcnn_output.view(*points_3D_normalized.shape[:-1], tcnn_output.shape[-1]) 73 | points_enc = torch.cat([points_enc, points_3D], dim=-1) # [B,R,N,LD+3] 74 | return points_enc 75 | 76 | def _encode_view(self, ray_unit): 77 | return ray_unit 78 | -------------------------------------------------------------------------------- /projects/nerf/trainers/base.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import torch 14 | import wandb 15 | from imaginaire.trainers.base import BaseTrainer 16 | from imaginaire.utils.distributed import is_master, master_only 17 | from tqdm import tqdm 18 | 19 | from projects.nerf.utils.misc import collate_test_data_batches, get_unique_test_data, trim_test_samples 20 | 21 | 22 | class BaseTrainer(BaseTrainer): 23 | """ 24 | A customized BaseTrainer. 25 | """ 26 | 27 | def __init__(self, cfg, is_inference=True, seed=0): 28 | super().__init__(cfg, is_inference=is_inference, seed=seed) 29 | self.metrics = dict() 30 | # The below configs should be properly overridden. 31 | cfg.setdefault("wandb_scalar_iter", 9999999999999) 32 | cfg.setdefault("wandb_image_iter", 9999999999999) 33 | cfg.setdefault("validation_epoch", 9999999999999) 34 | cfg.setdefault("validation_iter", 9999999999999) 35 | 36 | def init_losses(self, cfg): 37 | super().init_losses(cfg) 38 | self.weights = {key: value for key, value in cfg.trainer.loss_weight.items() if value} 39 | 40 | def _end_of_iteration(self, data, current_epoch, current_iteration): 41 | # Log to wandb. 42 | if current_iteration % self.cfg.wandb_scalar_iter == 0: 43 | # Compute the elapsed time (as in the original base trainer). 
44 | self.timer.time_iteration = self.elapsed_iteration_time / self.cfg.wandb_scalar_iter 45 | self.elapsed_iteration_time = 0 46 | # Log scalars. 47 | self.log_wandb_scalars(data, mode="train") 48 | # Exit if the training loss has gone to NaN/inf. 49 | if is_master() and self.losses["total"].isnan(): 50 | self.finalize(self.cfg) 51 | raise ValueError("Training loss has gone to NaN!!!") 52 | if is_master() and self.losses["total"].isinf(): 53 | self.finalize(self.cfg) 54 | raise ValueError("Training loss has gone to infinity!!!") 55 | if current_iteration % self.cfg.wandb_image_iter == 0: 56 | self.log_wandb_images(data, mode="train") 57 | # Run validation on val set. 58 | if current_iteration % self.cfg.validation_iter == 0: 59 | data_all = self.test(self.eval_data_loader, mode="val") 60 | # Log the results to W&B. 61 | if is_master(): 62 | self.log_wandb_scalars(data_all, mode="val") 63 | self.log_wandb_images(data_all, mode="val", max_samples=self.cfg.data.val.max_viz_samples) 64 | 65 | def _end_of_epoch(self, data, current_epoch, current_iteration): 66 | # Run validation on val set. 67 | if current_epoch % self.cfg.validation_epoch == 0: 68 | data_all = self.test(self.eval_data_loader, mode="val") 69 | # Log the results to W&B. 70 | if is_master(): 71 | self.log_wandb_scalars(data_all, mode="val") 72 | self.log_wandb_images(data_all, mode="val", max_samples=self.cfg.data.val.max_viz_samples) 73 | 74 | @master_only 75 | def log_wandb_scalars(self, data, mode=None): 76 | scalars = dict() 77 | # Log scalars (basic info & losses). 78 | if mode == "train": 79 | scalars.update({"optim/lr": self.sched.get_last_lr()[0]}) 80 | scalars.update({"time/iteration": self.timer.time_iteration}) 81 | scalars.update({"time/epoch": self.timer.time_epoch}) 82 | scalars.update({f"{mode}/loss/{key}": value for key, value in self.losses.items()}) 83 | scalars.update(iteration=self.current_iteration, epoch=self.current_epoch) 84 | wandb.log(scalars, step=self.current_iteration) 85 | 86 | @master_only 87 | def log_wandb_images(self, data, mode=None, max_samples=None): 88 | trim_test_samples(data, max_samples=max_samples) 89 | 90 | def model_forward(self, data): 91 | # Model forward. 92 | output = self.model(data) # data = self.model(data) will not return the same data in the case of DDP. 93 | data.update(output) 94 | # Compute loss. 95 | self.timer._time_before_loss() 96 | self._compute_loss(data, mode="train") 97 | total_loss = self._get_total_loss() 98 | return total_loss 99 | 100 | def _compute_loss(self, data, mode=None): 101 | raise NotImplementedError 102 | 103 | def train(self, cfg, data_loader, single_gpu=False, profile=False, show_pbar=False): 104 | self.current_epoch = self.checkpointer.resume_epoch or self.current_epoch 105 | self.current_iteration = self.checkpointer.resume_iteration or self.current_iteration 106 | if ((self.current_epoch % self.cfg.validation_epoch == 0 or 107 | self.current_iteration % self.cfg.validation_iter == 0)): 108 | # Do an initial validation. 109 | data_all = self.test(self.eval_data_loader, mode="val", show_pbar=show_pbar) 110 | # Log the results to W&B. 111 | if is_master(): 112 | self.log_wandb_scalars(data_all, mode="val") 113 | self.log_wandb_images(data_all, mode="val", max_samples=self.cfg.data.val.max_viz_samples) 114 | # Train. 
115 | super().train(cfg, data_loader, single_gpu, profile, show_pbar) 116 | 117 | @torch.no_grad() 118 | def test(self, data_loader, output_dir=None, inference_args=None, mode="test", show_pbar=False): 119 | """The evaluation/inference engine. 120 | Args: 121 | data_loader: The data loader. 122 | output_dir: Output directory to dump the test results. 123 | inference_args: (unused) 124 | mode: Evaluation mode {"val", "test"}. Can be other modes, but will only gather the data. 125 | Returns: 126 | data_all: A dictionary of all the data. 127 | """ 128 | if self.cfg.trainer.ema_config.enabled: 129 | model = self.model.module.averaged_model 130 | else: 131 | model = self.model.module 132 | model.eval() 133 | if show_pbar: 134 | data_loader = tqdm(data_loader, desc="Evaluating", leave=False) 135 | data_batches = [] 136 | for it, data in enumerate(data_loader): 137 | data = self.start_of_iteration(data, current_iteration=self.current_iteration) 138 | output = model.inference(data) 139 | data.update(output) 140 | data_batches.append(data) 141 | # Aggregate the data from all devices and process the results. 142 | data_gather = collate_test_data_batches(data_batches) 143 | # Only the master process should process the results; slaves will just return. 144 | if is_master(): 145 | data_all = get_unique_test_data(data_gather, data_gather["idx"]) 146 | tqdm.write(f"Evaluating with {len(data_all['idx'])} samples.") 147 | # Validate/test. 148 | if mode == "val": 149 | self._compute_loss(data_all, mode=mode) 150 | _ = self._get_total_loss() 151 | if mode == "test": 152 | # Dump the test results for postprocessing. 153 | self.dump_test_results(data_all, output_dir) 154 | return data_all 155 | else: 156 | return 157 | 158 | def dump_test_results(self, data_all, output_dir): 159 | raise NotImplementedError 160 | -------------------------------------------------------------------------------- /projects/nerf/trainers/nerf.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import torch 14 | import torch.nn.functional as torch_F 15 | import wandb 16 | import skvideo.io 17 | 18 | from imaginaire.utils.distributed import master_only 19 | from projects.nerf.trainers.base import BaseTrainer 20 | from imaginaire.utils.visualization import wandb_image, preprocess_image 21 | 22 | 23 | class Trainer(BaseTrainer): 24 | 25 | def __init__(self, cfg, is_inference=True, seed=0): 26 | super().__init__(cfg, is_inference=is_inference, seed=seed) 27 | self.batch_idx, _ = torch.meshgrid(torch.arange(cfg.data.train.batch_size), 28 | torch.arange(cfg.model.rand_rays), indexing="ij") # [B,R] 29 | self.batch_idx = self.batch_idx.cuda() 30 | 31 | def _init_loss(self, cfg): 32 | self.criteria["render"] = self.criteria["render_fine"] = torch.nn.MSELoss() 33 | 34 | def _compute_loss(self, data, mode=None): 35 | if mode == "train": 36 | # Extract the corresponding sampled rays. 
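            # Shape walk-through with the nerf_blender.yaml defaults (batch_size=2,
            # image_size=[400,400], rand_rays=1024):
            #   image_vec:       [2, 160000, 3]  pixels flattened to H*W
            #   self.batch_idx:  [2, 1024]       row index repeated for each sampled ray
            #   data["ray_idx"]: [2, 1024]       flattened pixel index of each sampled ray
            #   image_sampled:   [2, 1024, 3]    ground-truth colors of the sampled rays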
37 | batch_size = len(data["image"]) 38 | image_vec = data["image"].permute(0, 2, 3, 1).view(batch_size, -1, 3) # [B,HW,3] 39 | image_sampled = image_vec[self.batch_idx, data["ray_idx"]] # [B,R,3] 40 | # Compute loss only on randomly sampled rays. 41 | self.losses["render"] = self.criteria["render"](data["rgb"], image_sampled) 42 | self.metrics["psnr"] = -10 * torch_F.mse_loss(data["rgb"], image_sampled).log10() 43 | if self.cfg.model.fine_sampling: 44 | self.losses["render_fine"] = self.criteria["render_fine"](data["rgb_fine"], image_sampled) 45 | self.metrics["psnr_fine"] = -10 * torch_F.mse_loss(data["rgb_fine"], image_sampled).log10() 46 | else: 47 | # Compute loss on the entire image. 48 | self.losses["render"] = self.criteria["render"](data["rgb_map"], data["image"]) 49 | self.metrics["psnr"] = -10 * torch_F.mse_loss(data["rgb_map"], data["image"]).log10() 50 | if self.cfg.model.fine_sampling: 51 | self.losses["render_fine"] = self.criteria["render_fine"](data["rgb_map_fine"], data["image"]) 52 | self.metrics["psnr_fine"] = -10 * torch_F.mse_loss(data["rgb_map_fine"], data["image"]).log10() 53 | 54 | @master_only 55 | def log_wandb_scalars(self, data, mode=None): 56 | super().log_wandb_scalars(data, mode=mode) 57 | scalars = {f"{mode}/PSNR/nerf": self.metrics["psnr"].detach()} 58 | if "render_fine" in self.losses: 59 | scalars.update({f"{mode}/PSNR/nerf_fine": self.metrics["psnr_fine"].detach()}) 60 | wandb.log(scalars, step=self.current_iteration) 61 | 62 | @master_only 63 | def log_wandb_images(self, data, mode=None, max_samples=None): 64 | super().log_wandb_images(data, mode=mode, max_samples=max_samples) 65 | images = {f"{mode}/image_target": wandb_image(data["image"])} 66 | if mode == "val": 67 | images_error = (data["rgb_map"] - data["image"]).abs() 68 | images.update({ 69 | f"{mode}/images": wandb_image(data["rgb_map"]), 70 | f"{mode}/images_error": wandb_image(images_error), 71 | f"{mode}/inv_depth": wandb_image(data["inv_depth_map"]), 72 | }) 73 | if self.cfg.model.fine_sampling: 74 | images_error_fine = (data["rgb_map_fine"] - data["image"]).abs() 75 | images.update({ 76 | f"{mode}/images_fine": wandb_image(data["rgb_map_fine"]), 77 | f"{mode}/images_error_fine": wandb_image(images_error_fine), 78 | f"{mode}/inv_depth_fine": wandb_image(data["inv_depth_map_fine"]), 79 | }) 80 | images.update({"iteration": self.current_iteration}) 81 | images.update({"epoch": self.current_epoch}) 82 | wandb.log(images, step=self.current_iteration) 83 | 84 | def dump_test_results(self, data_all, output_dir): 85 | results = dict( 86 | images_target=preprocess_image(data_all["images_target"]), 87 | image=preprocess_image(data_all["rgb_map"]), 88 | inv_depth=preprocess_image(data_all["inv_depth_map"]), 89 | ) 90 | if self.cfg.model.fine_sampling: 91 | results.update( 92 | image_fine=preprocess_image(data_all["rgb_map_fine"]), 93 | inv_depth_fine=preprocess_image(data_all["inv_depth_map_fine"]), 94 | ) 95 | # Write results as videos. 
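# The writer below streams uint8 HxWx3 frames through scikit-video's FFmpeg wrapper:
# "-r 30" sets the input frame rate, "-crf 10" requests near-lossless H.264 quality, and
# "-pix_fmt yuv420p" keeps the output playable in common players. A minimal standalone
# sketch (the output path and frame contents are made up):
import numpy as np
import skvideo.io

writer = skvideo.io.FFmpegWriter("demo.mp4", inputdict={"-r": "30"},
                                 outputdict={"-crf": "10", "-pix_fmt": "yuv420p"})
for t in range(30):
    frame = np.full((64, 64, 3), t * 8, dtype=np.uint8)  # a simple fading gray frame
    writer.writeFrame(frame)
writer.close()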
96 | inputdict, outputdict = self._get_ffmpeg_dicts() 97 | for key, image_list in results.items(): 98 | print(f"writing video ({key})...") 99 | video_fname = f"{output_dir}/{key}.mp4" 100 | video_writer = skvideo.io.FFmpegWriter(video_fname, inputdict=inputdict, outputdict=outputdict) 101 | for image in image_list: 102 | image = (image * 255).byte().permute(1, 2, 0).numpy() 103 | video_writer.writeFrame(image) 104 | video_writer.close() 105 | 106 | def _get_ffmpeg_dicts(self): 107 | inputdict = {"-r": str(30)} 108 | outputdict = {"-crf": str(10), "-pix_fmt": "yuv420p"} 109 | return inputdict, outputdict 110 | -------------------------------------------------------------------------------- /projects/nerf/utils/misc.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import torch 14 | 15 | from imaginaire.utils.distributed import dist_all_gather_tensor 16 | 17 | 18 | def collate_test_data_batches(data_batches): 19 | """Aggregate the list of test data from all devices and process the results. 20 | Args: 21 | data_batches (list): List of (hierarchical) dictionaries, where leaf entries are tensors. 22 | Returns: 23 | data_gather (dict): (hierarchical) dictionaries, where leaf entries are concatenated tensors. 24 | """ 25 | data_gather = dict() 26 | for key in data_batches[0].keys(): 27 | data_list = [data[key] for data in data_batches] 28 | if isinstance(data_batches[0][key], dict): 29 | data_gather[key] = collate_test_data_batches(data_list) 30 | elif isinstance(data_batches[0][key], torch.Tensor): 31 | data_gather[key] = torch.cat(data_list, dim=0) 32 | data_gather[key] = torch.cat(dist_all_gather_tensor(data_gather[key].contiguous()), dim=0) 33 | else: 34 | raise TypeError 35 | return data_gather 36 | 37 | 38 | def get_unique_test_data(data_gather, idx): 39 | """Aggregate the list of test data from all devices and process the results. 40 | Args: 41 | data_gather (dict): (hierarchical) dictionaries, where leaf entries are tensors. 42 | idx (tensor): sample indices. 43 | Returns: 44 | data_all (dict): (hierarchical) dictionaries, where leaf entries are tensors ordered by idx. 45 | """ 46 | data_all = dict() 47 | for key, value in data_gather.items(): 48 | if isinstance(value, dict): 49 | data_all[key] = get_unique_test_data(value, idx) 50 | elif isinstance(value, torch.Tensor): 51 | data_all[key] = [] 52 | for i in range(max(idx) + 1): 53 | # If multiple occurrences of the same idx, just choose the first one. If no occurrence, just ignore. 
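# Duplicates of the same idx can appear because distributed evaluation may pad or repeat
# samples so every rank sees the same number of batches. A small illustration of the dedup
# pattern used below (values are made up):
import torch
idx = torch.tensor([0, 1, 1, 3])                 # sample 1 was gathered twice, 2 is missing
matches = (idx == 1).nonzero(as_tuple=True)[0]   # tensor([1, 2]): all positions of idx == 1
first = matches[0]                               # keep only the first occurrence
assert matches.tolist() == [1, 2] and first.item() == 1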
54 | matches = (idx == i).nonzero(as_tuple=True)[0] 55 | if matches.numel() != 0: 56 | data_all[key].append(value[matches[0]]) 57 | data_all[key] = torch.stack(data_all[key], dim=0) 58 | else: 59 | raise TypeError 60 | return data_all 61 | 62 | 63 | def trim_test_samples(data, max_samples=None): 64 | for key, value in data.items(): 65 | if isinstance(value, dict): 66 | data[key] = trim_test_samples(value, max_samples=max_samples) 67 | elif isinstance(value, torch.Tensor): 68 | if max_samples is not None: 69 | data[key] = value[:max_samples] 70 | else: 71 | raise TypeError 72 | -------------------------------------------------------------------------------- /projects/nerf/utils/render.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import torch 14 | from torch.cuda.amp import autocast 15 | 16 | 17 | def volume_rendering_weights(ray, densities, depths, depth_far=None): 18 | """The volume rendering function. Details can be found in the NeRF paper. 19 | Args: 20 | ray (tensor [batch,ray,3]): The ray directions in world space. 21 | densities (tensor [batch,ray,samples]): The predicted volume density samples. 22 | depths (tensor [batch,ray,samples,1]): The corresponding depth samples. 23 | depth_far (tensor [batch,ray,1,1]): The farthest depth for computing the last interval. 24 | Returns: 25 | weights (tensor [batch,ray,samples,1]): The predicted weight of each sampled point along the ray (in [0,1]). 26 | """ 27 | ray_length = ray.norm(dim=-1, keepdim=True) # [B,R,1] 28 | if depth_far is None: 29 | depth_far = torch.empty_like(depths[..., :1, :]).fill_(1e10) # [B,R,1,1] 30 | depths_aug = torch.cat([depths, depth_far], dim=2) # [B,R,N+1,1] 31 | dists = depths_aug * ray_length[..., None] # [B,R,N+1,1] 32 | # Volume rendering: compute rendering weights (using quadrature). 33 | dist_intvs = dists[..., 1:, 0] - dists[..., :-1, 0] # [B,R,N] 34 | sigma_delta = densities * dist_intvs # [B,R,N] 35 | sigma_delta_0 = torch.cat([torch.zeros_like(sigma_delta[..., :1]), 36 | sigma_delta[..., :-1]], dim=2) # [B,R,N] 37 | T = (-sigma_delta_0.cumsum(dim=2)).exp_() # [B,R,N] 38 | alphas = 1 - (-sigma_delta).exp_() # [B,R,N] 39 | # Compute weights for compositing samples. 40 | weights = (T * alphas)[..., None] # [B,R,N,1] 41 | return weights 42 | 43 | 44 | def volume_rendering_weights_dist(densities, dists, dist_far=None): 45 | """The volume rendering function. Details can be found in the NeRF paper. 46 | Args: 47 | densities (tensor [batch,ray,samples]): The predicted volume density samples. 48 | dists (tensor [batch,ray,samples,1]): The corresponding distance samples. 49 | dist_far (tensor [batch,ray,1,1]): The farthest distance for computing the last interval. 50 | Returns: 51 | weights (tensor [batch,ray,samples,1]): The predicted weight of each sampled point along the ray (in [0,1]). 52 | """ 53 | # TODO: re-consolidate!! 
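# The rendering-weight functions in this file implement the standard NeRF quadrature:
#   alpha_i = 1 - exp(-sigma_i * delta_i),  T_i = exp(-sum_{j<i} sigma_j * delta_j),
#   w_i = T_i * alpha_i,  which telescopes to  sum_i w_i = 1 - exp(-sum_i sigma_i * delta_i) <= 1.
# A quick numerical check of that identity with made-up densities and distances:
import torch
densities = torch.rand(1, 1, 8) * 5.0                  # [B,R,N] with B=R=1, N=8
dists = torch.linspace(0.0, 1.0, 9).view(1, 1, 9, 1)   # N+1 sample distances
dist_intvs = dists[..., 1:, 0] - dists[..., :-1, 0]    # [B,R,N]
sigma_delta = densities * dist_intvs
sigma_delta_0 = torch.cat([torch.zeros_like(sigma_delta[..., :1]), sigma_delta[..., :-1]], dim=2)
weights = (-sigma_delta_0.cumsum(dim=2)).exp() * (1 - (-sigma_delta).exp())
assert torch.allclose(weights.sum(dim=2), 1 - (-sigma_delta.sum(dim=2)).exp(), atol=1e-5)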
54 | if dist_far is None: 55 | dist_far = torch.empty_like(dists[..., :1, :]).fill_(1e10) # [B,R,1,1] 56 | dists = torch.cat([dists, dist_far], dim=2) # [B,R,N+1,1] 57 | # Volume rendering: compute rendering weights (using quadrature). 58 | dist_intvs = dists[..., 1:, 0] - dists[..., :-1, 0] # [B,R,N] 59 | sigma_delta = densities * dist_intvs # [B,R,N] 60 | sigma_delta_0 = torch.cat([torch.zeros_like(sigma_delta[..., :1]), sigma_delta[..., :-1]], dim=2) # [B,R,N] 61 | T = (-sigma_delta_0.cumsum(dim=2)).exp_() # [B,R,N] 62 | alphas = 1 - (-sigma_delta).exp_() # [B,R,N] 63 | # Compute weights for compositing samples. 64 | weights = (T * alphas)[..., None] # [B,R,N,1] 65 | return weights 66 | 67 | 68 | def volume_rendering_alphas_dist(densities, dists, dist_far=None): 69 | """The volume rendering function. Details can be found in the NeRF paper. 70 | Args: 71 | densities (tensor [batch,ray,samples]): The predicted volume density samples. 72 | dists (tensor [batch,ray,samples,1]): The corresponding distance samples. 73 | dist_far (tensor [batch,ray,1,1]): The farthest distance for computing the last interval. 74 | Returns: 75 | alphas (tensor [batch,ray,samples,1]): The occupancy of each sampled point along the ray (in [0,1]). 76 | """ 77 | if dist_far is None: 78 | dist_far = torch.empty_like(dists[..., :1, :]).fill_(1e10) # [B,R,1,1] 79 | dists = torch.cat([dists, dist_far], dim=2) # [B,R,N+1,1] 80 | # Volume rendering: compute rendering weights (using quadrature). 81 | dist_intvs = dists[..., 1:, 0] - dists[..., :-1, 0] # [B,R,N] 82 | sigma_delta = densities * dist_intvs # [B,R,N] 83 | alphas = 1 - (-sigma_delta).exp_() # [B,R,N] 84 | return alphas 85 | 86 | 87 | def alpha_compositing_weights(alphas): 88 | """Alpha compositing of (sampled) MPIs given their RGBs and alphas. 89 | Args: 90 | alphas (tensor [batch,ray,samples]): The predicted opacity values. 91 | Returns: 92 | weights (tensor [batch,ray,samples,1]): The predicted weight of each MPI (in [0,1]). 93 | """ 94 | alphas_front = torch.cat([torch.zeros_like(alphas[..., :1]), 95 | alphas[..., :-1]], dim=2) # [B,R,N] 96 | with autocast(enabled=False): # TODO: may be unstable in some cases. 97 | visibility = (1 - alphas_front).cumprod(dim=2) # [B,R,N] 98 | weights = (alphas * visibility)[..., None] # [B,R,N,1] 99 | return weights 100 | 101 | 102 | def composite(quantities, weights): 103 | """Composite the samples to render the RGB/depth/opacity of the corresponding pixels. 104 | Args: 105 | quantities (tensor [batch,ray,samples,k]): The quantity to be weighted summed. 106 | weights (tensor [batch,ray,samples,1]): The predicted weight of each sampled point along the ray. 107 | Returns: 108 | quantity (tensor [batch,ray,k]): The expected (rendered) quantity. 109 | """ 110 | # Integrate RGB and depth weighted by probability. 111 | quantity = (quantities * weights).sum(dim=2) # [B,R,K] 112 | return quantity 113 | -------------------------------------------------------------------------------- /projects/nerf/utils/visualize.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. 
Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import numpy as np 14 | import torch 15 | import matplotlib.pyplot as plt 16 | import plotly.graph_objs as go 17 | import k3d 18 | 19 | from projects.nerf.utils import camera 20 | 21 | 22 | def get_camera_mesh(pose, depth=1): 23 | vertices = torch.tensor([[-0.5, -0.5, 1], 24 | [0.5, -0.5, 1], 25 | [0.5, 0.5, 1], 26 | [-0.5, 0.5, 1], 27 | [0, 0, 0]]) * depth # [6,3] 28 | faces = torch.tensor([[0, 1, 2], 29 | [0, 2, 3], 30 | [0, 1, 4], 31 | [1, 2, 4], 32 | [2, 3, 4], 33 | [3, 0, 4]]) # [6,3] 34 | vertices = camera.cam2world(vertices[None], pose) # [N,6,3] 35 | wireframe = vertices[:, [0, 1, 2, 3, 0, 4, 1, 2, 4, 3]] # [N,10,3] 36 | return vertices, faces, wireframe 37 | 38 | 39 | def merge_meshes(vertices, faces): 40 | mesh_N, vertex_N = vertices.shape[:2] 41 | faces_merged = torch.cat([faces + i * vertex_N for i in range(mesh_N)], dim=0) 42 | vertices_merged = vertices.view(-1, vertices.shape[-1]) 43 | return vertices_merged, faces_merged 44 | 45 | 46 | def merge_wireframes_k3d(wireframe): 47 | wf_first, wf_last, wf_dummy = wireframe[:, :1], wireframe[:, -1:], wireframe[:, :1] * np.nan 48 | wireframe_merged = torch.cat([wf_first, wireframe, wf_last, wf_dummy], dim=1) 49 | return wireframe_merged 50 | 51 | 52 | def merge_wireframes_plotly(wireframe): 53 | wf_dummy = wireframe[:, :1] * np.nan 54 | wireframe_merged = torch.cat([wireframe, wf_dummy], dim=1).view(-1, 3) 55 | return wireframe_merged 56 | 57 | 58 | def get_xyz_indicators(pose, length=0.1): 59 | xyz = torch.eye(4, 3)[None] * length 60 | xyz = camera.cam2world(xyz, pose) 61 | return xyz 62 | 63 | 64 | def merge_xyz_indicators_k3d(xyz): # [N,4,3] 65 | xyz = xyz[:, [[-1, 0], [-1, 1], [-1, 2]]] # [N,3,2,3] 66 | xyz_0, xyz_1 = xyz.unbind(dim=2) # [N,3,3] 67 | xyz_dummy = xyz_0 * np.nan 68 | xyz_merged = torch.stack([xyz_0, xyz_0, xyz_1, xyz_1, xyz_dummy], dim=2) # [N,3,5,3] 69 | return xyz_merged 70 | 71 | 72 | def merge_xyz_indicators_plotly(xyz): # [N,4,3] 73 | xyz = xyz[:, [[-1, 0], [-1, 1], [-1, 2]]] # [N,3,2,3] 74 | xyz_0, xyz_1 = xyz.unbind(dim=2) # [N,3,3] 75 | xyz_dummy = xyz_0 * np.nan 76 | xyz_merged = torch.stack([xyz_0, xyz_1, xyz_dummy], dim=2) # [N,3,3,3] 77 | xyz_merged = xyz_merged.view(-1, 3) 78 | return xyz_merged 79 | 80 | 81 | def k3d_visualize_pose(poses, vis_depth=0.5, xyz_length=0.1, center_size=0.1, xyz_width=0.02, mesh_opacity=0.05): 82 | # poses has shape [N,3,4] potentially in sequential order 83 | N = len(poses) 84 | centers_cam = torch.zeros(N, 1, 3) 85 | centers_world = camera.cam2world(centers_cam, poses) 86 | centers_world = centers_world[:, 0] 87 | # Get the camera wireframes. 88 | vertices, faces, wireframe = get_camera_mesh(poses, depth=vis_depth) 89 | xyz = get_xyz_indicators(poses, length=xyz_length) 90 | vertices_merged, faces_merged = merge_meshes(vertices, faces) 91 | wireframe_merged = merge_wireframes_k3d(wireframe) 92 | xyz_merged = merge_xyz_indicators_k3d(xyz) 93 | # Set the color map for the camera trajectory and the xyz indicators. 
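# k3d expects per-point/vertex colors as packed 24-bit integers (0xRRGGBB), so the loop below
# converts matplotlib's float RGBA output into that form. A tiny sketch of the packing (the
# helper name is illustrative):
def rgb_float_to_hex(r, g, b):
    # r, g, b are floats in [0, 1]; shift each 8-bit channel into place.
    return (int(r * 255) << 16) + (int(g * 255) << 8) + int(b * 255)

assert rgb_float_to_hex(1.0, 0.0, 0.0) == 0xFF0000
assert rgb_float_to_hex(0.0, 0.0, 1.0) == 0x0000FF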
94 | color_map = plt.get_cmap("gist_rainbow") 95 | center_color = [] 96 | vertices_merged_color = [] 97 | wireframe_color = [] 98 | xyz_color = [] 99 | x_hex, y_hex, z_hex = int(255) << 16, int(255) << 8, int(255) 100 | for i in range(N): 101 | # Set the camera pose colors (with a smooth gradient color map). 102 | r, g, b, _ = color_map(i / (N - 1)) 103 | r, g, b = r * 0.8, g * 0.8, b * 0.8 104 | pose_rgb_hex = (int(r * 255) << 16) + (int(g * 255) << 8) + int(b * 255) 105 | center_color += [pose_rgb_hex] 106 | vertices_merged_color += [pose_rgb_hex] * 5 107 | wireframe_color += [pose_rgb_hex] * 13 108 | # Set the xyz indicator colors. 109 | xyz_color += [x_hex] * 5 + [y_hex] * 5 + [z_hex] * 5 110 | # Plot in K3D. 111 | k3d_objects = [ 112 | k3d.points(centers_world, colors=center_color, point_size=center_size, shader="3d"), 113 | k3d.mesh(vertices_merged, faces_merged, colors=vertices_merged_color, side="double", opacity=mesh_opacity), 114 | k3d.line(wireframe_merged, colors=wireframe_color, shader="simple"), 115 | k3d.line(xyz_merged, colors=xyz_color, shader="thick", width=xyz_width), 116 | ] 117 | return k3d_objects 118 | 119 | 120 | def plotly_visualize_pose(poses, vis_depth=0.5, xyz_length=0.5, center_size=2, xyz_width=5, mesh_opacity=0.05): 121 | # poses has shape [N,3,4] potentially in sequential order 122 | N = len(poses) 123 | centers_cam = torch.zeros(N, 1, 3) 124 | centers_world = camera.cam2world(centers_cam, poses) 125 | centers_world = centers_world[:, 0] 126 | # Get the camera wireframes. 127 | vertices, faces, wireframe = get_camera_mesh(poses, depth=vis_depth) 128 | xyz = get_xyz_indicators(poses, length=xyz_length) 129 | vertices_merged, faces_merged = merge_meshes(vertices, faces) 130 | wireframe_merged = merge_wireframes_plotly(wireframe) 131 | xyz_merged = merge_xyz_indicators_plotly(xyz) 132 | # Break up (x,y,z) coordinates. 133 | wireframe_x, wireframe_y, wireframe_z = wireframe_merged.unbind(dim=-1) 134 | xyz_x, xyz_y, xyz_z = xyz_merged.unbind(dim=-1) 135 | centers_x, centers_y, centers_z = centers_world.unbind(dim=-1) 136 | vertices_x, vertices_y, vertices_z = vertices_merged.unbind(dim=-1) 137 | # Set the color map for the camera trajectory and the xyz indicators. 138 | color_map = plt.get_cmap("gist_rainbow") 139 | center_color = [] 140 | faces_merged_color = [] 141 | wireframe_color = [] 142 | xyz_color = [] 143 | x_color, y_color, z_color = *np.eye(3).T, 144 | for i in range(N): 145 | # Set the camera pose colors (with a smooth gradient color map). 146 | r, g, b, _ = color_map(i / (N - 1)) 147 | rgb = np.array([r, g, b]) * 0.8 148 | wireframe_color += [rgb] * 11 149 | center_color += [rgb] 150 | faces_merged_color += [rgb] * 6 151 | xyz_color += [x_color] * 3 + [y_color] * 3 + [z_color] * 3 152 | # Plot in plotly. 
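# Plotly draws a single connected polyline per "lines" trace, so merge_wireframes_plotly and
# merge_xyz_indicators_plotly insert NaN rows as pen-lifts: the line breaks at every NaN,
# letting all camera wireframes share one trace. A minimal illustration with made-up
# coordinates:
import numpy as np
import plotly.graph_objs as go
xs = [0, 1, np.nan, 5, 6]   # the NaN splits the polyline into two separate segments
ys = [0, 0, np.nan, 1, 1]
zs = [0, 0, np.nan, 0, 0]
segments = go.Scatter3d(x=xs, y=ys, z=zs, mode="lines")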
153 | plotly_traces = [ 154 | go.Scatter3d(x=wireframe_x, y=wireframe_y, z=wireframe_z, mode="lines", 155 | line=dict(color=wireframe_color, width=1)), 156 | go.Scatter3d(x=xyz_x, y=xyz_y, z=xyz_z, mode="lines", line=dict(color=xyz_color, width=xyz_width)), 157 | go.Scatter3d(x=centers_x, y=centers_y, z=centers_z, mode="markers", 158 | marker=dict(color=center_color, size=center_size, opacity=1)), 159 | go.Mesh3d(x=vertices_x, y=vertices_y, z=vertices_z, 160 | i=[f[0] for f in faces_merged], j=[f[1] for f in faces_merged], k=[f[2] for f in faces_merged], 161 | facecolor=faces_merged_color, opacity=mesh_opacity), 162 | ] 163 | return plotly_traces 164 | -------------------------------------------------------------------------------- /projects/neuralangelo/configs/base.yaml: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------------------- 2 | # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # NVIDIA CORPORATION and its licensors retain all intellectual property 5 | # and proprietary rights in and to this software, related documentation 6 | # and any modifications thereto. Any use, reproduction, disclosure or 7 | # distribution of this software and related documentation without an express 8 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 9 | # ----------------------------------------------------------------------------- 10 | 11 | logging_iter: 9999999999999 # disable the printing logger 12 | 13 | max_iter: 500000 14 | 15 | wandb_scalar_iter: 100 16 | wandb_image_iter: 10000 17 | validation_iter: 5000 18 | speed_benchmark: False 19 | 20 | checkpoint: 21 | save_iter: 20000 22 | 23 | trainer: 24 | type: projects.neuralangelo.trainer 25 | ema_config: 26 | enabled: False 27 | load_ema_checkpoint: False 28 | loss_weight: 29 | render: 1.0 30 | eikonal: 0.1 31 | curvature: 5e-4 32 | init: 33 | type: none 34 | amp_config: 35 | enabled: False 36 | depth_vis_scale: 0.5 37 | 38 | model: 39 | type: projects.neuralangelo.model 40 | object: 41 | sdf: 42 | mlp: 43 | num_layers: 1 44 | hidden_dim: 256 45 | skip: [] 46 | activ: softplus 47 | activ_params: 48 | beta: 100 49 | geometric_init: True 50 | weight_norm: True 51 | out_bias: 0.5 52 | inside_out: False 53 | encoding: 54 | type: hashgrid 55 | levels: 16 56 | hashgrid: 57 | min_logres: 5 58 | max_logres: 11 59 | dict_size: 22 60 | dim: 8 61 | range: [-2,2] 62 | coarse2fine: 63 | enabled: True 64 | init_active_level: 4 65 | step: 5000 66 | gradient: 67 | mode: numerical 68 | taps: 4 69 | rgb: 70 | mlp: 71 | num_layers: 4 72 | hidden_dim: 256 73 | skip: [] 74 | activ: relu_ 75 | activ_params: {} 76 | weight_norm: True 77 | mode: idr 78 | encoding_view: 79 | type: spherical 80 | levels: 3 81 | s_var: 82 | init_val: 3. 
83 | anneal_end: 0.1 84 | background: 85 | enabled: True 86 | white: False 87 | mlp: 88 | num_layers: 8 89 | hidden_dim: 256 90 | skip: [4] 91 | num_layers_rgb: 2 92 | hidden_dim_rgb: 128 93 | skip_rgb: [] 94 | activ: relu 95 | activ_params: {} 96 | activ_density: softplus 97 | activ_density_params: {} 98 | view_dep: True 99 | encoding: 100 | type: fourier 101 | levels: 10 102 | encoding_view: 103 | type: spherical 104 | levels: 3 105 | render: 106 | rand_rays: 512 107 | num_samples: 108 | coarse: 64 109 | fine: 16 110 | background: 32 111 | num_sample_hierarchy: 4 112 | stratified: True 113 | appear_embed: 114 | enabled: False 115 | dim: 8 116 | 117 | optim: 118 | type: AdamW 119 | params: 120 | lr: 1e-3 121 | weight_decay: 1e-2 122 | sched: 123 | iteration_mode: True 124 | type: two_steps_with_warmup 125 | warm_up_end: 5000 126 | two_steps: [300000,400000] 127 | gamma: 10.0 128 | 129 | data: 130 | type: projects.nerf.datasets.nerf_blender 131 | root: datasets/nerf-synthetic/lego 132 | use_multi_epoch_loader: True 133 | num_workers: 4 134 | preload: True 135 | num_images: # The number of training images. 136 | train: 137 | image_size: [800,800] 138 | batch_size: 2 139 | subset: 140 | val: 141 | image_size: [400,400] 142 | batch_size: 2 143 | subset: 4 144 | max_viz_samples: 16 145 | readjust: 146 | center: [0.,0.,0.] 147 | scale: 1. 148 | -------------------------------------------------------------------------------- /projects/neuralangelo/configs/custom/template.yaml: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------------------- 2 | # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # NVIDIA CORPORATION and its licensors retain all intellectual property 5 | # and proprietary rights in and to this software, related documentation 6 | # and any modifications thereto. Any use, reproduction, disclosure or 7 | # distribution of this software and related documentation without an express 8 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 9 | # ----------------------------------------------------------------------------- 10 | 11 | # NOTE: this template is an example for designing your custom config file. It is not intended to be used directly. 12 | # Please preprocess your custom data and auto-generate the config file before customizing. 13 | 14 | _parent_: projects/neuralangelo/configs/base.yaml 15 | 16 | model: 17 | object: 18 | sdf: 19 | mlp: 20 | inside_out: False 21 | encoding: 22 | coarse2fine: 23 | init_active_level: 8 24 | appear_embed: 25 | enabled: True 26 | dim: 8 27 | 28 | data: 29 | type: projects.neuralangelo.data 30 | root: # The root path of the dataset. 31 | num_images: # The number of training images. 32 | train: 33 | image_size: [1200,1600] 34 | batch_size: 1 35 | subset: 36 | val: 37 | image_size: [300,400] 38 | batch_size: 1 39 | subset: 1 40 | max_viz_samples: 41 | readjust: 42 | center: [0.,0.,0.] 43 | scale: 1. 44 | -------------------------------------------------------------------------------- /projects/neuralangelo/configs/dtu.yaml: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------------------- 2 | # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 
3 | # 4 | # NVIDIA CORPORATION and its licensors retain all intellectual property 5 | # and proprietary rights in and to this software, related documentation 6 | # and any modifications thereto. Any use, reproduction, disclosure or 7 | # distribution of this software and related documentation without an express 8 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 9 | # ----------------------------------------------------------------------------- 10 | 11 | _parent_: projects/neuralangelo/configs/base.yaml 12 | 13 | model: 14 | object: 15 | sdf: 16 | mlp: 17 | inside_out: False 18 | encoding: 19 | coarse2fine: 20 | init_active_level: 4 21 | s_var: 22 | init_val: 1.4 23 | appear_embed: 24 | enabled: False 25 | 26 | data: 27 | type: projects.neuralangelo.data 28 | root: datasets/dtu/dtu_scan24 29 | train: 30 | image_size: [1200,1600] 31 | batch_size: 1 32 | subset: 33 | val: 34 | image_size: [300,400] 35 | batch_size: 1 36 | subset: 1 37 | max_viz_samples: 16 38 | -------------------------------------------------------------------------------- /projects/neuralangelo/configs/tnt.yaml: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------------------- 2 | # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # NVIDIA CORPORATION and its licensors retain all intellectual property 5 | # and proprietary rights in and to this software, related documentation 6 | # and any modifications thereto. Any use, reproduction, disclosure or 7 | # distribution of this software and related documentation without an express 8 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 9 | # ----------------------------------------------------------------------------- 10 | 11 | _parent_: projects/neuralangelo/configs/base.yaml 12 | 13 | model: 14 | object: 15 | sdf: 16 | mlp: 17 | inside_out: False # True for Meetingroom. 18 | encoding: 19 | coarse2fine: 20 | init_active_level: 8 21 | appear_embed: 22 | enabled: True 23 | dim: 8 24 | 25 | data: 26 | type: projects.neuralangelo.data 27 | root: datasets/tanks_and_temples/Barn 28 | num_images: 410 # The number of training images. 29 | train: 30 | image_size: [835,1500] 31 | batch_size: 1 32 | subset: 33 | val: 34 | image_size: [300,540] 35 | batch_size: 1 36 | subset: 1 37 | max_viz_samples: 16 38 | -------------------------------------------------------------------------------- /projects/neuralangelo/data.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 
10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import json 14 | import numpy as np 15 | import torch 16 | import torchvision.transforms.functional as torchvision_F 17 | from PIL import Image, ImageFile 18 | 19 | from projects.nerf.datasets import base 20 | from projects.nerf.utils import camera 21 | 22 | ImageFile.LOAD_TRUNCATED_IMAGES = True 23 | 24 | 25 | class Dataset(base.Dataset): 26 | 27 | def __init__(self, cfg, is_inference=False): 28 | super().__init__(cfg, is_inference=is_inference, is_test=False) 29 | cfg_data = cfg.data 30 | self.root = cfg_data.root 31 | self.preload = cfg_data.preload 32 | self.H, self.W = cfg_data.val.image_size if is_inference else cfg_data.train.image_size 33 | meta_fname = f"{cfg_data.root}/transforms.json" 34 | with open(meta_fname) as file: 35 | self.meta = json.load(file) 36 | self.list = self.meta["frames"] 37 | if cfg_data[self.split].subset: 38 | subset = cfg_data[self.split].subset 39 | subset_idx = np.linspace(0, len(self.list), subset+1)[:-1].astype(int) 40 | self.list = [self.list[i] for i in subset_idx] 41 | self.num_rays = cfg.model.render.rand_rays 42 | self.readjust = getattr(cfg_data, "readjust", None) 43 | # Preload dataset if possible. 44 | if cfg_data.preload: 45 | self.images = self.preload_threading(self.get_image, cfg_data.num_workers) 46 | self.cameras = self.preload_threading(self.get_camera, cfg_data.num_workers, data_str="cameras") 47 | 48 | def __getitem__(self, idx): 49 | """Process raw data and return processed data in a dictionary. 50 | 51 | Args: 52 | idx: The index of the sample of the dataset. 53 | Returns: A dictionary containing the data. 54 | idx (scalar): The index of the sample of the dataset. 55 | image (R tensor): Image idx for per-image embedding. 56 | image (Rx3 tensor): Image with pixel values in [0,1] for supervision. 57 | intr (3x3 tensor): The camera intrinsics of `image`. 58 | pose (3x4 tensor): The camera extrinsics [R,t] of `image`. 59 | """ 60 | # Keep track of sample index for convenience. 61 | sample = dict(idx=idx) 62 | # Get the images. 63 | image, image_size_raw = self.images[idx] if self.preload else self.get_image(idx) 64 | image = self.preprocess_image(image) 65 | # Get the cameras (intrinsics and pose). 66 | intr, pose = self.cameras[idx] if self.preload else self.get_camera(idx) 67 | intr, pose = self.preprocess_camera(intr, pose, image_size_raw) 68 | # Pre-sample ray indices. 69 | if self.split == "train": 70 | ray_idx = torch.randperm(self.H * self.W)[:self.num_rays] # [R] 71 | image_sampled = image.flatten(1, 2)[:, ray_idx].t() # [R,3] 72 | sample.update( 73 | ray_idx=ray_idx, 74 | image_sampled=image_sampled, 75 | intr=intr, 76 | pose=pose, 77 | ) 78 | else: # keep image during inference 79 | sample.update( 80 | image=image, 81 | intr=intr, 82 | pose=pose, 83 | ) 84 | return sample 85 | 86 | def get_image(self, idx): 87 | fpath = self.list[idx]["file_path"] 88 | image_fname = f"{self.root}/{fpath}" 89 | image = Image.open(image_fname) 90 | image.load() 91 | image_size_raw = image.size 92 | return image, image_size_raw 93 | 94 | def preprocess_image(self, image): 95 | # Resize the image. 96 | image = image.resize((self.W, self.H)) 97 | image = torchvision_F.to_tensor(image) 98 | rgb = image[:3] 99 | return rgb 100 | 101 | def get_camera(self, idx): 102 | # Camera intrinsics. 
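# The intrinsics assembled below follow the pinhole layout
#   K = [[fl_x, sk_x, cx],
#        [sk_y, fl_y, cy],
#        [0,    0,    1 ]]
# with the entries read from transforms.json. A hedged sketch of how such a K maps
# camera-space points to pixels (the helper is illustrative, not part of this dataset class):
import torch

def project_pinhole(K, xyz_cam):
    # xyz_cam: [N,3] points in front of the camera (z > 0).
    uvw = xyz_cam @ K.t()            # apply K to each point
    return uvw[:, :2] / uvw[:, 2:]   # perspective divide -> [N,2] pixel coordinates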
103 | intr = torch.tensor([[self.meta["fl_x"], self.meta["sk_x"], self.meta["cx"]], 104 | [self.meta["sk_y"], self.meta["fl_y"], self.meta["cy"]], 105 | [0, 0, 1]]).float() 106 | # Camera pose. 107 | c2w_gl = torch.tensor(self.list[idx]["transform_matrix"], dtype=torch.float32) 108 | c2w = self._gl_to_cv(c2w_gl) 109 | # center scene 110 | center = np.array(self.meta["sphere_center"]) 111 | center += np.array(getattr(self.readjust, "center", [0])) if self.readjust else 0. 112 | c2w[:3, -1] -= center 113 | # scale scene 114 | scale = np.array(self.meta["sphere_radius"]) 115 | scale *= getattr(self.readjust, "scale", 1.) if self.readjust else 1. 116 | c2w[:3, -1] /= scale 117 | w2c = camera.Pose().invert(c2w[:3]) 118 | return intr, w2c 119 | 120 | def preprocess_camera(self, intr, pose, image_size_raw): 121 | # Adjust the intrinsics according to the resized image. 122 | intr = intr.clone() 123 | raw_W, raw_H = image_size_raw 124 | intr[0] *= self.W / raw_W 125 | intr[1] *= self.H / raw_H 126 | return intr, pose 127 | 128 | def _gl_to_cv(self, gl): 129 | # convert to CV convention used in Imaginaire 130 | cv = gl * torch.tensor([1, -1, -1, 1]) 131 | return cv 132 | -------------------------------------------------------------------------------- /projects/neuralangelo/scripts/convert_dtu_to_json.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 
10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import numpy as np 14 | import json 15 | from argparse import ArgumentParser 16 | import os 17 | import cv2 18 | from PIL import Image, ImageFile 19 | from glob import glob 20 | import math 21 | import sys 22 | from pathlib import Path 23 | 24 | 25 | dir_path = Path(os.path.dirname(os.path.realpath(__file__))).parents[2] 26 | sys.path.append(dir_path.__str__()) 27 | from projects.neuralangelo.scripts.convert_data_to_json import _cv_to_gl # noqa: E402 28 | 29 | ImageFile.LOAD_TRUNCATED_IMAGES = True 30 | 31 | 32 | def load_K_Rt_from_P(filename, P=None): 33 | # This function is borrowed from IDR: https://github.com/lioryariv/idr 34 | if P is None: 35 | lines = open(filename).read().splitlines() 36 | if len(lines) == 4: 37 | lines = lines[1:] 38 | lines = [[x[0], x[1], x[2], x[3]] for x in (x.split(" ") for x in lines)] 39 | P = np.asarray(lines).astype(np.float32).squeeze() 40 | 41 | out = cv2.decomposeProjectionMatrix(P) 42 | K = out[0] 43 | R = out[1] 44 | t = out[2] 45 | 46 | K = K / K[2, 2] 47 | intrinsics = np.eye(4) 48 | intrinsics[:3, :3] = K 49 | 50 | pose = np.eye(4, dtype=np.float32) 51 | pose[:3, :3] = R.transpose() 52 | pose[:3, 3] = (t[:3] / t[3])[:, 0] 53 | 54 | return intrinsics, pose 55 | 56 | 57 | def dtu_to_json(args): 58 | assert args.dtu_path, "Provide path to DTU dataset" 59 | scene_list = os.listdir(args.dtu_path) 60 | 61 | for scene in scene_list: 62 | scene_path = os.path.join(args.dtu_path, scene) 63 | if not os.path.isdir(scene_path) or 'scan' not in scene: 64 | continue 65 | 66 | out = { 67 | "k1": 0.0, # take undistorted images only 68 | "k2": 0.0, 69 | "k3": 0.0, 70 | "k4": 0.0, 71 | "p1": 0.0, 72 | "p2": 0.0, 73 | "is_fisheye": False, 74 | "frames": [] 75 | } 76 | 77 | camera_param = dict(np.load(os.path.join(scene_path, 'cameras_sphere.npz'))) 78 | images_lis = sorted(glob(os.path.join(scene_path, 'image/*.png'))) 79 | for idx, image in enumerate(images_lis): 80 | image = os.path.basename(image) 81 | 82 | world_mat = camera_param['world_mat_%d' % idx] 83 | scale_mat = camera_param['scale_mat_%d' % idx] 84 | 85 | # scale and decompose 86 | P = world_mat @ scale_mat 87 | P = P[:3, :4] 88 | intrinsic_param, c2w = load_K_Rt_from_P(None, P) 89 | c2w_gl = _cv_to_gl(c2w) 90 | 91 | frame = {"file_path": 'image/' + image, "transform_matrix": c2w_gl.tolist()} 92 | out["frames"].append(frame) 93 | 94 | fl_x = intrinsic_param[0][0] 95 | fl_y = intrinsic_param[1][1] 96 | cx = intrinsic_param[0][2] 97 | cy = intrinsic_param[1][2] 98 | sk_x = intrinsic_param[0][1] 99 | sk_y = intrinsic_param[1][0] 100 | w, h = Image.open(os.path.join(scene_path, 'image', image)).size 101 | 102 | angle_x = math.atan(w / (fl_x * 2)) * 2 103 | angle_y = math.atan(h / (fl_y * 2)) * 2 104 | 105 | scale_mat = scale_mat.astype(float) 106 | 107 | out.update({ 108 | "camera_angle_x": angle_x, 109 | "camera_angle_y": angle_y, 110 | "fl_x": fl_x, 111 | "fl_y": fl_y, 112 | "cx": cx, 113 | "cy": cy, 114 | "sk_x": sk_x, 115 | "sk_y": sk_y, 116 | "w": int(w), 117 | "h": int(h), 118 | "aabb_scale": np.exp2(np.rint(np.log2(scale_mat[0, 0]))), # power of two, for INGP resolution computation 119 | "sphere_center": [0., 0., 0.], 120 | "sphere_radius": 1., 121 | }) 122 | 123 | file_path = os.path.join(scene_path, 'transforms.json') 124 | with open(file_path, "w") as outputfile: 125 | json.dump(out, outputfile, indent=2) 126 | print('Writing data to json file: ', file_path) 127 | 128 | 129 | if __name__ == 
'__main__': 130 | parser = ArgumentParser() 131 | parser.add_argument('--dtu_path', type=str, default=None) 132 | 133 | args = parser.parse_args() 134 | 135 | dtu_to_json(args) 136 | -------------------------------------------------------------------------------- /projects/neuralangelo/scripts/extract_mesh.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import argparse 14 | import json 15 | import os 16 | import sys 17 | import numpy as np 18 | from functools import partial 19 | 20 | sys.path.append(os.getcwd()) 21 | from imaginaire.config import Config, recursive_update_strict, parse_cmdline_arguments # noqa: E402 22 | from imaginaire.utils.distributed import init_dist, get_world_size, is_master, master_only_print as print # noqa: E402 23 | from imaginaire.utils.gpu_affinity import set_affinity # noqa: E402 24 | from imaginaire.trainers.utils.get_trainer import get_trainer # noqa: E402 25 | from projects.neuralangelo.utils.mesh import extract_mesh, extract_texture # noqa: E402 26 | 27 | 28 | def parse_args(): 29 | parser = argparse.ArgumentParser(description="Training") 30 | parser.add_argument("--config", required=True, help="Path to the training config file.") 31 | parser.add_argument("--checkpoint", default="", help="Checkpoint path.") 32 | parser.add_argument('--local_rank', type=int, default=os.getenv('LOCAL_RANK', 0)) 33 | parser.add_argument('--single_gpu', action='store_true') 34 | parser.add_argument("--resolution", default=512, type=int, help="Marching cubes resolution") 35 | parser.add_argument("--block_res", default=64, type=int, help="Block-wise resolution for marching cubes") 36 | parser.add_argument("--output_file", default="mesh.ply", type=str, help="Output file name") 37 | parser.add_argument("--textured", action="store_true", help="Export mesh with texture") 38 | parser.add_argument("--keep_lcc", action="store_true", 39 | help="Keep only largest connected component. May remove thin structures.") 40 | args, cfg_cmd = parser.parse_known_args() 41 | return args, cfg_cmd 42 | 43 | 44 | def main(): 45 | args, cfg_cmd = parse_args() 46 | set_affinity(args.local_rank) 47 | cfg = Config(args.config) 48 | 49 | cfg_cmd = parse_cmdline_arguments(cfg_cmd) 50 | recursive_update_strict(cfg, cfg_cmd) 51 | 52 | # If args.single_gpu is set to True, we will disable distributed data parallel. 53 | if not args.single_gpu: 54 | # this disables nccl timeout 55 | os.environ["NCLL_BLOCKING_WAIT"] = "0" 56 | os.environ["NCCL_ASYNC_ERROR_HANDLING"] = "0" 57 | cfg.local_rank = args.local_rank 58 | init_dist(cfg.local_rank, rank=-1, world_size=-1) 59 | print(f"Running mesh extraction with {get_world_size()} GPUs.") 60 | 61 | cfg.logdir = '' 62 | 63 | # Initialize data loaders and models. 64 | trainer = get_trainer(cfg, is_inference=True, seed=0) 65 | # Load checkpoint. 
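# Further down, the marching-cubes lattice covers the normalized [-1,1]^3 bounds, so the
# voxel size passed to extract_mesh is intv = 2.0 / resolution, processed block by block to
# bound memory. A small arithmetic sketch with example values (2048 and 128 are illustrative,
# not the script's defaults):
resolution, block_res = 2048, 128
intv = 2.0 / resolution                      # voxel size in the normalized frame
num_blocks = (resolution // block_res) ** 3  # 4096 blocks of 128^3 voxels each
assert num_blocks == 4096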
66 | trainer.checkpointer.load(args.checkpoint, load_opt=False, load_sch=False) 67 | trainer.model.eval() 68 | 69 | # Set the coarse-to-fine levels. 70 | trainer.current_iteration = trainer.checkpointer.eval_iteration 71 | if cfg.model.object.sdf.encoding.coarse2fine.enabled: 72 | trainer.model_module.neural_sdf.set_active_levels(trainer.current_iteration) 73 | if cfg.model.object.sdf.gradient.mode == "numerical": 74 | trainer.model_module.neural_sdf.set_normal_epsilon() 75 | 76 | meta_fname = f"{cfg.data.root}/transforms.json" 77 | with open(meta_fname) as file: 78 | meta = json.load(file) 79 | 80 | if "aabb_range" in meta: 81 | bounds = (np.array(meta["aabb_range"]) - np.array(meta["sphere_center"])[..., None]) / meta["sphere_radius"] 82 | else: 83 | bounds = np.array([[-1.0, 1.0], [-1.0, 1.0], [-1.0, 1.0]]) 84 | 85 | sdf_func = lambda x: -trainer.model_module.neural_sdf.sdf(x) # noqa: E731 86 | texture_func = partial(extract_texture, neural_sdf=trainer.model_module.neural_sdf, 87 | neural_rgb=trainer.model_module.neural_rgb, 88 | appear_embed=trainer.model_module.appear_embed) if args.textured else None 89 | mesh = extract_mesh(sdf_func=sdf_func, bounds=bounds, intv=(2.0 / args.resolution), 90 | block_res=args.block_res, texture_func=texture_func, filter_lcc=args.keep_lcc) 91 | 92 | if is_master(): 93 | print(f"vertices: {len(mesh.vertices)}") 94 | print(f"faces: {len(mesh.faces)}") 95 | if args.textured: 96 | print(f"colors: {len(mesh.visual.vertex_colors)}") 97 | # center and scale 98 | mesh.vertices = mesh.vertices * meta["sphere_radius"] + np.array(meta["sphere_center"]) 99 | mesh.update_faces(mesh.nondegenerate_faces()) 100 | os.makedirs(os.path.dirname(args.output_file), exist_ok=True) 101 | mesh.export(args.output_file) 102 | 103 | 104 | if __name__ == "__main__": 105 | main() 106 | -------------------------------------------------------------------------------- /projects/neuralangelo/scripts/generate_config.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 
10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import os 14 | import sys 15 | from argparse import ArgumentParser 16 | from pathlib import Path 17 | import yaml 18 | from addict import Dict 19 | from PIL import Image, ImageFile 20 | 21 | dir_path = Path(os.path.dirname(os.path.realpath(__file__))).parents[2] 22 | sys.path.append(dir_path.__str__()) 23 | 24 | ImageFile.LOAD_TRUNCATED_IMAGES = True 25 | 26 | 27 | def generate_config(args): 28 | cfg = Dict() 29 | cfg._parent_ = "projects/neuralangelo/configs/base.yaml" 30 | num_images = len(os.listdir(os.path.join(args.data_dir, "images"))) 31 | # model cfg 32 | if args.auto_exposure_wb: 33 | cfg.data.num_images = num_images 34 | cfg.model.appear_embed.enabled = True 35 | cfg.model.appear_embed.dim = 8 36 | if num_images < 4: # default is 4 37 | cfg.data.val.subset = num_images 38 | else: 39 | cfg.model.appear_embed.enabled = False 40 | if args.scene_type == "outdoor": 41 | cfg.model.object.sdf.mlp.inside_out = False 42 | cfg.model.object.sdf.encoding.coarse2fine.init_active_level = 8 43 | elif args.scene_type == "indoor": 44 | cfg.model.object.sdf.mlp.inside_out = True 45 | cfg.model.object.sdf.encoding.coarse2fine.init_active_level = 8 46 | cfg.model.background.enabled = False 47 | cfg.model.render.num_samples.background = 0 48 | elif args.scene_type == "object": 49 | cfg.model.object.sdf.mlp.inside_out = False 50 | cfg.model.object.sdf.encoding.coarse2fine.init_active_level = 4 51 | else: 52 | raise TypeError("Unknown scene type") 53 | # data config 54 | cfg.data.type = "projects.neuralangelo.data" 55 | cfg.data.root = args.data_dir 56 | img = Image.open(os.path.join(args.data_dir, "images", os.listdir(os.path.join(args.data_dir, "images"))[0])) 57 | w, h = img.size 58 | cfg.data.train.image_size = [h, w] 59 | short_size = args.val_short_size 60 | cfg.data.val.image_size = [short_size, int(w/h*short_size)] if w > h else [int(h/w*short_size), short_size] 61 | cfg.data.readjust.center = [0., 0., 0.] 62 | cfg.data.readjust.scale = 1. 63 | # export cfg 64 | cfg_fname = os.path.join(dir_path, "projects/neuralangelo/configs", f"custom/{args.sequence_name}.yaml") 65 | with open(cfg_fname, "w") as file: 66 | yaml.safe_dump(cfg.to_dict(), file, default_flow_style=False, indent=4) 67 | print("Config generated to file:", cfg_fname) 68 | return 69 | 70 | 71 | if __name__ == "__main__": 72 | parser = ArgumentParser() 73 | parser.add_argument("--sequence_name", type=str, default="recon", help="Name of sequence") 74 | parser.add_argument("--data_dir", type=str, default=None, help="Path to data") 75 | parser.add_argument("--auto_exposure_wb", action="store_true", 76 | help="Video capture with auto-exposure or white-balance") 77 | parser.add_argument("--scene_type", type=str, default="outdoor", choices=["outdoor", "indoor", "object"], 78 | help="Select scene type. 
Outdoor for building-scale reconstruction; " 79 | "indoor for room-scale reconstruction; object for object-centric scene reconstruction.") 80 | parser.add_argument("--val_short_size", type=int, default=300, 81 | help="Set the short side of validation images (for saving compute when rendering val images)") 82 | args = parser.parse_args() 83 | generate_config(args) 84 | -------------------------------------------------------------------------------- /projects/neuralangelo/scripts/preprocess.sh: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------------------- 2 | # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # NVIDIA CORPORATION and its licensors retain all intellectual property 5 | # and proprietary rights in and to this software, related documentation 6 | # and any modifications thereto. Any use, reproduction, disclosure or 7 | # distribution of this software and related documentation without an express 8 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 9 | # ----------------------------------------------------------------------------- 10 | 11 | # usage: preprocess.sh 12 | 13 | data_path=datasets/${1}_ds${3} 14 | bash projects/neuralangelo/scripts/run_ffmpeg.sh ${1} ${2} ${3} 15 | bash projects/neuralangelo/scripts/run_colmap.sh ${data_path} 16 | python3 projects/neuralangelo/scripts/convert_data_to_json.py --data_dir ${data_path} --scene_type ${4} 17 | python3 projects/neuralangelo/scripts/generate_config.py --sequence_name ${1} --data_dir ${data_path} --scene_type ${4} 18 | -------------------------------------------------------------------------------- /projects/neuralangelo/scripts/preprocess_dtu.sh: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------------------- 2 | # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # NVIDIA CORPORATION and its licensors retain all intellectual property 5 | # and proprietary rights in and to this software, related documentation 6 | # and any modifications thereto. Any use, reproduction, disclosure or 7 | # distribution of this software and related documentation without an express 8 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 9 | # ----------------------------------------------------------------------------- 10 | 11 | # usage: dtu_download.sh 12 | 13 | echo "Download DTU data" 14 | mkdir -p "${1}" 15 | curl -L -o data.zip https://www.dropbox.com/sh/w0y8bbdmxzik3uk/AAAaZffBiJevxQzRskoOYcyja?dl=1 16 | unzip data.zip "data_DTU.zip" 17 | rm data.zip 18 | unzip -q data_DTU.zip -d ${1} 19 | rm data_DTU.zip 20 | echo "Generate json files" 21 | python3 projects/neuralangelo/scripts/convert_dtu_to_json.py --dtu_path ${1} 22 | -------------------------------------------------------------------------------- /projects/neuralangelo/scripts/preprocess_tnt.sh: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------------------- 2 | # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # NVIDIA CORPORATION and its licensors retain all intellectual property 5 | # and proprietary rights in and to this software, related documentation 6 | # and any modifications thereto. 
Any use, reproduction, disclosure or 7 | # distribution of this software and related documentation without an express 8 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 9 | # ----------------------------------------------------------------------------- 10 | 11 | # usage: tnt_download.sh 12 | 13 | echo "Download fixed poses for Courthouse" 14 | gdown 10pcCwaQY6hqyiegJGdgmLp_HMFOnsmgq 15 | gdown 19TT5aTz3z60eUVIDhFJ7EgGqpcqQnJEi 16 | mv Courthouse_COLMAP_SfM.log ${1}/Courthouse/Courthouse_COLMAP_SfM.log 17 | mv Courthouse_trans.txt ${1}/Courthouse/Courthouse_trans.txt 18 | 19 | echo "Compute intrinsics, undistort images and generate json files. This may take a while" 20 | python3 projects/neuralangelo/scripts/convert_tnt_to_json.py --tnt_path ${1} 21 | -------------------------------------------------------------------------------- /projects/neuralangelo/scripts/run_colmap.sh: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------------------- 2 | # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # NVIDIA CORPORATION and its licensors retain all intellectual property 5 | # and proprietary rights in and to this software, related documentation 6 | # and any modifications thereto. Any use, reproduction, disclosure or 7 | # distribution of this software and related documentation without an express 8 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 9 | # ----------------------------------------------------------------------------- 10 | 11 | # usage: run_colmap.sh 12 | 13 | colmap feature_extractor \ 14 | --database_path=${1}/database.db \ 15 | --image_path=${1}/images_raw \ 16 | --ImageReader.camera_model=SIMPLE_RADIAL \ 17 | --ImageReader.single_camera=true \ 18 | --SiftExtraction.use_gpu=true \ 19 | --SiftExtraction.num_threads=32 20 | 21 | colmap sequential_matcher \ 22 | --database_path=${1}/database.db \ 23 | --SiftMatching.use_gpu=true 24 | 25 | mkdir -p ${1}/sparse 26 | colmap mapper \ 27 | --database_path=${1}/database.db \ 28 | --image_path=${1}/images_raw \ 29 | --output_path=${1}/sparse 30 | 31 | cp ${1}/sparse/0/*.bin ${1}/sparse/ 32 | for path in ${1}/sparse/*/; do 33 | m=$(basename ${path}) 34 | if [ ${m} != "0" ]; then 35 | colmap model_merger \ 36 | --input_path1=${1}/sparse \ 37 | --input_path2=${1}/sparse/${m} \ 38 | --output_path=${1}/sparse 39 | colmap bundle_adjuster \ 40 | --input_path=${1}/sparse \ 41 | --output_path=${1}/sparse 42 | fi 43 | done 44 | 45 | colmap image_undistorter \ 46 | --image_path=${1}/images_raw \ 47 | --input_path=${1}/sparse \ 48 | --output_path=${1} \ 49 | --output_type=COLMAP 50 | -------------------------------------------------------------------------------- /projects/neuralangelo/scripts/run_ffmpeg.sh: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------------------- 2 | # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # NVIDIA CORPORATION and its licensors retain all intellectual property 5 | # and proprietary rights in and to this software, related documentation 6 | # and any modifications thereto. Any use, reproduction, disclosure or 7 | # distribution of this software and related documentation without an express 8 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 
9 | # ----------------------------------------------------------------------------- 10 | 11 | # usage: run_ffmpeg.sh 12 | 13 | data_path=datasets/${1}_ds${3} 14 | image_path=${data_path}/images_raw 15 | mkdir -p ${image_path} 16 | ffmpeg -i ${2} -vf "select=not(mod(n\,$3))" -vsync vfr -q:v 2 ${image_path}/%06d.jpg 17 | -------------------------------------------------------------------------------- /projects/neuralangelo/scripts/visualize_colmap.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "8b8d7b17-af50-42cd-b531-ef61c49c9e61", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "# Set the work directory to the imaginaire root.\n", 11 | "import os, sys, time\n", 12 | "import pathlib\n", 13 | "root_dir = pathlib.Path().absolute().parents[2]\n", 14 | "os.chdir(root_dir)\n", 15 | "print(f\"Root Directory Path: {root_dir}\")" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "id": "2b5b9e2f-841c-4815-92e0-0c76ed46da62", 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "# Import Python libraries.\n", 26 | "import numpy as np\n", 27 | "import torch\n", 28 | "import k3d\n", 29 | "import json\n", 30 | "import plotly.graph_objs as go\n", 31 | "from collections import OrderedDict\n", 32 | "# Import imaginaire modules.\n", 33 | "from projects.nerf.utils import camera, visualize\n", 34 | "from third_party.colmap.scripts.python.read_write_model import read_model" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "id": "76033016-2d92-4a5d-9e50-3978553e8df4", 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "# Read the COLMAP data.\n", 45 | "colmap_path = \"datasets/lego_ds2\"\n", 46 | "cameras, images, points_3D = read_model(path=f\"{colmap_path}/sparse\", ext=\".bin\")\n", 47 | "# Convert camera poses.\n", 48 | "images = OrderedDict(sorted(images.items()))\n", 49 | "qvecs = torch.from_numpy(np.stack([image.qvec for image in images.values()]))\n", 50 | "tvecs = torch.from_numpy(np.stack([image.tvec for image in images.values()]))\n", 51 | "Rs = camera.quaternion.q_to_R(qvecs)\n", 52 | "poses = torch.cat([Rs, tvecs[..., None]], dim=-1) # [N,3,4]\n", 53 | "print(f\"# images: {len(poses)}\")\n", 54 | "# Get the sparse 3D points and the colors.\n", 55 | "xyzs = torch.from_numpy(np.stack([point.xyz for point in points_3D.values()]))\n", 56 | "rgbs = np.stack([point.rgb for point in points_3D.values()])\n", 57 | "rgbs_int32 = (rgbs[:, 0] * 2**16 + rgbs[:, 1] * 2**8 + rgbs[:, 2]).astype(np.uint32)\n", 58 | "print(f\"# points: {len(xyzs)}\")" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "id": "47862ee1-286c-4877-a181-4b33b7733719", 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "vis_depth = 0.2" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "id": "b6cf60ec-fe6a-43ba-9aaf-e3c7afd88208", 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "# Visualize the bounding sphere.\n", 79 | "json_fname = f\"{colmap_path}/transforms.json\"\n", 80 | "with open(json_fname) as file:\n", 81 | " meta = json.load(file)\n", 82 | "center = meta[\"sphere_center\"]\n", 83 | "radius = meta[\"sphere_radius\"]\n", 84 | "# ------------------------------------------------------------------------------------\n", 85 | "# These variables can be adjusted to make the bounding sphere fit the region of 
interest.\n", 86 | "# The adjusted values can then be set in the config as data.readjust.center and data.readjust.scale\n", 87 | "readjust_center = np.array([0., 0., 0.])\n", 88 | "readjust_scale = 1.\n", 89 | "# ------------------------------------------------------------------------------------\n", 90 | "center += readjust_center\n", 91 | "radius *= readjust_scale\n", 92 | "# Make some points to hallucinate a bounding sphere.\n", 93 | "sphere_points = np.random.randn(100000, 3)\n", 94 | "sphere_points = sphere_points / np.linalg.norm(sphere_points, axis=-1, keepdims=True)\n", 95 | "sphere_points = sphere_points * radius + center" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "id": "e986aed0-1aaf-4772-937c-136db7f2eaec", 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [ 105 | "# You can choose to visualize with Plotly...\n", 106 | "x, y, z = *xyzs.T,\n", 107 | "colors = rgbs / 255.0\n", 108 | "sphere_x, sphere_y, sphere_z = *sphere_points.T,\n", 109 | "sphere_colors = [\"#4488ff\"] * len(sphere_points)\n", 110 | "traces_poses = visualize.plotly_visualize_pose(poses, vis_depth=vis_depth, xyz_length=0.02, center_size=0.01, xyz_width=0.005, mesh_opacity=0.05)\n", 111 | "trace_points = go.Scatter3d(x=x, y=y, z=z, mode=\"markers\", marker=dict(size=1, color=colors, opacity=1), hoverinfo=\"skip\")\n", 112 | "trace_sphere = go.Scatter3d(x=sphere_x, y=sphere_y, z=sphere_z, mode=\"markers\", marker=dict(size=0.5, color=sphere_colors, opacity=0.7), hoverinfo=\"skip\")\n", 113 | "traces_all = traces_poses + [trace_points, trace_sphere]\n", 114 | "layout = go.Layout(scene=dict(xaxis=dict(showspikes=False, backgroundcolor=\"rgba(0,0,0,0)\", gridcolor=\"rgba(0,0,0,0.1)\"),\n", 115 | " yaxis=dict(showspikes=False, backgroundcolor=\"rgba(0,0,0,0)\", gridcolor=\"rgba(0,0,0,0.1)\"),\n", 116 | " zaxis=dict(showspikes=False, backgroundcolor=\"rgba(0,0,0,0)\", gridcolor=\"rgba(0,0,0,0.1)\"),\n", 117 | " xaxis_title=\"X\", yaxis_title=\"Y\", zaxis_title=\"Z\", dragmode=\"orbit\",\n", 118 | " aspectratio=dict(x=1, y=1, z=1), aspectmode=\"data\"), height=800)\n", 119 | "fig = go.Figure(data=traces_all, layout=layout)\n", 120 | "fig.show()" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "id": "fdde170b-4546-4617-9162-a9fcb936347d", 127 | "metadata": {}, 128 | "outputs": [], 129 | "source": [ 130 | "# ... 
or visualize with K3D.\n", 131 | "plot = k3d.plot(name=\"poses\", height=800, camera_rotate_speed=5.0, camera_zoom_speed=3.0, camera_pan_speed=1.0)\n", 132 | "k3d_objects = visualize.k3d_visualize_pose(poses, vis_depth=vis_depth, xyz_length=0.02, center_size=0.01, xyz_width=0.005, mesh_opacity=0.05)\n", 133 | "for k3d_object in k3d_objects:\n", 134 | " plot += k3d_object\n", 135 | "plot += k3d.points(xyzs, colors=rgbs_int32, point_size=0.02, shader=\"flat\")\n", 136 | "plot += k3d.points(sphere_points, color=0x4488ff, point_size=0.01, shader=\"flat\")\n", 137 | "plot.display()\n", 138 | "plot.camera_fov = 30.0" 139 | ] 140 | } 141 | ], 142 | "metadata": { 143 | "kernelspec": { 144 | "display_name": "Python 3 (ipykernel)", 145 | "language": "python", 146 | "name": "python3" 147 | }, 148 | "language_info": { 149 | "codemirror_mode": { 150 | "name": "ipython", 151 | "version": 3 152 | }, 153 | "file_extension": ".py", 154 | "mimetype": "text/x-python", 155 | "name": "python", 156 | "nbconvert_exporter": "python", 157 | "pygments_lexer": "ipython3", 158 | "version": "3.9.13" 159 | } 160 | }, 161 | "nbformat": 4, 162 | "nbformat_minor": 5 163 | } 164 | -------------------------------------------------------------------------------- /projects/neuralangelo/scripts/visualize_mesh.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "8b8d7b17-af50-42cd-b531-ef61c49c9e61", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "# Set the work directory to the imaginaire root.\n", 11 | "import os, sys, time\n", 12 | "import pathlib\n", 13 | "root_dir = pathlib.Path().absolute().parents[2]\n", 14 | "os.chdir(root_dir)\n", 15 | "print(f\"Root Directory Path: {root_dir}\")" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "id": "2b5b9e2f-841c-4815-92e0-0c76ed46da62", 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "# Import Python libraries.\n", 26 | "import numpy as np\n", 27 | "import trimesh\n", 28 | "import k3d" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "id": "84604c4a-8d95-462a-b7f0-acaa0b9f563d", 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "# Load the 3D mesh.\n", 39 | "ply_fname = \"logs/0_debug/18/mesh.ply\"\n", 40 | "mesh = trimesh.load(ply_fname)\n", 41 | "print(f\"# vertices: {len(mesh.vertices)}\")\n", 42 | "print(f\"# faces: {len(mesh.faces)}\")" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "id": "d1f8df0b-8361-40f3-a801-0cc42b920fed", 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "# Visualize with K3D.\n", 53 | "plot = k3d.plot(name=\"mesh\", height=800, camera_rotate_speed=5.0, camera_zoom_speed=3.0, camera_pan_speed=1.0)\n", 54 | "C = mesh.visual.vertex_colors\n", 55 | "colors = C[:,0]*256**2 + C[:,1]*256 + C[:,2]\n", 56 | "plot += k3d.mesh(mesh.vertices, mesh.faces, colors=colors, side=\"double\", opacity=1)\n", 57 | "plot.display()\n", 58 | "plot.camera_fov = 30.0" 59 | ] 60 | } 61 | ], 62 | "metadata": { 63 | "kernelspec": { 64 | "display_name": "Python 3 (ipykernel)", 65 | "language": "python", 66 | "name": "python3" 67 | }, 68 | "language_info": { 69 | "codemirror_mode": { 70 | "name": "ipython", 71 | "version": 3 72 | }, 73 | "file_extension": ".py", 74 | "mimetype": "text/x-python", 75 | "name": "python", 76 | "nbconvert_exporter": "python", 77 | "pygments_lexer": "ipython3", 78 | "version": 
"3.9.13" 79 | } 80 | }, 81 | "nbformat": 4, 82 | "nbformat_minor": 5 83 | } 84 | -------------------------------------------------------------------------------- /projects/neuralangelo/scripts/visualize_transforms.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "8b8d7b17-af50-42cd-b531-ef61c49c9e61", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "# Set the work directory to the imaginaire root.\n", 11 | "import os, sys, time\n", 12 | "import pathlib\n", 13 | "root_dir = pathlib.Path().absolute().parents[2]\n", 14 | "os.chdir(root_dir)\n", 15 | "print(f\"Root Directory Path: {root_dir}\")" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "id": "2b5b9e2f-841c-4815-92e0-0c76ed46da62", 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "# Import Python libraries.\n", 26 | "import numpy as np\n", 27 | "import torch\n", 28 | "import k3d\n", 29 | "import json\n", 30 | "from collections import OrderedDict\n", 31 | "# Import imaginaire modules.\n", 32 | "from projects.nerf.utils import camera, visualize\n", 33 | "from third_party.colmap.scripts.python.read_write_model import read_model" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "id": "97bedecf-da68-44b1-96cf-580ef7e7f3f0", 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "# Read the COLMAP data.\n", 44 | "colmap_path = \"datasets/lego_ds2\"\n", 45 | "json_fname = f\"{colmap_path}/transforms.json\"\n", 46 | "with open(json_fname) as file:\n", 47 | " meta = json.load(file)\n", 48 | "center = meta[\"sphere_center\"]\n", 49 | "radius = meta[\"sphere_radius\"]\n", 50 | "# Convert camera poses.\n", 51 | "poses = []\n", 52 | "for frame in meta[\"frames\"]:\n", 53 | " c2w = torch.tensor(frame[\"transform_matrix\"])\n", 54 | " c2w[:, 1:3] *= -1\n", 55 | " w2c = c2w.inverse()\n", 56 | " pose = w2c[:3] # [3,4]\n", 57 | " poses.append(pose)\n", 58 | "poses = torch.stack(poses, dim=0)\n", 59 | "print(f\"# images: {len(poses)}\")" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "id": "2016d20c-1e58-407f-9810-cbe76dc5ccec", 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "vis_depth = 0.2\n", 70 | "k3d_textures = []" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "id": "d7168a09-6654-4660-b140-66b9dfd6f1e8", 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "# (optional) visualize the images.\n", 81 | "# This block can be skipped if we don't want to visualize the image observations.\n", 82 | "for i, frame in enumerate(meta[\"frames\"]):\n", 83 | " image_fname = frame[\"file_path\"]\n", 84 | " image_path = f\"{colmap_path}/{image_fname}\"\n", 85 | " with open(image_path, \"rb\") as file:\n", 86 | " binary = file.read()\n", 87 | " # Compute the corresponding image corners in 3D.\n", 88 | " pose = poses[i]\n", 89 | " corners = torch.tensor([[-0.5, 0.5, 1], [0.5, 0.5, 1], [-0.5, -0.5, 1]])\n", 90 | " corners *= vis_depth\n", 91 | " corners = camera.cam2world(corners, pose)\n", 92 | " puv = [corners[0].tolist(), (corners[1]-corners[0]).tolist(), (corners[2]-corners[0]).tolist()]\n", 93 | " k3d_texture = k3d.texture(binary, file_format=\"jpg\", puv=puv)\n", 94 | " k3d_textures.append(k3d_texture)" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "id": "b6cf60ec-fe6a-43ba-9aaf-e3c7afd88208", 101 | 
"metadata": {}, 102 | "outputs": [], 103 | "source": [ 104 | "# Visualize the bounding sphere.\n", 105 | "json_fname = f\"{colmap_path}/transforms.json\"\n", 106 | "with open(json_fname) as file:\n", 107 | " meta = json.load(file)\n", 108 | "center = meta[\"sphere_center\"]\n", 109 | "radius = meta[\"sphere_radius\"]\n", 110 | "# ------------------------------------------------------------------------------------\n", 111 | "# These variables can be adjusted to make the bounding sphere fit the region of interest.\n", 112 | "# The adjusted values can then be set in the config as data.readjust.center and data.readjust.scale\n", 113 | "readjust_center = np.array([0., 0., 0.])\n", 114 | "readjust_scale = 1.\n", 115 | "# ------------------------------------------------------------------------------------\n", 116 | "center += readjust_center\n", 117 | "radius *= readjust_scale\n", 118 | "# Make some points to hallucinate a bounding sphere.\n", 119 | "sphere_points = np.random.randn(100000, 3)\n", 120 | "sphere_points = sphere_points / np.linalg.norm(sphere_points, axis=-1, keepdims=True)\n", 121 | "sphere_points = sphere_points * radius + center" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "id": "fdde170b-4546-4617-9162-a9fcb936347d", 128 | "metadata": {}, 129 | "outputs": [], 130 | "source": [ 131 | "# Visualize with K3D.\n", 132 | "plot = k3d.plot(name=\"poses\", height=800, camera_rotate_speed=5.0, camera_zoom_speed=3.0, camera_pan_speed=1.0)\n", 133 | "k3d_objects = visualize.k3d_visualize_pose(poses, vis_depth=vis_depth, xyz_length=0.02, center_size=0.01, xyz_width=0.005, mesh_opacity=0.)\n", 134 | "for k3d_object in k3d_objects:\n", 135 | " plot += k3d_object\n", 136 | "for k3d_texture in k3d_textures:\n", 137 | " plot += k3d_texture\n", 138 | "plot += k3d.points(sphere_points, color=0x4488ff, point_size=0.01, shader=\"flat\")\n", 139 | "plot.display()\n", 140 | "plot.camera_fov = 30.0" 141 | ] 142 | } 143 | ], 144 | "metadata": { 145 | "kernelspec": { 146 | "display_name": "Python 3 (ipykernel)", 147 | "language": "python", 148 | "name": "python3" 149 | }, 150 | "language_info": { 151 | "codemirror_mode": { 152 | "name": "ipython", 153 | "version": 3 154 | }, 155 | "file_extension": ".py", 156 | "mimetype": "text/x-python", 157 | "name": "python", 158 | "nbconvert_exporter": "python", 159 | "pygments_lexer": "ipython3", 160 | "version": "3.9.13" 161 | } 162 | }, 163 | "nbformat": 4, 164 | "nbformat_minor": 5 165 | } 166 | -------------------------------------------------------------------------------- /projects/neuralangelo/trainer.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 
10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import torch 14 | import torch.nn.functional as torch_F 15 | import wandb 16 | 17 | from imaginaire.utils.distributed import master_only 18 | from imaginaire.utils.visualization import wandb_image 19 | from projects.nerf.trainers.base import BaseTrainer 20 | from projects.neuralangelo.utils.misc import get_scheduler, eikonal_loss, curvature_loss 21 | 22 | 23 | class Trainer(BaseTrainer): 24 | 25 | def __init__(self, cfg, is_inference=True, seed=0): 26 | super().__init__(cfg, is_inference=is_inference, seed=seed) 27 | self.metrics = dict() 28 | self.warm_up_end = cfg.optim.sched.warm_up_end 29 | self.cfg_gradient = cfg.model.object.sdf.gradient 30 | if cfg.model.object.sdf.encoding.type == "hashgrid" and cfg.model.object.sdf.encoding.coarse2fine.enabled: 31 | self.c2f_step = cfg.model.object.sdf.encoding.coarse2fine.step 32 | self.model.module.neural_sdf.warm_up_end = self.warm_up_end 33 | 34 | def _init_loss(self, cfg): 35 | self.criteria["render"] = torch.nn.L1Loss() 36 | 37 | def setup_scheduler(self, cfg, optim): 38 | return get_scheduler(cfg.optim, optim) 39 | 40 | def _compute_loss(self, data, mode=None): 41 | if mode == "train": 42 | # Compute loss only on randomly sampled rays. 43 | self.losses["render"] = self.criteria["render"](data["rgb"], data["image_sampled"]) * 3 # FIXME:sumRGB?! 44 | self.metrics["psnr"] = -10 * torch_F.mse_loss(data["rgb"], data["image_sampled"]).log10() 45 | if "eikonal" in self.weights.keys(): 46 | self.losses["eikonal"] = eikonal_loss(data["gradients"], outside=data["outside"]) 47 | if "curvature" in self.weights: 48 | self.losses["curvature"] = curvature_loss(data["hessians"], outside=data["outside"]) 49 | else: 50 | # Compute loss on the entire image. 
51 | self.losses["render"] = self.criteria["render"](data["rgb_map"], data["image"]) 52 | self.metrics["psnr"] = -10 * torch_F.mse_loss(data["rgb_map"], data["image"]).log10() 53 | 54 | def get_curvature_weight(self, current_iteration, init_weight): 55 | if "curvature" in self.weights: 56 | if current_iteration <= self.warm_up_end: 57 | self.weights["curvature"] = current_iteration / self.warm_up_end * init_weight 58 | else: 59 | model = self.model_module 60 | decay_factor = model.neural_sdf.growth_rate ** (model.neural_sdf.anneal_levels - 1) 61 | self.weights["curvature"] = init_weight / decay_factor 62 | 63 | def _start_of_iteration(self, data, current_iteration): 64 | model = self.model_module 65 | self.progress = model.progress = current_iteration / self.cfg.max_iter 66 | if self.cfg.model.object.sdf.encoding.coarse2fine.enabled: 67 | model.neural_sdf.set_active_levels(current_iteration) 68 | if self.cfg_gradient.mode == "numerical": 69 | model.neural_sdf.set_normal_epsilon() 70 | self.get_curvature_weight(current_iteration, self.cfg.trainer.loss_weight.curvature) 71 | elif self.cfg_gradient.mode == "numerical": 72 | model.neural_sdf.set_normal_epsilon() 73 | 74 | return super()._start_of_iteration(data, current_iteration) 75 | 76 | @master_only 77 | def log_wandb_scalars(self, data, mode=None): 78 | super().log_wandb_scalars(data, mode=mode) 79 | scalars = { 80 | f"{mode}/PSNR": self.metrics["psnr"].detach(), 81 | f"{mode}/s-var": self.model_module.s_var.item(), 82 | } 83 | if "curvature" in self.weights: 84 | scalars[f"{mode}/curvature_weight"] = self.weights["curvature"] 85 | if "eikonal" in self.weights: 86 | scalars[f"{mode}/eikonal_weight"] = self.weights["eikonal"] 87 | if mode == "train" and self.cfg_gradient.mode == "numerical": 88 | scalars[f"{mode}/epsilon"] = self.model.module.neural_sdf.normal_eps 89 | if self.cfg.model.object.sdf.encoding.coarse2fine.enabled: 90 | scalars[f"{mode}/active_levels"] = self.model.module.neural_sdf.active_levels 91 | wandb.log(scalars, step=self.current_iteration) 92 | 93 | @master_only 94 | def log_wandb_images(self, data, mode=None, max_samples=None): 95 | images = {"iteration": self.current_iteration, "epoch": self.current_epoch} 96 | if mode == "val": 97 | images_error = (data["rgb_map"] - data["image"]).abs() 98 | images.update({ 99 | f"{mode}/vis/rgb_target": wandb_image(data["image"]), 100 | f"{mode}/vis/rgb_render": wandb_image(data["rgb_map"]), 101 | f"{mode}/vis/rgb_error": wandb_image(images_error), 102 | f"{mode}/vis/normal": wandb_image(data["normal_map"], from_range=(-1, 1)), 103 | f"{mode}/vis/inv_depth": wandb_image(1 / (data["depth_map"] + 1e-8) * self.cfg.trainer.depth_vis_scale), 104 | f"{mode}/vis/opacity": wandb_image(data["opacity_map"]), 105 | }) 106 | wandb.log(images, step=self.current_iteration) 107 | 108 | def train(self, cfg, data_loader, single_gpu=False, profile=False, show_pbar=False): 109 | self.progress = self.model_module.progress = self.current_iteration / self.cfg.max_iter 110 | super().train(cfg, data_loader, single_gpu, profile, show_pbar) 111 | -------------------------------------------------------------------------------- /projects/neuralangelo/utils/mesh.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 
4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import numpy as np 14 | import trimesh 15 | import mcubes 16 | import torch 17 | import torch.distributed as dist 18 | import torch.nn.functional as torch_F 19 | from tqdm import tqdm 20 | 21 | from imaginaire.utils.distributed import get_world_size, is_master 22 | 23 | 24 | @torch.no_grad() 25 | def extract_mesh(sdf_func, bounds, intv, block_res=64, texture_func=None, filter_lcc=False): 26 | lattice_grid = LatticeGrid(bounds, intv=intv, block_res=block_res) 27 | data_loader = get_lattice_grid_loader(lattice_grid) 28 | mesh_blocks = [] 29 | if is_master(): 30 | data_loader = tqdm(data_loader, leave=False) 31 | for it, data in enumerate(data_loader): 32 | xyz = data["xyz"][0] 33 | xyz_cuda = xyz.cuda() 34 | sdf_cuda = sdf_func(xyz_cuda)[..., 0] 35 | sdf = sdf_cuda.cpu() 36 | mesh = marching_cubes(sdf.numpy(), xyz.numpy(), intv, texture_func, filter_lcc) 37 | mesh_blocks.append(mesh) 38 | mesh_blocks_gather = [None] * get_world_size() 39 | if dist.is_initialized(): 40 | dist.all_gather_object(mesh_blocks_gather, mesh_blocks) 41 | else: 42 | mesh_blocks_gather = [mesh_blocks] 43 | if is_master(): 44 | mesh_blocks_all = [mesh for mesh_blocks in mesh_blocks_gather for mesh in mesh_blocks 45 | if mesh.vertices.shape[0] > 0] 46 | mesh = trimesh.util.concatenate(mesh_blocks_all) 47 | return mesh 48 | else: 49 | return None 50 | 51 | 52 | @torch.no_grad() 53 | def extract_texture(xyz, neural_rgb, neural_sdf, appear_embed): 54 | num_samples, _ = xyz.shape 55 | xyz_cuda = torch.from_numpy(xyz).float().cuda()[None, None] # [N,3] -> [1,1,N,3] 56 | sdfs, feats = neural_sdf(xyz_cuda) 57 | gradients, _ = neural_sdf.compute_gradients(xyz_cuda, training=False, sdf=sdfs) 58 | normals = torch_F.normalize(gradients, dim=-1) 59 | if appear_embed is not None: 60 | feat_dim = appear_embed.embedding_dim # [1,1,N,C] 61 | app = torch.zeros([1, 1, num_samples, feat_dim], device=sdfs.device) # TODO: hard-coded to zero. better way? 62 | else: 63 | app = None 64 | rgbs = neural_rgb.forward(xyz_cuda, normals, -normals, feats, app=app) # [1,1,N,3] 65 | return (rgbs.squeeze().cpu().numpy() * 255).astype(np.uint8) 66 | 67 | 68 | class LatticeGrid(torch.utils.data.Dataset): 69 | 70 | def __init__(self, bounds, intv, block_res=64): 71 | super().__init__() 72 | self.block_res = block_res 73 | ((x_min, x_max), (y_min, y_max), (z_min, z_max)) = bounds 74 | self.x_grid = torch.arange(x_min, x_max, intv) 75 | self.y_grid = torch.arange(y_min, y_max, intv) 76 | self.z_grid = torch.arange(z_min, z_max, intv) 77 | res_x, res_y, res_z = len(self.x_grid), len(self.y_grid), len(self.z_grid) 78 | print("Extracting surface at resolution", res_x, res_y, res_z) 79 | self.num_blocks_x = int(np.ceil(res_x / block_res)) 80 | self.num_blocks_y = int(np.ceil(res_y / block_res)) 81 | self.num_blocks_z = int(np.ceil(res_z / block_res)) 82 | 83 | def __getitem__(self, idx): 84 | # Keep track of sample index for convenience. 
85 | sample = dict(idx=idx) 86 | block_idx_x = idx // (self.num_blocks_y * self.num_blocks_z) 87 | block_idx_y = (idx // self.num_blocks_z) % self.num_blocks_y 88 | block_idx_z = idx % self.num_blocks_z 89 | xi = block_idx_x * self.block_res 90 | yi = block_idx_y * self.block_res 91 | zi = block_idx_z * self.block_res 92 | x, y, z = torch.meshgrid(self.x_grid[xi:xi+self.block_res+1], 93 | self.y_grid[yi:yi+self.block_res+1], 94 | self.z_grid[zi:zi+self.block_res+1], indexing="ij") 95 | xyz = torch.stack([x, y, z], dim=-1) 96 | sample.update(xyz=xyz) 97 | return sample 98 | 99 | def __len__(self): 100 | return self.num_blocks_x * self.num_blocks_y * self.num_blocks_z 101 | 102 | 103 | def get_lattice_grid_loader(dataset, num_workers=8): 104 | if dist.is_initialized(): 105 | sampler = torch.utils.data.distributed.DistributedSampler(dataset, shuffle=False) 106 | else: 107 | sampler = None 108 | return torch.utils.data.DataLoader( 109 | dataset, 110 | batch_size=1, 111 | shuffle=False, 112 | sampler=sampler, 113 | pin_memory=True, 114 | num_workers=num_workers, 115 | drop_last=False 116 | ) 117 | 118 | 119 | def marching_cubes(sdf, xyz, intv, texture_func, filter_lcc): 120 | # marching cubes 121 | V, F = mcubes.marching_cubes(sdf, 0.) 122 | if V.shape[0] > 0: 123 | V = V * intv + xyz[0, 0, 0] 124 | if texture_func is not None: 125 | C = texture_func(V) 126 | mesh = trimesh.Trimesh(V, F, vertex_colors=C) 127 | else: 128 | mesh = trimesh.Trimesh(V, F) 129 | mesh = filter_points_outside_bounding_sphere(mesh) 130 | mesh = filter_largest_cc(mesh) if filter_lcc else mesh 131 | else: 132 | mesh = trimesh.Trimesh() 133 | return mesh 134 | 135 | 136 | def filter_points_outside_bounding_sphere(old_mesh): 137 | mask = np.linalg.norm(old_mesh.vertices, axis=-1) < 1.0 138 | if np.any(mask): 139 | indices = np.ones(len(old_mesh.vertices), dtype=int) * -1 140 | indices[mask] = np.arange(mask.sum()) 141 | faces_mask = mask[old_mesh.faces[:, 0]] & mask[old_mesh.faces[:, 1]] & mask[old_mesh.faces[:, 2]] 142 | new_faces = indices[old_mesh.faces[faces_mask]] 143 | new_vertices = old_mesh.vertices[mask] 144 | new_colors = old_mesh.visual.vertex_colors[mask] 145 | new_mesh = trimesh.Trimesh(new_vertices, new_faces, vertex_colors=new_colors) 146 | else: 147 | new_mesh = trimesh.Trimesh() 148 | return new_mesh 149 | 150 | 151 | def filter_largest_cc(mesh): 152 | components = mesh.split(only_watertight=False) 153 | areas = np.array([c.area for c in components], dtype=float) 154 | if len(areas) > 0 and mesh.vertices.shape[0] > 0: 155 | new_mesh = components[areas.argmax()] 156 | else: 157 | new_mesh = trimesh.Trimesh() 158 | return new_mesh 159 | -------------------------------------------------------------------------------- /projects/neuralangelo/utils/misc.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 
10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | from functools import partial 14 | import numpy as np 15 | import torch 16 | import torch.nn.functional as torch_F 17 | import imaginaire.trainers.utils 18 | from torch.optim import lr_scheduler 19 | 20 | flip_mat = np.array([ 21 | [1, 0, 0, 0], 22 | [0, -1, 0, 0], 23 | [0, 0, -1, 0], 24 | [0, 0, 0, 1] 25 | ]) 26 | 27 | 28 | def get_scheduler(cfg_opt, opt): 29 | """Return the scheduler object. 30 | 31 | Args: 32 | cfg_opt (obj): Config for the specific optimization module (gen/dis). 33 | opt (obj): PyTorch optimizer object. 34 | 35 | Returns: 36 | (obj): Scheduler 37 | """ 38 | if cfg_opt.sched.type == 'two_steps_with_warmup': 39 | warm_up_end = cfg_opt.sched.warm_up_end 40 | two_steps = cfg_opt.sched.two_steps 41 | gamma = cfg_opt.sched.gamma 42 | 43 | def sch(x): 44 | if x < warm_up_end: 45 | return x / warm_up_end 46 | else: 47 | if x > two_steps[1]: 48 | return 1.0 / gamma ** 2 49 | elif x > two_steps[0]: 50 | return 1.0 / gamma 51 | else: 52 | return 1.0 53 | 54 | scheduler = lr_scheduler.LambdaLR(opt, lambda x: sch(x)) 55 | elif cfg_opt.sched.type == 'cos_with_warmup': 56 | alpha = cfg_opt.sched.alpha 57 | max_iter = cfg_opt.sched.max_iter 58 | warm_up_end = cfg_opt.sched.warm_up_end 59 | 60 | def sch(x): 61 | if x < warm_up_end: 62 | return x / warm_up_end 63 | else: 64 | progress = (x - warm_up_end) / (max_iter - warm_up_end) 65 | learning_factor = (np.cos(np.pi * progress) + 1.0) * 0.5 * (1 - alpha) + alpha 66 | return learning_factor 67 | 68 | scheduler = lr_scheduler.LambdaLR(opt, lambda x: sch(x)) 69 | else: 70 | return imaginaire.trainers.utils.get_scheduler() 71 | return scheduler 72 | 73 | 74 | def eikonal_loss(gradients, outside=None): 75 | gradient_error = (gradients.norm(dim=-1) - 1.0) ** 2 # [B,R,N] 76 | gradient_error = gradient_error.nan_to_num(nan=0.0, posinf=0.0, neginf=0.0) # [B,R,N] 77 | if outside is not None: 78 | return (gradient_error * (~outside).float()).mean() 79 | else: 80 | return gradient_error.mean() 81 | 82 | 83 | def curvature_loss(hessian, outside=None): 84 | laplacian = hessian.sum(dim=-1).abs() # [B,R,N] 85 | laplacian = laplacian.nan_to_num(nan=0.0, posinf=0.0, neginf=0.0) # [B,R,N] 86 | if outside is not None: 87 | return (laplacian * (~outside).float()).mean() 88 | else: 89 | return laplacian.mean() 90 | 91 | 92 | def get_activation(activ, **kwargs): 93 | func = dict( 94 | identity=lambda x: x, 95 | relu=torch_F.relu, 96 | relu_=torch_F.relu_, 97 | abs=torch.abs, 98 | abs_=torch.abs_, 99 | sigmoid=torch.sigmoid, 100 | sigmoid_=torch.sigmoid_, 101 | exp=torch.exp, 102 | exp_=torch.exp_, 103 | softplus=torch_F.softplus, 104 | silu=torch_F.silu, 105 | silu_=partial(torch_F.silu, inplace=True), 106 | )[activ] 107 | return partial(func, **kwargs) 108 | 109 | 110 | def to_full_image(image, image_size=None, from_vec=True): 111 | # if from_vec is True: [B,HW,...,K] --> [B,K,H,W,...] 112 | # if from_vec is False: [B,H,W,...,K] --> [B,K,H,W,...] 113 | if from_vec: 114 | assert image_size is not None 115 | image = image.unflatten(dim=1, sizes=image_size) 116 | image = image.moveaxis(-1, 1) 117 | return image 118 | -------------------------------------------------------------------------------- /projects/neuralangelo/utils/mlp.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. 
All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import numpy as np 14 | import torch 15 | import torch.nn.functional as torch_F 16 | 17 | 18 | class MLPforNeuralSDF(torch.nn.Module): 19 | 20 | def __init__(self, layer_dims, skip_connection=[], activ=None, use_layernorm=False, use_weightnorm=False, 21 | geometric_init=False, out_bias=0., invert=False): 22 | """Initialize a multi-layer perceptron with skip connection. 23 | Args: 24 | layer_dims: A list of integers representing the number of channels in each layer. 25 | skip_connection: A list of integers representing the index of layers to add skip connection. 26 | """ 27 | super().__init__() 28 | self.skip_connection = skip_connection 29 | self.use_layernorm = use_layernorm 30 | self.linears = torch.nn.ModuleList() 31 | if use_layernorm: 32 | self.layer_norm = torch.nn.ModuleList() 33 | # Hidden layers 34 | layer_dim_pairs = list(zip(layer_dims[:-1], layer_dims[1:])) 35 | for li, (k_in, k_out) in enumerate(layer_dim_pairs): 36 | if li in self.skip_connection: 37 | k_in += layer_dims[0] 38 | linear = torch.nn.Linear(k_in, k_out) 39 | if geometric_init: 40 | self._geometric_init(linear, k_in, k_out, first=(li == 0), 41 | skip_dim=(layer_dims[0] if li in self.skip_connection else 0)) 42 | if use_weightnorm: 43 | linear = torch.nn.utils.weight_norm(linear) 44 | self.linears.append(linear) 45 | if use_layernorm and li != len(layer_dim_pairs) - 1: 46 | self.layer_norm.append(torch.nn.LayerNorm(k_out)) 47 | if li == len(layer_dim_pairs) - 1: 48 | self.linears[-1].bias.data.fill_(0.0) 49 | # SDF prediction layer 50 | self.linear_sdf = torch.nn.Linear(k_in, 1) 51 | if geometric_init: 52 | self._geometric_init_sdf(self.linear_sdf, k_in, out_bias=out_bias, invert=invert) 53 | self.activ = activ or torch_F.relu_ 54 | 55 | def forward(self, input, with_sdf=True, with_feat=True): 56 | feat = input 57 | for li, linear in enumerate(self.linears): 58 | if li in self.skip_connection: 59 | feat = torch.cat([feat, input], dim=-1) 60 | if li != len(self.linears) - 1 or with_feat: 61 | feat_pre = linear(feat) 62 | if self.use_layernorm: 63 | feat_pre = self.layer_norm[li](feat_pre) 64 | feat_activ = self.activ(feat_pre) 65 | if li == len(self.linears) - 1: 66 | out = [self.linear_sdf(feat) if with_sdf else None, 67 | feat_activ if with_feat else None] 68 | feat = feat_activ 69 | return out 70 | 71 | def _geometric_init(self, linear, k_in, k_out, first=False, skip_dim=0): 72 | torch.nn.init.constant_(linear.bias, 0.0) 73 | torch.nn.init.normal_(linear.weight, 0.0, np.sqrt(2 / k_out)) 74 | if first: 75 | torch.nn.init.constant_(linear.weight[:, 3:], 0.0) # positional encodings 76 | if skip_dim: 77 | torch.nn.init.constant_(linear.weight[:, -skip_dim:], 0.0) # skip connections 78 | 79 | def _geometric_init_sdf(self, linear, k_in, out_bias=0., invert=False): 80 | torch.nn.init.normal_(linear.weight, mean=np.sqrt(np.pi / k_in), std=0.0001) 81 | torch.nn.init.constant_(linear.bias, -out_bias) 82 | if invert: 83 | linear.weight.data *= -1 84 | linear.bias.data *= -1 85 | 
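Note: the snippet below is a minimal, self-contained sketch of how the MLPforNeuralSDF module listed above might be instantiated and queried. The layer widths, skip index, activation, and out_bias value are illustrative placeholders, not the settings shipped in the released configs; run it from the repository root so the projects package is importable.

import torch
import torch.nn.functional as torch_F
from projects.neuralangelo.utils.mlp import MLPforNeuralSDF

# Hypothetical dimensions: a 32-dim encoded input, three 64-wide hidden layers,
# a 16-dim feature head, and a skip connection into the third hidden layer.
mlp = MLPforNeuralSDF(layer_dims=[32, 64, 64, 64, 16], skip_connection=[2],
                      activ=torch_F.softplus, geometric_init=True, out_bias=0.5)
points_enc = torch.randn(1024, 32)  # placeholder encoded 3D points
sdf, feat = mlp(points_enc)         # sdf: [1024, 1], feat: [1024, 16]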
-------------------------------------------------------------------------------- /projects/neuralangelo/utils/spherical_harmonics.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import torch 14 | 15 | 16 | SH_C0 = 0.28209479177387814 17 | SH_C1 = 0.4886025119029199 18 | SH_C2 = [ 19 | 1.0925484305920792, 20 | -1.0925484305920792, 21 | 0.31539156525252005, 22 | -1.0925484305920792, 23 | 0.5462742152960396 24 | ] 25 | SH_C3 = [ 26 | -0.5900435899266435, 27 | 2.890611442640554, 28 | -0.4570457994644658, 29 | 0.3731763325901154, 30 | -0.4570457994644658, 31 | 1.445305721320277, 32 | -0.5900435899266435 33 | ] 34 | SH_C4 = [ 35 | 2.5033429417967046, 36 | -1.7701307697799304, 37 | 0.9461746957575601, 38 | -0.6690465435572892, 39 | 0.10578554691520431, 40 | -0.6690465435572892, 41 | 0.47308734787878004, 42 | -1.7701307697799304, 43 | 0.6258357354491761, 44 | ] 45 | 46 | 47 | def get_spherical_harmonics(dirs, levels): 48 | # Evaluate spherical harmonics bases at unit directions, without taking linear combination. 49 | vals = torch.empty((*dirs.shape[:-1], (levels + 1) ** 2), device=dirs.device) 50 | vals[..., 0] = SH_C0 51 | if levels >= 1: 52 | x, y, z = dirs.unbind(-1) 53 | vals[..., 1] = -SH_C1 * y 54 | vals[..., 2] = SH_C1 * z 55 | vals[..., 3] = -SH_C1 * x 56 | if levels >= 2: 57 | xx, yy, zz = x * x, y * y, z * z 58 | xy, yz, xz = x * y, y * z, x * z 59 | vals[..., 4] = SH_C2[0] * xy 60 | vals[..., 5] = SH_C2[1] * yz 61 | vals[..., 6] = SH_C2[2] * (2.0 * zz - xx - yy) 62 | vals[..., 7] = SH_C2[3] * xz 63 | vals[..., 8] = SH_C2[4] * (xx - yy) 64 | if levels >= 3: 65 | vals[..., 9] = SH_C3[0] * y * (3 * xx - yy) 66 | vals[..., 10] = SH_C3[1] * xy * z 67 | vals[..., 11] = SH_C3[2] * y * (4 * zz - xx - yy) 68 | vals[..., 12] = SH_C3[3] * z * (2 * zz - 3 * xx - 3 * yy) 69 | vals[..., 13] = SH_C3[4] * x * (4 * zz - xx - yy) 70 | vals[..., 14] = SH_C3[5] * z * (xx - yy) 71 | vals[..., 15] = SH_C3[6] * x * (xx - 3 * yy) 72 | if levels >= 4: 73 | vals[..., 16] = SH_C4[0] * xy * (xx - yy) 74 | vals[..., 17] = SH_C4[1] * yz * (3 * xx - yy) 75 | vals[..., 18] = SH_C4[2] * xy * (7 * zz - 1) 76 | vals[..., 19] = SH_C4[3] * yz * (7 * zz - 3) 77 | vals[..., 20] = SH_C4[4] * (zz * (35 * zz - 30) + 3) 78 | vals[..., 21] = SH_C4[5] * xz * (7 * zz - 3) 79 | vals[..., 22] = SH_C4[6] * (xx - yy) * (7 * zz - 1) 80 | vals[..., 23] = SH_C4[7] * xz * (xx - 3 * yy) 81 | vals[..., 24] = SH_C4[8] * (xx * (xx - 3 * yy) - yy * (3 * xx - yy)) 82 | if levels >= 5: 83 | raise NotImplementedError 84 | return vals 85 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | addict 2 | gdown 3 | git+https://github.com/NVlabs/tiny-cuda-nn/#subdirectory=bindings/torch 4 | gpustat 5 | icecream 6 | imageio-ffmpeg 7 | imutils 8 | ipdb 9 | k3d 10 | 
kornia 11 | lpips 12 | matplotlib 13 | mediapy 14 | nvidia-ml-py3 15 | open3d 16 | opencv-python-headless 17 | OpenEXR 18 | pathlib 19 | pillow 20 | plotly 21 | pyequilib 22 | pyexr 23 | PyMCubes 24 | pyquaternion 25 | pyyaml 26 | requests 27 | scikit-image 28 | scikit-video 29 | scipy 30 | seaborn 31 | tensorboard 32 | termcolor 33 | tqdm 34 | trimesh 35 | wandb 36 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | ''' 2 | ----------------------------------------------------------------------------- 3 | Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 4 | 5 | NVIDIA CORPORATION and its licensors retain all intellectual property 6 | and proprietary rights in and to this software, related documentation 7 | and any modifications thereto. Any use, reproduction, disclosure or 8 | distribution of this software and related documentation without an express 9 | license agreement from NVIDIA CORPORATION is strictly prohibited. 10 | ----------------------------------------------------------------------------- 11 | ''' 12 | 13 | import argparse 14 | import os 15 | 16 | import imaginaire.config 17 | from imaginaire.config import Config, recursive_update_strict, parse_cmdline_arguments 18 | from imaginaire.utils.cudnn import init_cudnn 19 | from imaginaire.utils.distributed import init_dist, get_world_size, master_only_print as print, is_master 20 | from imaginaire.utils.gpu_affinity import set_affinity 21 | from imaginaire.trainers.utils.logging import init_logging 22 | from imaginaire.trainers.utils.get_trainer import get_trainer 23 | from imaginaire.utils.set_random_seed import set_random_seed 24 | 25 | 26 | def parse_args(): 27 | parser = argparse.ArgumentParser(description='Training') 28 | parser.add_argument('--config', help='Path to the training config file.', required=True) 29 | parser.add_argument('--logdir', help='Dir for saving logs and models.', default=None) 30 | parser.add_argument('--checkpoint', default=None, help='Checkpoint path.') 31 | parser.add_argument('--seed', type=int, default=0, help='Random seed.') 32 | parser.add_argument('--local_rank', type=int, default=os.getenv('LOCAL_RANK', 0)) 33 | parser.add_argument('--single_gpu', action='store_true') 34 | parser.add_argument('--debug', action='store_true') 35 | parser.add_argument('--profile', action='store_true') 36 | parser.add_argument('--show_pbar', action='store_true') 37 | parser.add_argument('--wandb', action='store_true', help="Enable using Weights & Biases as the logger") 38 | parser.add_argument('--wandb_name', default='default', type=str) 39 | parser.add_argument('--resume', action='store_true') 40 | args, cfg_cmd = parser.parse_known_args() 41 | return args, cfg_cmd 42 | 43 | 44 | def main(): 45 | args, cfg_cmd = parse_args() 46 | set_affinity(args.local_rank) 47 | cfg = Config(args.config) 48 | 49 | cfg_cmd = parse_cmdline_arguments(cfg_cmd) 50 | recursive_update_strict(cfg, cfg_cmd) 51 | 52 | # If args.single_gpu is set to True, we will disable distributed data parallel. 53 | if not args.single_gpu: 54 | # this disables nccl timeout 55 | os.environ["NCCL_BLOCKING_WAIT"] = "0" 56 | os.environ["NCCL_ASYNC_ERROR_HANDLING"] = "0" 57 | cfg.local_rank = args.local_rank 58 | init_dist(cfg.local_rank, rank=-1, world_size=-1) 59 | print(f"Training with {get_world_size()} GPUs.") 60 | 61 | # set random seed by rank 62 | set_random_seed(args.seed, by_rank=True) 63 | 64 | # Global arguments.
65 | imaginaire.config.DEBUG = args.debug 66 | 67 | # Create log directory for storing training results. 68 | cfg.logdir = init_logging(args.config, args.logdir, makedir=True) 69 | 70 | # Print and save final config 71 | if is_master(): 72 | cfg.print_config() 73 | cfg.save_config(cfg.logdir) 74 | 75 | # Initialize cudnn. 76 | init_cudnn(cfg.cudnn.deterministic, cfg.cudnn.benchmark) 77 | 78 | # Initialize data loaders and models. 79 | trainer = get_trainer(cfg, is_inference=False, seed=args.seed) 80 | trainer.set_data_loader(cfg, split="train") 81 | trainer.set_data_loader(cfg, split="val") 82 | trainer.checkpointer.load(args.checkpoint, args.resume, load_sch=True, load_opt=True) 83 | 84 | # Initialize Wandb. 85 | trainer.init_wandb(cfg, 86 | project=args.wandb_name, 87 | mode="disabled" if args.debug or not args.wandb else "online", 88 | resume=args.resume, 89 | use_group=True) 90 | 91 | trainer.mode = 'train' 92 | # Start training. 93 | trainer.train(cfg, 94 | trainer.train_data_loader, 95 | single_gpu=args.single_gpu, 96 | profile=args.profile, 97 | show_pbar=args.show_pbar) 98 | 99 | # Finalize training. 100 | trainer.finalize(cfg) 101 | 102 | 103 | if __name__ == "__main__": 104 | main() 105 | --------------------------------------------------------------------------------
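Note: given the arguments defined in train.py above, launches might look like the following. The commands are illustrative only: config/log paths are placeholders, and the multi-GPU line assumes a torchrun-style launcher that sets LOCAL_RANK, RANK, and WORLD_SIZE for init_dist(); adjust to your own setup.

# Single-GPU debug run (placeholder paths):
python train.py --config path/to/your_config.yaml --logdir logs/my_run --single_gpu --show_pbar

# Multi-GPU run via a torchrun-style launcher (assumed, not mandated by this file):
torchrun --nproc_per_node=8 train.py --config path/to/your_config.yaml --logdir logs/my_run --show_pbar --wandb --wandb_name my_project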