├── .github └── workflows │ ├── aws │ └── update_index.py │ ├── building.yml │ ├── code_checks.yml │ ├── cuda │ ├── Linux-env.sh │ ├── Linux.sh │ ├── Windows-env.sh │ └── Windows.sh │ ├── doc.yml │ └── publish.yml ├── .gitignore ├── .gitmodules ├── .pre-commit-config.yaml ├── .readthedocs.yaml ├── CHANGELOG.md ├── LICENSE ├── MANIFEST.in ├── README.md ├── docs ├── Makefile ├── requirements.txt └── source │ ├── _static │ ├── css │ │ └── readthedocs.css │ └── images │ │ ├── coding │ │ └── gpu_util.png │ │ ├── illustration_occgrid.png │ │ ├── illustration_propnet.png │ │ ├── logo.png │ │ ├── logo4x.png │ │ ├── plot_occgrid.png │ │ ├── plot_propnet.png │ │ └── teaser.jpg │ ├── apis │ ├── estimators.rst │ ├── generated │ │ ├── nerfacc.accumulate_along_rays.rst │ │ ├── nerfacc.exclusive_prod.rst │ │ ├── nerfacc.exclusive_sum.rst │ │ ├── nerfacc.importance_sampling.rst │ │ ├── nerfacc.inclusive_prod.rst │ │ ├── nerfacc.inclusive_sum.rst │ │ ├── nerfacc.pack_info.rst │ │ ├── nerfacc.ray_aabb_intersect.rst │ │ ├── nerfacc.render_transmittance_from_alpha.rst │ │ ├── nerfacc.render_transmittance_from_density.rst │ │ ├── nerfacc.render_visibility_from_alpha.rst │ │ ├── nerfacc.render_visibility_from_density.rst │ │ ├── nerfacc.render_weight_from_alpha.rst │ │ ├── nerfacc.render_weight_from_density.rst │ │ ├── nerfacc.searchsorted.rst │ │ └── nerfacc.traverse_grids.rst │ ├── rendering.rst │ └── utils.rst │ ├── conf.py │ ├── examples │ ├── camera.rst │ ├── camera │ │ └── barf.rst │ ├── dynamic.rst │ ├── dynamic │ │ ├── kplanes.rst │ │ ├── tineuvox.rst │ │ └── tnerf.rst │ ├── static.rst │ └── static │ │ ├── nerf.rst │ │ ├── ngp.rst │ │ └── tensorf.rst │ ├── index.rst │ └── methodology │ ├── coding.rst │ └── sampling.rst ├── examples ├── datasets │ ├── __init__.py │ ├── dnerf_synthetic.py │ ├── nerf_360_v2.py │ ├── nerf_synthetic.py │ └── utils.py ├── radiance_fields │ ├── __init__.py │ ├── mlp.py │ └── ngp.py ├── requirements.txt ├── train_mlp_nerf.py ├── train_mlp_tnerf.py ├── 
train_ngp_nerf_occ.py ├── train_ngp_nerf_prop.py └── utils.py ├── nerfacc ├── __init__.py ├── cameras.py ├── cuda │ ├── __init__.py │ ├── _backend.py │ └── csrc │ │ ├── camera.cu │ │ ├── grid.cu │ │ ├── include │ │ ├── data_spec.hpp │ │ ├── data_spec_packed.cuh │ │ ├── utils.cub.cuh │ │ ├── utils_camera.cuh │ │ ├── utils_contraction.cuh │ │ ├── utils_cuda.cuh │ │ ├── utils_grid.cuh │ │ ├── utils_math.cuh │ │ └── utils_scan.cuh │ │ ├── nerfacc.cpp │ │ ├── pdf.cu │ │ ├── scan.cu │ │ └── scan_cub.cu ├── data_specs.py ├── estimators │ ├── __init__.py │ ├── base.py │ ├── n3tree.py │ ├── occ_grid.py │ ├── prop_net.py │ └── vdb.py ├── grid.py ├── losses.py ├── pack.py ├── pdf.py ├── scan.py ├── version.py └── volrend.py ├── scripts ├── run_aws_listing.py ├── run_dev_checks.py └── run_profiler.py ├── setup.cfg ├── setup.py └── tests ├── test_camera.py ├── test_grid.py ├── test_pack.py ├── test_pdf.py ├── test_rendering.py ├── test_scan.py └── test_vdb.py /.github/workflows/aws/update_index.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | 3 | import boto3 4 | 5 | ROOT_URL = 'https://nerfacc-bucket.s3.us-west-2.amazonaws.com/whl' 6 | html = '\n\n\n{}\n\n' 7 | href = ' {}
' 8 | args = { 9 | 'ContentType': 'text/html', 10 | 'CacheControl': 'max-age=300', 11 | 'ACL': 'public-read', 12 | } 13 | 14 | bucket = boto3.resource('s3').Bucket(name='nerfacc-bucket') 15 | 16 | wheels_dict = defaultdict(list) 17 | for obj in bucket.objects.filter(Prefix='whl'): 18 | if obj.key[-3:] != 'whl': 19 | continue 20 | torch_version, wheel = obj.key.split('/')[-2:] 21 | wheel = f'{torch_version}/{wheel}' 22 | wheels_dict[torch_version].append(wheel) 23 | 24 | index_html = html.format('\n'.join([ 25 | href.format(f'{torch_version}.html'.replace('+', '%2B'), version) 26 | for version in wheels_dict 27 | ])) 28 | 29 | with open('index.html', 'w') as f: 30 | f.write(index_html) 31 | bucket.Object('whl/index.html').upload_file('index.html', args) 32 | 33 | for torch_version, wheel_names in wheels_dict.items(): 34 | torch_version_html = html.format('\n'.join([ 35 | href.format(f'{ROOT_URL}/{wheel_name}'.replace('+', '%2B'), wheel_name) 36 | for wheel_name in wheel_names 37 | ])) 38 | 39 | with open(f'{torch_version}.html', 'w') as f: 40 | f.write(torch_version_html) 41 | bucket.Object(f'whl/{torch_version}.html').upload_file( 42 | f'{torch_version}.html', args) -------------------------------------------------------------------------------- /.github/workflows/building.yml: -------------------------------------------------------------------------------- 1 | name: Building Wheels 2 | 3 | on: [workflow_dispatch] 4 | 5 | jobs: 6 | 7 | wheel: 8 | runs-on: ${{ matrix.os }} 9 | environment: production 10 | 11 | strategy: 12 | fail-fast: false 13 | matrix: 14 | os: [ubuntu-20.04, windows-2019] 15 | # support version based on: https://download.pytorch.org/whl/torch/ 16 | python-version: ['3.7', '3.8', '3.9', '3.10', '3.11'] 17 | torch-version: [1.11.0, 1.12.0, 1.13.0, 2.0.0] 18 | cuda-version: ['cu113', 'cu115', 'cu116', 'cu117', 'cu118'] 19 | exclude: 20 | - torch-version: 1.11.0 21 | python-version: '3.11' 22 | - torch-version: 1.11.0 23 | cuda-version: 'cu116' 24 | 
- torch-version: 1.11.0 25 | cuda-version: 'cu117' 26 | - torch-version: 1.11.0 27 | cuda-version: 'cu118' 28 | 29 | - torch-version: 1.12.0 30 | python-version: '3.11' 31 | - torch-version: 1.12.0 32 | cuda-version: 'cu115' 33 | - torch-version: 1.12.0 34 | cuda-version: 'cu117' 35 | - torch-version: 1.12.0 36 | cuda-version: 'cu118' 37 | 38 | - torch-version: 1.13.0 39 | cuda-version: 'cu102' 40 | - torch-version: 1.13.0 41 | cuda-version: 'cu113' 42 | - torch-version: 1.13.0 43 | cuda-version: 'cu115' 44 | - torch-version: 1.13.0 45 | cuda-version: 'cu118' 46 | 47 | - torch-version: 2.0.0 48 | python-version: '3.7' 49 | - torch-version: 2.0.0 50 | cuda-version: 'cu102' 51 | - torch-version: 2.0.0 52 | cuda-version: 'cu113' 53 | - torch-version: 2.0.0 54 | cuda-version: 'cu115' 55 | - torch-version: 2.0.0 56 | cuda-version: 'cu116' 57 | 58 | - os: windows-2019 59 | cuda-version: 'cu102' 60 | - os: windows-2019 61 | torch-version: 1.13.0 62 | python-version: '3.11' 63 | 64 | # - os: windows-2019 65 | # torch-version: 1.13.0 66 | # cuda-version: 'cu117' 67 | # python-version: '3.9' 68 | 69 | 70 | 71 | steps: 72 | - uses: actions/checkout@v3 73 | 74 | - name: Set up Python ${{ matrix.python-version }} 75 | uses: actions/setup-python@v4 76 | with: 77 | python-version: ${{ matrix.python-version }} 78 | 79 | - name: Upgrade pip 80 | run: | 81 | pip install --upgrade setuptools 82 | pip install ninja 83 | 84 | - name: Free up disk space 85 | if: ${{ runner.os == 'Linux' }} 86 | run: | 87 | sudo rm -rf /usr/share/dotnet 88 | 89 | - name: Install CUDA ${{ matrix.cuda-version }} 90 | if: ${{ matrix.cuda-version != 'cpu' }} 91 | run: | 92 | bash .github/workflows/cuda/${{ runner.os }}.sh ${{ matrix.cuda-version }} 93 | 94 | - name: Install PyTorch ${{ matrix.torch-version }}+${{ matrix.cuda-version }} 95 | run: | 96 | pip install torch==${{ matrix.torch-version }} --extra-index-url https://download.pytorch.org/whl/${{ matrix.cuda-version }} 97 | python -c "import torch; 
print('PyTorch:', torch.__version__)" 98 | python -c "import torch; print('CUDA:', torch.version.cuda)" 99 | python -c "import torch; print('CUDA Available:', torch.cuda.is_available())" 100 | 101 | - name: Patch PyTorch static constexpr on Windows 102 | if: ${{ runner.os == 'Windows' }} 103 | run: | 104 | Torch_DIR=`python -c 'import os; import torch; print(os.path.dirname(torch.__file__))'` 105 | sed -i '31,38c\ 106 | TORCH_API void lazy_init_num_threads();' ${Torch_DIR}/include/ATen/Parallel.h 107 | shell: bash 108 | 109 | - name: Set version 110 | if: ${{ runner.os != 'macOS' }} 111 | run: | 112 | VERSION=`sed -n 's/^__version__ = "\(.*\)"/\1/p' nerfacc/version.py` 113 | TORCH_VERSION=`echo "pt${{ matrix.torch-version }}" | sed "s/..$//" | sed "s/\.//g"` 114 | CUDA_VERSION=`echo ${{ matrix.cuda-version }}` 115 | echo "New version name: $VERSION+$TORCH_VERSION$CUDA_VERSION" 116 | sed -i "s/$VERSION/$VERSION+$TORCH_VERSION$CUDA_VERSION/" nerfacc/version.py 117 | shell: 118 | bash 119 | 120 | - name: Install main package for CPU 121 | if: ${{ matrix.cuda-version == 'cpu' }} 122 | run: | 123 | BUILD_NO_CUDA=1 pip install . 124 | 125 | - name: Build wheel 126 | run: | 127 | pip install wheel 128 | source .github/workflows/cuda/${{ runner.os }}-env.sh ${{ matrix.cuda-version }} 129 | python setup.py bdist_wheel --dist-dir=dist 130 | shell: bash # `source` does not exist in windows powershell 131 | 132 | - name: Test wheel 133 | run: | 134 | cd dist 135 | ls -lah 136 | pip install *.whl 137 | python -c "import nerfacc; print('nerfacc:', nerfacc.__version__)" 138 | cd .. 
139 | shell: bash # `ls -lah` does not exist in windows powershell 140 | 141 | - name: Configure AWS 142 | uses: aws-actions/configure-aws-credentials@v2 143 | with: 144 | aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} 145 | aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 146 | aws-region: us-west-2 147 | 148 | - name: Upload wheel 149 | run: | 150 | aws s3 sync dist s3://nerfacc-bucket/whl/torch-${{ matrix.torch-version }}_${{ matrix.cuda-version }} --grants read=uri=http://acs.amazonaws.com/groups/global/AllUsers 151 | 152 | update_aws_listing: 153 | needs: [wheel] 154 | runs-on: ubuntu-latest 155 | environment: production 156 | 157 | steps: 158 | - uses: actions/checkout@v3 159 | 160 | - name: Set up Python 161 | uses: actions/setup-python@v4 162 | with: 163 | python-version: 3.9 164 | 165 | - name: Upgrade pip 166 | run: | 167 | pip install boto3 168 | 169 | - name: Update AWS listing 170 | run: | 171 | python scripts/run_aws_listing.py \ 172 | --bucket="nerfacc-bucket" \ 173 | --region="us-west-2" 174 | env: 175 | AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} 176 | AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 177 | -------------------------------------------------------------------------------- /.github/workflows/code_checks.yml: -------------------------------------------------------------------------------- 1 | name: Core Tests. 
2 | 3 | on: 4 | push: 5 | branches: [master] 6 | pull_request: 7 | branches: [master] 8 | 9 | permissions: 10 | contents: read 11 | 12 | jobs: 13 | build: 14 | runs-on: ubuntu-latest 15 | 16 | steps: 17 | - uses: actions/checkout@v3 18 | - name: Set up Python 3.9 19 | uses: actions/setup-python@v4 20 | with: 21 | python-version: "3.9" 22 | - name: Install dependencies 23 | run: | 24 | pip install isort==5.10.1 black[jupyter]==22.3.0 25 | - name: Run isort 26 | run: isort docs/ nerfacc/ scripts/ examples/ tests/ --profile black --skip examples/pycolmap --line-length 80 --check 27 | - name: Run Black 28 | run: black docs/ nerfacc/ scripts/ examples/ tests/ --exclude examples/pycolmap --line-length 80 --check 29 | # - name: Python Pylint 30 | # run: | 31 | # pylint nerfacc/ tests/ scripts/ examples/ 32 | -------------------------------------------------------------------------------- /.github/workflows/cuda/Linux-env.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Took from https://github.com/pyg-team/pyg-lib/ 4 | 5 | case ${1} in 6 | cu118) 7 | export CUDA_HOME=/usr/local/cuda-11.8 8 | export LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH} 9 | export PATH=${CUDA_HOME}/bin:${PATH} 10 | export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6" 11 | ;; 12 | cu117) 13 | export CUDA_HOME=/usr/local/cuda-11.7 14 | export LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH} 15 | export PATH=${CUDA_HOME}/bin:${PATH} 16 | export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6" 17 | ;; 18 | cu116) 19 | export CUDA_HOME=/usr/local/cuda-11.6 20 | export LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH} 21 | export PATH=${CUDA_HOME}/bin:${PATH} 22 | export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6" 23 | ;; 24 | cu115) 25 | export CUDA_HOME=/usr/local/cuda-11.5 26 | export LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH} 27 | export PATH=${CUDA_HOME}/bin:${PATH} 28 | export 
TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6" 29 | ;; 30 | cu113) 31 | export CUDA_HOME=/usr/local/cuda-11.3 32 | export LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH} 33 | export PATH=${CUDA_HOME}/bin:${PATH} 34 | export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6" 35 | ;; 36 | cu102) 37 | export CUDA_HOME=/usr/local/cuda-10.2 38 | export LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH} 39 | export PATH=${CUDA_HOME}/bin:${PATH} 40 | export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5" 41 | ;; 42 | *) 43 | ;; 44 | esac -------------------------------------------------------------------------------- /.github/workflows/cuda/Linux.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Took from https://github.com/pyg-team/pyg-lib/ 4 | 5 | OS=ubuntu2004 6 | 7 | case ${1} in 8 | cu118) 9 | CUDA=11.8 10 | APT_KEY=${OS}-${CUDA/./-}-local 11 | FILENAME=cuda-repo-${APT_KEY}_${CUDA}.0-520.61.05-1_amd64.deb 12 | URL=https://developer.download.nvidia.com/compute/cuda/${CUDA}.0/local_installers 13 | ;; 14 | cu117) 15 | CUDA=11.7 16 | APT_KEY=${OS}-${CUDA/./-}-local 17 | FILENAME=cuda-repo-${APT_KEY}_${CUDA}.1-515.65.01-1_amd64.deb 18 | URL=https://developer.download.nvidia.com/compute/cuda/${CUDA}.1/local_installers 19 | ;; 20 | cu116) 21 | CUDA=11.6 22 | APT_KEY=${OS}-${CUDA/./-}-local 23 | FILENAME=cuda-repo-${APT_KEY}_${CUDA}.2-510.47.03-1_amd64.deb 24 | URL=https://developer.download.nvidia.com/compute/cuda/${CUDA}.2/local_installers 25 | ;; 26 | cu115) 27 | CUDA=11.5 28 | APT_KEY=${OS}-${CUDA/./-}-local 29 | FILENAME=cuda-repo-${APT_KEY}_${CUDA}.2-495.29.05-1_amd64.deb 30 | URL=https://developer.download.nvidia.com/compute/cuda/${CUDA}.2/local_installers 31 | ;; 32 | cu113) 33 | CUDA=11.3 34 | APT_KEY=${OS}-${CUDA/./-}-local 35 | FILENAME=cuda-repo-${APT_KEY}_${CUDA}.0-465.19.01-1_amd64.deb 36 | URL=https://developer.download.nvidia.com/compute/cuda/${CUDA}.0/local_installers 37 | ;; 38 
| cu102) 39 | CUDA=10.2 40 | APT_KEY=${CUDA/./-}-local-${CUDA}.89-440.33.01 41 | FILENAME=cuda-repo-${OS}-${APT_KEY}_1.0-1_amd64.deb 42 | URL=https://developer.download.nvidia.com/compute/cuda/${CUDA}/Prod/local_installers 43 | ;; 44 | *) 45 | echo "Unrecognized CUDA_VERSION=${1}" 46 | exit 1 47 | ;; 48 | esac 49 | 50 | wget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin 51 | sudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600 52 | wget -nv ${URL}/${FILENAME} 53 | sudo dpkg -i ${FILENAME} 54 | 55 | if [ "${1}" = "cu117" ] || [ "${1}" = "cu118" ]; then 56 | sudo cp /var/cuda-repo-${APT_KEY}/cuda-*-keyring.gpg /usr/share/keyrings/ 57 | else 58 | sudo apt-key add /var/cuda-repo-${APT_KEY}/7fa2af80.pub 59 | fi 60 | 61 | sudo apt-get update 62 | sudo apt-get -y install cuda 63 | 64 | rm -f ${FILENAME} -------------------------------------------------------------------------------- /.github/workflows/cuda/Windows-env.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Took from https://github.com/pyg-team/pyg-lib/ 4 | 5 | case ${1} in 6 | cu118) 7 | CUDA_HOME=/c/Program\ Files/NVIDIA\ GPU\ Computing\ Toolkit/CUDA/v11.8 8 | PATH=${CUDA_HOME}/bin:$PATH 9 | PATH=/c/Program\ Files\ \(x86\)/Microsoft\ Visual\ Studio/2017/BuildTools/MSBuild/15.0/Bin:$PATH 10 | export TORCH_CUDA_ARCH_LIST="6.0+PTX" 11 | ;; 12 | cu117) 13 | CUDA_HOME=/c/Program\ Files/NVIDIA\ GPU\ Computing\ Toolkit/CUDA/v11.7 14 | PATH=${CUDA_HOME}/bin:$PATH 15 | PATH=/c/Program\ Files\ \(x86\)/Microsoft\ Visual\ Studio/2017/BuildTools/MSBuild/15.0/Bin:$PATH 16 | export TORCH_CUDA_ARCH_LIST="6.0+PTX" 17 | ;; 18 | cu116) 19 | CUDA_HOME=/c/Program\ Files/NVIDIA\ GPU\ Computing\ Toolkit/CUDA/v11.6 20 | PATH=${CUDA_HOME}/bin:$PATH 21 | PATH=/c/Program\ Files\ \(x86\)/Microsoft\ Visual\ Studio/2017/BuildTools/MSBuild/15.0/Bin:$PATH 22 | export TORCH_CUDA_ARCH_LIST="6.0+PTX" 23 | ;; 24 | cu115) 25 | 
CUDA_HOME=/c/Program\ Files/NVIDIA\ GPU\ Computing\ Toolkit/CUDA/v11.5 26 | PATH=${CUDA_HOME}/bin:$PATH 27 | PATH=/c/Program\ Files\ \(x86\)/Microsoft\ Visual\ Studio/2017/BuildTools/MSBuild/15.0/Bin:$PATH 28 | export TORCH_CUDA_ARCH_LIST="6.0+PTX" 29 | ;; 30 | cu113) 31 | CUDA_HOME=/c/Program\ Files/NVIDIA\ GPU\ Computing\ Toolkit/CUDA/v11.3 32 | PATH=${CUDA_HOME}/bin:$PATH 33 | PATH=/c/Program\ Files\ \(x86\)/Microsoft\ Visual\ Studio/2017/BuildTools/MSBuild/15.0/Bin:$PATH 34 | export TORCH_CUDA_ARCH_LIST="6.0+PTX" 35 | ;; 36 | *) 37 | ;; 38 | esac -------------------------------------------------------------------------------- /.github/workflows/cuda/Windows.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Took from https://github.com/pyg-team/pyg-lib/ 4 | 5 | # Install NVIDIA drivers, see: 6 | # https://github.com/pytorch/vision/blob/master/packaging/windows/internal/cuda_install.bat#L99-L102 7 | curl -k -L "https://drive.google.com/u/0/uc?id=1injUyo3lnarMgWyRcXqKg4UGnN0ysmuq&export=download" --output "/tmp/gpu_driver_dlls.zip" 8 | 7z x "/tmp/gpu_driver_dlls.zip" -o"/c/Windows/System32" 9 | 10 | case ${1} in 11 | cu118) 12 | CUDA_SHORT=11.8 13 | CUDA_URL=https://developer.download.nvidia.com/compute/cuda/${CUDA_SHORT}.0/local_installers 14 | CUDA_FILE=cuda_${CUDA_SHORT}.0_522.06_windows.exe 15 | ;; 16 | cu117) 17 | CUDA_SHORT=11.7 18 | CUDA_URL=https://developer.download.nvidia.com/compute/cuda/${CUDA_SHORT}.1/local_installers 19 | CUDA_FILE=cuda_${CUDA_SHORT}.1_516.94_windows.exe 20 | ;; 21 | cu116) 22 | CUDA_SHORT=11.3 23 | CUDA_URL=https://developer.download.nvidia.com/compute/cuda/${CUDA_SHORT}.0/local_installers 24 | CUDA_FILE=cuda_${CUDA_SHORT}.0_465.89_win10.exe 25 | ;; 26 | cu115) 27 | CUDA_SHORT=11.3 28 | CUDA_URL=https://developer.download.nvidia.com/compute/cuda/${CUDA_SHORT}.0/local_installers 29 | CUDA_FILE=cuda_${CUDA_SHORT}.0_465.89_win10.exe 30 | ;; 31 | cu113) 32 | CUDA_SHORT=11.3 33 
| CUDA_URL=https://developer.download.nvidia.com/compute/cuda/${CUDA_SHORT}.0/local_installers 34 | CUDA_FILE=cuda_${CUDA_SHORT}.0_465.89_win10.exe 35 | ;; 36 | *) 37 | echo "Unrecognized CUDA_VERSION=${1}" 38 | exit 1 39 | ;; 40 | esac 41 | 42 | curl -k -L "${CUDA_URL}/${CUDA_FILE}" --output "${CUDA_FILE}" 43 | echo "" 44 | echo "Installing from ${CUDA_FILE}..." 45 | PowerShell -Command "Start-Process -FilePath \"${CUDA_FILE}\" -ArgumentList \"-s nvcc_${CUDA_SHORT} cuobjdump_${CUDA_SHORT} nvprune_${CUDA_SHORT} cupti_${CUDA_SHORT} cublas_dev_${CUDA_SHORT} cudart_${CUDA_SHORT} cufft_dev_${CUDA_SHORT} curand_dev_${CUDA_SHORT} cusolver_dev_${CUDA_SHORT} cusparse_dev_${CUDA_SHORT} thrust_${CUDA_SHORT} npp_dev_${CUDA_SHORT} nvrtc_dev_${CUDA_SHORT} nvml_dev_${CUDA_SHORT}\" -Wait -NoNewWindow" 46 | echo "Done!" 47 | rm -f "${CUDA_FILE}" -------------------------------------------------------------------------------- /.github/workflows/doc.yml: -------------------------------------------------------------------------------- 1 | name: Docs 2 | on: 3 | push: 4 | branches: [main] 5 | pull_request: 6 | branches: [main] 7 | workflow_dispatch: 8 | 9 | permissions: 10 | contents: write 11 | jobs: 12 | docs: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - uses: actions/checkout@v3 16 | with: 17 | submodules: 'recursive' 18 | - uses: actions/setup-python@v3 19 | with: 20 | python-version: '3.9' 21 | - name: Install dependencies 22 | run: | 23 | pip install -r docs/requirements.txt 24 | pip install torch --index-url https://download.pytorch.org/whl/cpu 25 | BUILD_NO_CUDA=1 pip install . 
26 | - name: Sphinx build 27 | run: | 28 | sphinx-build docs/source _build 29 | - name: Deploy 30 | uses: peaceiris/actions-gh-pages@v3 31 | with: 32 | publish_branch: gh-pages 33 | github_token: ${{ secrets.GITHUB_TOKEN }} 34 | publish_dir: _build/ 35 | force_orphan: true 36 | cname: www.nerfacc.com 37 | if: github.event_name != 'pull_request' -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | # This workflows will upload a Python Package using twine when a release is created 2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries 3 | 4 | name: Upload Python Package 5 | 6 | on: 7 | release: 8 | types: [created] 9 | branches: [master] 10 | 11 | jobs: 12 | deploy: 13 | runs-on: ubuntu-latest 14 | environment: production 15 | 16 | steps: 17 | - uses: actions/checkout@v3 18 | - name: Set up Python 19 | uses: actions/setup-python@v4 20 | with: 21 | python-version: '3.7' 22 | - name: Install dependencies 23 | run: | 24 | python -m pip install build twine 25 | - name: Strip unsupported tags in README 26 | run: | 27 | sed -i '//,//d' README.md 28 | - name: Build and publish 29 | env: 30 | PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }} 31 | run: | 32 | BUILD_NO_CUDA=1 python -m build 33 | twine upload --username __token__ --password $PYPI_TOKEN dist/* -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Visual Studio Code configs. 
2 | .vscode/ 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | # lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | MANIFEST 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | db.sqlite3 61 | 62 | # Flask stuff: 63 | instance/ 64 | .webassets-cache 65 | 66 | # Scrapy stuff: 67 | .scrapy 68 | 69 | # Sphinx documentation 70 | docs/_build/ 71 | 72 | # PyBuilder 73 | target/ 74 | 75 | # Jupyter Notebook 76 | .ipynb_checkpoints 77 | 78 | # pyenv 79 | .python-version 80 | 81 | # celery beat schedule file 82 | celerybeat-schedule 83 | 84 | # SageMath parsed files 85 | *.sage.py 86 | 87 | # Environments 88 | .env 89 | .venv 90 | env/ 91 | venv/ 92 | ENV/ 93 | env.bak/ 94 | venv.bak/ 95 | 96 | # Spyder project settings 97 | .spyderproject 98 | .spyproject 99 | 100 | # Rope project settings 101 | .ropeproject 102 | 103 | # mkdocs documentation 104 | /site 105 | 106 | # mypy 107 | .mypy_cache/ 108 | 109 | .DS_Store 110 | 111 | # Direnv config. 
112 | .envrc 113 | 114 | # line_profiler 115 | *.lprof 116 | 117 | # vscode 118 | .vsocde 119 | 120 | outputs/ 121 | data 122 | 123 | benchmarks/* 124 | !benchmarks/barf 125 | !benchmarks/kplanes 126 | !benchmarks/tensorf 127 | !benchmarks/tineuvox -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "examples/pycolmap"] 2 | path = examples/pycolmap 3 | url = https://github.com/rmbrualla/pycolmap.git 4 | 5 | [submodule "benchmarks/tensorf"] 6 | path = benchmarks/tensorf 7 | url = https://github.com/liruilong940607/tensorf.git 8 | branch = nerfacc 9 | 10 | [submodule "benchmarks/barf"] 11 | path = benchmarks/barf 12 | url = https://github.com/liruilong940607/barf.git 13 | branch = nerfacc 14 | 15 | [submodule "benchmarks/tineuvox"] 16 | path = benchmarks/tineuvox 17 | url = https://github.com/liruilong940607/tineuvox.git 18 | branch = nerfacc 19 | 20 | [submodule "benchmarks/kplanes"] 21 | path = benchmarks/kplanes 22 | url = https://github.com/liruilong940607/kplanes.git 23 | branch = nerfacc 24 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v2.3.0 4 | hooks: 5 | - id: end-of-file-fixer 6 | - id: trailing-whitespace 7 | - id: check-yaml 8 | - id: check-merge-conflict 9 | - id: requirements-txt-fixer 10 | - repo: https://github.com/psf/black 11 | rev: 22.10.0 12 | hooks: 13 | - id: black 14 | language_version: python3.8.12 15 | args: # arguments to configure black 16 | - --line-length=80 17 | 18 | - repo: https://github.com/pycqa/isort 19 | rev: 5.10.1 20 | hooks: 21 | - id: isort 22 | -------------------------------------------------------------------------------- /.readthedocs.yaml: 
-------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | build: 4 | os: ubuntu-20.04 5 | tools: 6 | python: "3.9" 7 | 8 | sphinx: 9 | fail_on_warning: true 10 | configuration: docs/source/conf.py 11 | 12 | python: 13 | install: 14 | # Equivalent to 'pip install .' 15 | - method: pip 16 | path: . 17 | # Equivalent to 'pip install -r docs/requirements.txt' 18 | - requirements: docs/requirements.txt -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | 2 | # Change Log 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](http://keepachangelog.com/) 6 | and this project adheres to [Semantic Versioning](http://semver.org/). 7 | 8 | ## [0.5.0] - 2023-04-04 9 | 10 | This is a major upgrade in which 90% of the code has been rewritten. In this version 11 | we achieves: 12 | 13 | ![Teaser](/docs/source/_static/images/teaser.jpg?raw=true) 14 | 15 | Links: 16 | - Documentation: https://www.nerfacc.com/en/v0.5.0/ 17 | - ArXiv Report: Coming Soon. 18 | 19 | Methodologies: 20 | - Upgrade Occupancy Grid to support multiple levels. 21 | - Support Proposal Network from Mip-NeRF 360. 22 | - Update examples on unbounded scenes to use Multi-level Occupancy Grid or Proposal Network. 23 | - Contraction for Occupancy Grid is no longer supported due to it's inefficiency for ray traversal. 24 | 25 | API Changes: 26 | - [Changed] `OccupancyGrid()` -> `OccGridEstimator()`. 27 | - [Added] Argument `levels=1` for multi-level support. 28 | - [Added] Function `self.sampling()` that does basically the same thing with the old `nerfacc.ray_marching`. 29 | - [Renamed] Function `self.every_n_step()` -> `self.update_every_n_steps()` 30 | - [Added] `PropNetEstimator()`. With functions `self.sampling()`, `self.update_every_n_steps()` 31 | and `self.compute_loss()`. 
32 | - [Removed] `ray_marching()`. Ray marching is now implemented through calling `sampling()` of 33 | the `OccGridEstimator()` / `PropNetEstimator()`. 34 | - [Changed] `ray_aabb_intersect()` now supports multiple aabb, and supports new argument `near_plane`, `far_plane`, `miss_value`. 35 | - [Changed] `render_*_from_*()`. The input shape changes from `(all_samples, 1)` to `(all_samples)`. And the function will returns all intermediate results so it might be a tuple. 36 | - [Changed] `rendering()`. The input shape changes from `(all_samples, 1)` to `(all_samples)`, including the shape assumption for the `rgb_sigma_fn` and `rgb_alpha_fn`. Be aware of this shape change. 37 | - [Changed] `accumulate_along_rays()`. The shape of the `weights` in the inputs should be `(all_samples)` now. 38 | - [Removed] `unpack_info()`, `pack_data()`, `unpack_data()` are temporally removed due to in-compatibility 39 | with the new backend implementation. Will add them back later. 40 | - [Added] Some basic functions that support both batched tensor and flattened tensor: `inclusive_prod()`, `inclusive_sum()`, `exclusive_prod()`, `exclusive_sum()`, `importance_sampling()`, `searchsorted()`. 41 | 42 | Examples & Benchmarks: 43 | - More benchmarks and examples. See folder `examples/` and `benchmarks/`. 44 | 45 | ## [0.3.5] - 2023-02-23 46 | 47 | A stable version that achieves: 48 | - The vanilla Nerf model with 8-layer MLPs can be trained to better quality (+0.5 PNSR) in 1 hour rather than days as in the paper. 49 | - The Instant-NGP Nerf model can be trained to equal quality in 4.5 minutes, comparing to the official pure-CUDA implementation. 50 | - The D-Nerf model for dynamic objects can also be trained in 1 hour rather than 2 days as in the paper, and with better quality (+~2.5 PSNR). 51 | - Both bounded and unbounded scenes are supported. 
52 | 53 | Links: 54 | - Documentation: https://www.nerfacc.com/en/v0.3.5/ 55 | - ArXiv Report: https://arxiv.org/abs/2210.04847v2/ 56 | 57 | Methodologies: 58 | - Single resolution `nerfacc.OccupancyGrid` for synthetic scenes. 59 | - Contraction methods `nerfacc.ContractionType` for unbounded scenes. 60 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Ruilong Li 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | THIS SOFTWARE AND/OR DATA WAS DEPOSITED IN THE BAIR OPEN RESEARCH COMMONS 24 | REPOSITORY ON 05/08/2023. 
25 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include nerfacc/cuda/csrc/include/* 2 | include nerfacc/cuda/csrc/* 3 | 4 | include nerfacc/_cuda/csrc/include/* 5 | include nerfacc/_cuda/csrc/* 6 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | pytorch_sphinx_theme @ git+https://github.com/liruilong940607/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme 2 | sphinx==5.2.1 3 | sphinx-copybutton==0.5.0 4 | sphinx-design==0.2.0 -------------------------------------------------------------------------------- /docs/source/_static/css/readthedocs.css: -------------------------------------------------------------------------------- 1 | .header-logo { 2 | background-image: url("../images/logo4x.png"); 3 | background-size: 156px 35px; 4 | height: 35px; 5 | width: 156px; 6 | } 7 | code { 8 | word-break: normal; 9 | } -------------------------------------------------------------------------------- /docs/source/_static/images/coding/gpu_util.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nerfstudio-project/nerfacc/57ccfa14feb94975836ea6913149a86737220f2b/docs/source/_static/images/coding/gpu_util.png -------------------------------------------------------------------------------- /docs/source/_static/images/illustration_occgrid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nerfstudio-project/nerfacc/57ccfa14feb94975836ea6913149a86737220f2b/docs/source/_static/images/illustration_occgrid.png -------------------------------------------------------------------------------- /docs/source/_static/images/illustration_propnet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nerfstudio-project/nerfacc/57ccfa14feb94975836ea6913149a86737220f2b/docs/source/_static/images/illustration_propnet.png 
-------------------------------------------------------------------------------- /docs/source/_static/images/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nerfstudio-project/nerfacc/57ccfa14feb94975836ea6913149a86737220f2b/docs/source/_static/images/logo.png -------------------------------------------------------------------------------- /docs/source/_static/images/logo4x.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nerfstudio-project/nerfacc/57ccfa14feb94975836ea6913149a86737220f2b/docs/source/_static/images/logo4x.png -------------------------------------------------------------------------------- /docs/source/_static/images/plot_occgrid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nerfstudio-project/nerfacc/57ccfa14feb94975836ea6913149a86737220f2b/docs/source/_static/images/plot_occgrid.png -------------------------------------------------------------------------------- /docs/source/_static/images/plot_propnet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nerfstudio-project/nerfacc/57ccfa14feb94975836ea6913149a86737220f2b/docs/source/_static/images/plot_propnet.png -------------------------------------------------------------------------------- /docs/source/_static/images/teaser.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nerfstudio-project/nerfacc/57ccfa14feb94975836ea6913149a86737220f2b/docs/source/_static/images/teaser.jpg -------------------------------------------------------------------------------- /docs/source/apis/estimators.rst: -------------------------------------------------------------------------------- 1 | .. 
_`Estimators`: 2 | 3 | Estimators 4 | =================================== 5 | 6 | .. currentmodule:: nerfacc 7 | 8 | .. autoclass:: OccGridEstimator 9 | :members: 10 | 11 | .. autoclass:: PropNetEstimator 12 | :members: 13 | -------------------------------------------------------------------------------- /docs/source/apis/generated/nerfacc.accumulate_along_rays.rst: -------------------------------------------------------------------------------- 1 | nerfacc.accumulate\_along\_rays 2 | =============================== 3 | 4 | .. currentmodule:: nerfacc 5 | 6 | .. autofunction:: accumulate_along_rays -------------------------------------------------------------------------------- /docs/source/apis/generated/nerfacc.exclusive_prod.rst: -------------------------------------------------------------------------------- 1 | nerfacc.exclusive\_prod 2 | ======================= 3 | 4 | .. currentmodule:: nerfacc 5 | 6 | .. autofunction:: exclusive_prod -------------------------------------------------------------------------------- /docs/source/apis/generated/nerfacc.exclusive_sum.rst: -------------------------------------------------------------------------------- 1 | nerfacc.exclusive\_sum 2 | ====================== 3 | 4 | .. currentmodule:: nerfacc 5 | 6 | .. autofunction:: exclusive_sum -------------------------------------------------------------------------------- /docs/source/apis/generated/nerfacc.importance_sampling.rst: -------------------------------------------------------------------------------- 1 | nerfacc.importance\_sampling 2 | ============================ 3 | 4 | .. currentmodule:: nerfacc 5 | 6 | .. autofunction:: importance_sampling -------------------------------------------------------------------------------- /docs/source/apis/generated/nerfacc.inclusive_prod.rst: -------------------------------------------------------------------------------- 1 | nerfacc.inclusive\_prod 2 | ======================= 3 | 4 | .. currentmodule:: nerfacc 5 | 6 | .. 
autofunction:: inclusive_prod -------------------------------------------------------------------------------- /docs/source/apis/generated/nerfacc.inclusive_sum.rst: -------------------------------------------------------------------------------- 1 | nerfacc.inclusive\_sum 2 | ====================== 3 | 4 | .. currentmodule:: nerfacc 5 | 6 | .. autofunction:: inclusive_sum -------------------------------------------------------------------------------- /docs/source/apis/generated/nerfacc.pack_info.rst: -------------------------------------------------------------------------------- 1 | nerfacc.pack\_info 2 | ================== 3 | 4 | .. currentmodule:: nerfacc 5 | 6 | .. autofunction:: pack_info -------------------------------------------------------------------------------- /docs/source/apis/generated/nerfacc.ray_aabb_intersect.rst: -------------------------------------------------------------------------------- 1 | nerfacc.ray\_aabb\_intersect 2 | ============================ 3 | 4 | .. currentmodule:: nerfacc 5 | 6 | .. autofunction:: ray_aabb_intersect -------------------------------------------------------------------------------- /docs/source/apis/generated/nerfacc.render_transmittance_from_alpha.rst: -------------------------------------------------------------------------------- 1 | nerfacc.render\_transmittance\_from\_alpha 2 | ========================================== 3 | 4 | .. currentmodule:: nerfacc 5 | 6 | .. autofunction:: render_transmittance_from_alpha -------------------------------------------------------------------------------- /docs/source/apis/generated/nerfacc.render_transmittance_from_density.rst: -------------------------------------------------------------------------------- 1 | nerfacc.render\_transmittance\_from\_density 2 | ============================================ 3 | 4 | .. currentmodule:: nerfacc 5 | 6 | .. 
autofunction:: render_transmittance_from_density -------------------------------------------------------------------------------- /docs/source/apis/generated/nerfacc.render_visibility_from_alpha.rst: -------------------------------------------------------------------------------- 1 | nerfacc.render\_visibility\_from\_alpha 2 | ======================================= 3 | 4 | .. currentmodule:: nerfacc 5 | 6 | .. autofunction:: render_visibility_from_alpha -------------------------------------------------------------------------------- /docs/source/apis/generated/nerfacc.render_visibility_from_density.rst: -------------------------------------------------------------------------------- 1 | nerfacc.render\_visibility\_from\_density 2 | ========================================= 3 | 4 | .. currentmodule:: nerfacc 5 | 6 | .. autofunction:: render_visibility_from_density -------------------------------------------------------------------------------- /docs/source/apis/generated/nerfacc.render_weight_from_alpha.rst: -------------------------------------------------------------------------------- 1 | nerfacc.render\_weight\_from\_alpha 2 | =================================== 3 | 4 | .. currentmodule:: nerfacc 5 | 6 | .. autofunction:: render_weight_from_alpha -------------------------------------------------------------------------------- /docs/source/apis/generated/nerfacc.render_weight_from_density.rst: -------------------------------------------------------------------------------- 1 | nerfacc.render\_weight\_from\_density 2 | ===================================== 3 | 4 | .. currentmodule:: nerfacc 5 | 6 | .. autofunction:: render_weight_from_density -------------------------------------------------------------------------------- /docs/source/apis/generated/nerfacc.searchsorted.rst: -------------------------------------------------------------------------------- 1 | nerfacc.searchsorted 2 | ==================== 3 | 4 | .. currentmodule:: nerfacc 5 | 6 | .. 
autofunction:: searchsorted -------------------------------------------------------------------------------- /docs/source/apis/generated/nerfacc.traverse_grids.rst: -------------------------------------------------------------------------------- 1 | nerfacc.traverse\_grids 2 | ======================= 3 | 4 | .. currentmodule:: nerfacc 5 | 6 | .. autofunction:: traverse_grids -------------------------------------------------------------------------------- /docs/source/apis/rendering.rst: -------------------------------------------------------------------------------- 1 | Rendering 2 | =================================== 3 | 4 | .. currentmodule:: nerfacc 5 | 6 | .. autofunction:: rendering 7 | -------------------------------------------------------------------------------- /docs/source/apis/utils.rst: -------------------------------------------------------------------------------- 1 | Utils 2 | =================================== 3 | 4 | Below are the basic functions that supports sampling and rendering. 5 | 6 | .. currentmodule:: nerfacc 7 | 8 | .. 
autosummary:: 9 | :nosignatures: 10 | :toctree: generated/ 11 | 12 | inclusive_prod 13 | exclusive_prod 14 | inclusive_sum 15 | exclusive_sum 16 | 17 | pack_info 18 | 19 | render_visibility_from_alpha 20 | render_visibility_from_density 21 | render_weight_from_alpha 22 | render_weight_from_density 23 | render_transmittance_from_alpha 24 | render_transmittance_from_density 25 | accumulate_along_rays 26 | 27 | importance_sampling 28 | searchsorted 29 | 30 | ray_aabb_intersect 31 | traverse_grids 32 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | import pytorch_sphinx_theme 2 | 3 | __version__ = None 4 | exec(open("../../nerfacc/version.py", "r").read()) 5 | 6 | # -- Project information 7 | 8 | project = "nerfacc" 9 | copyright = "2022, Ruilong" 10 | author = "Ruilong" 11 | 12 | release = __version__ 13 | 14 | # -- General configuration 15 | 16 | extensions = [ 17 | "sphinx.ext.napoleon", 18 | "sphinx.ext.duration", 19 | "sphinx.ext.doctest", 20 | "sphinx.ext.autodoc", 21 | "sphinx.ext.autosummary", 22 | "sphinx.ext.intersphinx", 23 | ] 24 | 25 | intersphinx_mapping = { 26 | "python": ("https://docs.python.org/3/", None), 27 | "sphinx": ("https://www.sphinx-doc.org/en/master/", None), 28 | } 29 | intersphinx_disabled_domains = ["std"] 30 | 31 | templates_path = ["_templates"] 32 | 33 | # -- Options for HTML output 34 | 35 | # html_theme = "furo" 36 | 37 | html_theme = "pytorch_sphinx_theme" 38 | html_theme_path = [pytorch_sphinx_theme.get_html_theme_path()] 39 | html_static_path = ["_static"] 40 | html_css_files = ["css/readthedocs.css"] 41 | 42 | # Ignore >>> when copying code 43 | copybutton_prompt_text = r">>> |\.\.\. " 44 | copybutton_prompt_is_regexp = True 45 | 46 | # Theme options are theme-specific and customize the look and feel of a theme 47 | # further. 
For a list of options available for each theme, see the 48 | # documentation. 49 | html_theme_options = { 50 | # The target url that the logo directs to. Unset to do nothing 51 | "logo_url": "https://www.nerfacc.com/", 52 | # "menu" is a list of dictionaries where you can specify the content and the 53 | # behavior of each item in the menu. Each item can either be a link or a 54 | # dropdown menu containing a list of links. 55 | "menu": [ 56 | # A link 57 | { 58 | "name": "GitHub", 59 | "url": "https://github.com/nerfstudio-project/nerfacc", 60 | }, 61 | # A dropdown menu 62 | # { 63 | # "name": "Projects", 64 | # "children": [ 65 | # # A vanilla dropdown item 66 | # { 67 | # "name": "nerfstudio", 68 | # "url": "https://docs.nerf.studio/", 69 | # "description": "The all-in-one repo for NeRFs", 70 | # }, 71 | # ], 72 | # # Optional, determining whether this dropdown menu will always be 73 | # # highlighted. 74 | # # "active": True, 75 | # }, 76 | ], 77 | } 78 | # html_theme_options = { 79 | # "canonical_url": "", 80 | # "analytics_id": "", 81 | # "logo_only": False, 82 | # "display_version": True, 83 | # "prev_next_buttons_location": "bottom", 84 | # "style_external_links": False, 85 | # # Toc options 86 | # "collapse_navigation": True, 87 | # "sticky_navigation": True, 88 | # "navigation_depth": 4, 89 | # "includehidden": True, 90 | # "titles_only": False 91 | # } 92 | 93 | # -- Options for EPUB output 94 | epub_show_urls = "footnote" 95 | 96 | # typehints 97 | autodoc_typehints = "description" 98 | -------------------------------------------------------------------------------- /docs/source/examples/camera.rst: -------------------------------------------------------------------------------- 1 | Camera Optimization NeRFs 2 | =================================== 3 | 4 | Performance Overview 5 | -------------------- 6 | 7 | 
+----------------------+----------------+----------------------------------+-----------------------+--------------------------+------------------------+------------------------+ 8 | | Methods | Dataset | Training Time :math:`\downarrow` | PSNR :math:`\uparrow` | LPIPS :math:`\downarrow` | E_R :math:`\downarrow` | E_T :math:`\downarrow` | 9 | +======================+================+==================================+=======================+==========================+========================+========================+ 10 | | BARF `[1]`_ | NeRF-Synthetic | 586min | 28.83 | 0.054 | 0.19 | 0.74 | 11 | +----------------------+----------------+----------------------------------+-----------------------+--------------------------+------------------------+------------------------+ 12 | | *+nerfacc (occgrid)* | | 130min | 30.11 | 0.044 | 0.07 | 0.35 | 13 | +----------------------+----------------+----------------------------------+-----------------------+--------------------------+------------------------+------------------------+ 14 | 15 | Implementation Details 16 | ---------------------- 17 | 18 | .. toctree:: 19 | :glob: 20 | :maxdepth: 1 21 | 22 | camera/* 23 | 24 | .. _`[1]`: https://arxiv.org/abs/2104.06405 25 | -------------------------------------------------------------------------------- /docs/source/examples/camera/barf.rst: -------------------------------------------------------------------------------- 1 | BARF 2 | ==================== 3 | 4 | In this example we showcase how to plug the nerfacc library into the *official* codebase 5 | of `BARF `_. See 6 | `our forked repo `_ 7 | for details. 8 | 9 | 10 | Benchmark: NeRF-Synthetic Dataset 11 | --------------------------------- 12 | *updated on 2023-04-04 with nerfacc==0.5.0* 13 | 14 | Our experiments are conducted on a single NVIDIA GeForce RTX 2080 Ti. 
15 | 16 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+ 17 | | PSNR | Lego | Mic |Materials| Chair |Hotdog | Ficus | Drums | Ship | MEAN | 18 | | | | | | | | | | | | 19 | +=======================+=======+=======+=========+=======+=======+=======+=======+=======+=======+ 20 | | BARF | 31.16 | 23.87 | 26.28 | 34.48 | 28.4 | 27.86 | 31.07 | 27.55 | 28.83 | 21 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+ 22 | | training time | 9.5hrs| 9.5hrs| 9.2hrs | 9.3hrs|12.3hrs| 9.3hrs| 9.3hrs| 9.5hrs| 9.8hrs| 23 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+ 24 | | camera errors (R) | 0.105 | 0.047 | 0.085 | 0.226 | 0.071 | 0.846 | 0.068 | 0.089 | 0.192 | 25 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+ 26 | | camera errors (T) | 0.0043| 0.0021| 0.0040 | 0.0120| 0.0026| 0.0272| 0.0025| 0.0044| 0.0074| 27 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+ 28 | | Ours (occ) | 32.25 | 24.77 | 27.73 | 35.84 | 29.98 | 28.83 | 32.84 | 28.62 | 30.11 | 29 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+ 30 | | training time | 1.5hrs| 2.0hrs| 2.0hrs | 2.3hrs| 2.2hrs| 1.9hrs| 2.2hrs| 2.3hrs| 2.0hrs| 31 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+ 32 | | camera errors (R) | 0.081 | 0.036 | 0.056 | 0.171 | 0.058 | 0.039 | 0.039 | 0.079 | 0.070 | 33 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+ 34 | | camera errors (T) | 0.0038| 0.0019| 0.0031 | 0.0106| 0.0021| 0.0013| 0.0014| 0.0041| 0.0035| 35 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+ 36 | 
-------------------------------------------------------------------------------- /docs/source/examples/dynamic.rst: -------------------------------------------------------------------------------- 1 | Dynamic NeRFs 2 | =================================== 3 | 4 | 5 | The :class:`nerfacc.PropNetEstimator` can naturally work with dynamic NeRFs. To make the 6 | :class:`nerfacc.OccGridEstimator` also work with dynamic NeRFs, we need to make some compromises. 7 | In these examples, we use the :class:`nerfacc.OccGridEstimator` to estimate the 8 | `maximum` opacity at each area `over all the timestamps`. This allows us to share the same estimator 9 | across all the timestamps, including those timestamps that are not in the training set. 10 | In other words, we use it to cache the union of the occupancy at all timestamps. 11 | It is not optimal but still makes the rendering very efficient if the motion is not overly significant. 12 | 13 | 14 | Performance Overview 15 | -------------------- 16 | *updated on 2023-04-04* 17 | 18 | +----------------------+-----------+----------------------------------+-----------------------+--------------------------+ 19 | | Methods              | Dataset   | Training Time :math:`\downarrow` | PSNR :math:`\uparrow` | LPIPS :math:`\downarrow` | 20 | +======================+===========+==================================+=======================+==========================+ 21 | | TiNeuVox `[1]`_      | HyperNeRF | 56.3min                          | 24.19                 | 0.425                    | 22 | +----------------------+-----------+----------------------------------+-----------------------+--------------------------+ 23 | | *+nerfacc (occgrid)* |           | 33.0min                          | 24.19                 | 0.434                    | 24 | +----------------------+-----------+----------------------------------+-----------------------+--------------------------+ 25 | | *+nerfacc (propnet)* |           | 34.3min                          | 24.26                 | 0.398                    | 26 | +----------------------+-----------+----------------------------------+-----------------------+--------------------------+ 27 | | TiNeuVox `[1]`_      | D-NeRF    | 
11.8min | 31.14 | 0.050 | 28 | +----------------------+-----------+----------------------------------+-----------------------+--------------------------+ 29 | | *+nerfacc (occgrid)* | | 4.2min | 31.75 | 0.038 | 30 | +----------------------+-----------+----------------------------------+-----------------------+--------------------------+ 31 | | K-Planes `[2]`_ | D-NeRF | 63.9min | 30.28 | 0.043 | 32 | +----------------------+-----------+----------------------------------+-----------------------+--------------------------+ 33 | | *+nerfacc (occgrid)* | | 38.8min | 30.35 | 0.042 | 34 | +----------------------+-----------+----------------------------------+-----------------------+--------------------------+ 35 | | T-NeRF `[3]`_ | D-NeRF | 20hours | 28.78 | 0.069 | 36 | +----------------------+-----------+----------------------------------+-----------------------+--------------------------+ 37 | | *+nerfacc (occgrid)* | | 58min | 32.22 | 0.040 | 38 | +----------------------+-----------+----------------------------------+-----------------------+--------------------------+ 39 | 40 | Implementation Details 41 | ---------------------- 42 | 43 | .. toctree:: 44 | :glob: 45 | :maxdepth: 1 46 | 47 | dynamic/* 48 | 49 | | 50 | 51 | 3rd-Party Use Cases 52 | ------------------- 53 | 54 | - `Representing Volumetric Videos as Dynamic MLP Maps, CVPR 2023 `_. 55 | 56 | .. _`[1]`: https://arxiv.org/abs/2205.15285 57 | .. _`[2]`: https://arxiv.org/abs/2301.10241 58 | .. _`[3]`: https://arxiv.org/abs/2011.13961 -------------------------------------------------------------------------------- /docs/source/examples/dynamic/kplanes.rst: -------------------------------------------------------------------------------- 1 | K-Planes 2 | ==================== 3 | 4 | In this example we showcase how to plug the nerfacc library into the *official* codebase 5 | of `K-Planes `_. See 6 | `our forked repo `_ 7 | for details. 
8 | 9 | 10 | Benchmark: D-NeRF Dataset 11 | --------------------------------- 12 | *updated on 2023-04-04 with nerfacc==0.5.0* 13 | 14 | Our experiments are conducted on a single NVIDIA GeForce RTX 2080 Ti. 15 | 16 | +----------------------+----------+---------+-------+---------+-------+--------+---------+-------+-------+ 17 | | PSNR | bouncing | hell | hook | jumping | lego | mutant | standup | trex | MEAN | 18 | | | balls | warrior | | jacks | | | | | | 19 | +======================+==========+=========+=======+=========+=======+========+=========+=======+=======+ 20 | | K-Planes | 39.10 | 23.95 | 27.76 | 31.11 | 25.18 | 32.44 | 32.51 | 30.25 | 30.29 | 21 | +----------------------+----------+---------+-------+---------+-------+--------+---------+-------+-------+ 22 | | training time | 68min | 70min | 70min | 70min | 70min | 71min | 72min | 71min | 70min | 23 | +----------------------+----------+---------+-------+---------+-------+--------+---------+-------+-------+ 24 | | Ours (occ) | 38.95 | 24.00 | 27.74 | 30.46 | 25.25 | 32.58 | 32.84 | 30.49 | 30.29 | 25 | +----------------------+----------+---------+-------+---------+-------+--------+---------+-------+-------+ 26 | | training time | 41min | 41min | 40min | 40min | 39min | 39min | 40min | 38min | 40min | 27 | +----------------------+----------+---------+-------+---------+-------+--------+---------+-------+-------+ 28 | -------------------------------------------------------------------------------- /docs/source/examples/dynamic/tineuvox.rst: -------------------------------------------------------------------------------- 1 | .. _`TiNeuVox Example`: 2 | 3 | TiNeuVox 4 | ==================== 5 | 6 | In this example we showcase how to plug the nerfacc library into the *official* codebase 7 | of `TiNeuVox `_. See 8 | `our forked repo `_ 9 | for details. 
10 | 11 | 12 | Benchmark: D-NeRF Dataset 13 | --------------------------------- 14 | *updated on 2023-04-04 with nerfacc==0.5.0* 15 | 16 | Our experiments are conducted on a single NVIDIA GeForce RTX 2080 Ti. 17 | 18 | +----------------------+----------+---------+-------+---------+-------+--------+---------+-------+-------+ 19 | | PSNR | bouncing | hell | hook | jumping | lego | mutant | standup | trex | MEAN | 20 | | | balls | warrior | | jacks | | | | | | 21 | +======================+==========+=========+=======+=========+=======+========+=========+=======+=======+ 22 | | TiNeuVox | 39.37 | 27.05 | 29.61 | 32.92 | 24.32 | 31.47 | 33.59 | 30.01 | 31.04 | 23 | +----------------------+----------+---------+-------+---------+-------+--------+---------+-------+-------+ 24 | | Training Time | 832s | 829s | 833s | 841s | 824s | 833s | 827s | 840s | 833s | 25 | +----------------------+----------+---------+-------+---------+-------+--------+---------+-------+-------+ 26 | | Ours (occ) | 40.56 | 27.17 | 31.35 | 33.44 | 25.17 | 34.05 | 35.35 | 32.29 | 32.42 | 27 | +----------------------+----------+---------+-------+---------+-------+--------+---------+-------+-------+ 28 | | Training Time | 378s | 302s | 342s | 325s | 355s | 360s | 346s | 362s | 346s | 29 | +----------------------+----------+---------+-------+---------+-------+--------+---------+-------+-------+ 30 | 31 | 32 | Benchmark: HyperNeRF Dataset 33 | --------------------------------- 34 | *updated on 2023-04-04 with nerfacc==0.5.0* 35 | 36 | Our experiments are conducted on a single NVIDIA GeForce RTX 2080 Ti. 
37 | 38 | +----------------------+----------+---------+-------+-------------+-------+ 39 | | PSNR | 3dprinter| broom |chicken| peel-banana | MEAN | 40 | | | | | | | | 41 | +======================+==========+=========+=======+=============+=======+ 42 | | TiNeuVox | 22.77 | 21.30 | 28.29 | 24.50 | 24.22 | 43 | +----------------------+----------+---------+-------+-------------+-------+ 44 | | Training Time | 3253s | 2811s | 3933s | 2705s | 3175s | 45 | +----------------------+----------+---------+-------+-------------+-------+ 46 | | Ours (occ) | 22.72 | 21.27 | 28.27 | 24.54 | 24.20 | 47 | +----------------------+----------+---------+-------+-------------+-------+ 48 | | Training Time | 2265s | 2221s | 2157s | 2101s | 2186s | 49 | +----------------------+----------+---------+-------+-------------+-------+ 50 | | Ours (prop) | 22.75 | 21.17 | 28.27 | 24.97 | 24.29 | 51 | +----------------------+----------+---------+-------+-------------+-------+ 52 | | Training Time | 2307s | 2281s | 2267s | 2510s | 2341s | 53 | +----------------------+----------+---------+-------+-------------+-------+ 54 | -------------------------------------------------------------------------------- /docs/source/examples/dynamic/tnerf.rst: -------------------------------------------------------------------------------- 1 | .. _`T-NeRF Example`: 2 | 3 | T-NeRF 4 | ==================== 5 | See code `examples/train_mlp_dnerf.py` at our `github repository`_ for details. 6 | 7 | Radiance Field 8 | -------------- 9 | Here we implement a very basic time-conditioned NeRF (T-NeRF) model (`examples/radiance_fields/mlp.py`) 10 | for dynamic scene reconstruction. 11 | The implementation is mostly follow the T-NeRF described in the `D-NeRF`_ paper, with a 8-layer-MLP 12 | for the radiance field and a 4-layer-MLP for the warping field. 
The only major difference is that 13 | we reduce the max frequency of the positional encoding from 10 to 4, to respect the fact that the 14 | motion of the object is relatively smooth. 15 | 16 | 17 | Benchmarks: D-NeRF Dataset 18 | --------------------------- 19 | *updated on 2022-10-08* 20 | 21 | Our experiments are conducted on a single NVIDIA TITAN RTX GPU. 22 | The training memory footprint is about 11GB. 23 | 24 | +----------------------+----------+---------+-------+---------+-------+--------+---------+-------+-------+ 25 | | PSNR | bouncing | hell | hook | jumping | lego | mutant | standup | trex | MEAN | 26 | | | balls | warrior | | jacks | | | | | | 27 | +======================+==========+=========+=======+=========+=======+========+=========+=======+=======+ 28 | | D-NeRF (~ days) | 32.80 | 25.02 | 29.25 | 32.80 | 21.64 | 31.29 | 32.79 | 31.75 | 29.67 | 29 | +----------------------+----------+---------+-------+---------+-------+--------+---------+-------+-------+ 30 | | Ours (~ 1 hr) | 39.49 | 25.58 | 31.86 | 32.73 | 24.32 | 35.55 | 35.90 | 32.33 | 32.22 | 31 | +----------------------+----------+---------+-------+---------+-------+--------+---------+-------+-------+ 32 | 33 | .. _`D-NeRF`: https://arxiv.org/abs/2011.13961 34 | .. 
_`github repository`: https://github.com/nerfstudio-project/nerfacc/ 35 | 36 | -------------------------------------------------------------------------------- /docs/source/examples/static.rst: -------------------------------------------------------------------------------- 1 | Static NeRFs 2 | =================================== 3 | 4 | Performance Overview 5 | -------------------- 6 | 7 | +----------------------+----------------+----------------------------------+-----------------------+--------------------------+ 8 | | Methods | Dataset | Training Time :math:`\downarrow` | PSNR :math:`\uparrow` | LPIPS :math:`\downarrow` | 9 | +======================+================+==================================+=======================+==========================+ 10 | | TensoRF `[1]`_ | Tanks&Temple | 19min | 28.11 | 0.167 | 11 | +----------------------+----------------+----------------------------------+-----------------------+--------------------------+ 12 | | *+nerfacc (occgrid)* | | 14min | 28.06 | 0.174 | 13 | +----------------------+----------------+----------------------------------+-----------------------+--------------------------+ 14 | | TensoRF `[1]`_ | NeRF-Synthetic | 10.3min | 32.73 | 0.049 | 15 | +----------------------+----------------+----------------------------------+-----------------------+--------------------------+ 16 | | *+nerfacc (occgrid)* | | 7.1min | 32.52 | 0.054 | 17 | +----------------------+----------------+----------------------------------+-----------------------+--------------------------+ 18 | | NeRF `[2]`_ | NeRF-Synthetic | days | 31.00 | 0.047 | 19 | +----------------------+----------------+----------------------------------+-----------------------+--------------------------+ 20 | | *+nerfacc (occgrid)* | | 1hr | 31.55 | 0.072 | 21 | +----------------------+----------------+----------------------------------+-----------------------+--------------------------+ 22 | | Instant-NGP `[3]`_ | NeRF-Synthetic | 4.4min | 32.35 | | 23 | 
+----------------------+----------------+----------------------------------+-----------------------+--------------------------+ 24 | | *+nerfacc (occgrid)* | | 4.5min | 33.11 | 0.053 | 25 | +----------------------+----------------+----------------------------------+-----------------------+--------------------------+ 26 | | *+nerfacc (propnet)* | | 4.0min | 31.76 | 0.062 | 27 | +----------------------+----------------+----------------------------------+-----------------------+--------------------------+ 28 | | Instant-NGP `[3]`_ | Mip-NeRF 360 | 5.3min | 25.93 | | 29 | +----------------------+----------------+----------------------------------+-----------------------+--------------------------+ 30 | | *+nerfacc (occgrid)* | | 5.0min | 26.41 | 0.353 | 31 | +----------------------+----------------+----------------------------------+-----------------------+--------------------------+ 32 | | *+nerfacc (propnet)* | | 4.9min | 27.58 | 0.292 | 33 | +----------------------+----------------+----------------------------------+-----------------------+--------------------------+ 34 | 35 | Implementation Details 36 | ---------------------- 37 | 38 | .. toctree:: 39 | :glob: 40 | :maxdepth: 1 41 | 42 | static/* 43 | 44 | | 45 | 46 | 3rd-Party Use Cases 47 | ------------------- 48 | 49 | - `nerfstudio `_: A collaboration friendly studio for NeRFs. 50 | - `modelscope `_: A collection of deep-learning algorithms. 51 | - `sdfstudio `_: A Unified Framework for Surface Reconstruction. 52 | - `instant-nsr-pl `_: Train NeuS in 10min. 53 | 54 | .. _`[1]`: https://arxiv.org/abs/2203.09517 55 | .. _`[2]`: https://arxiv.org/abs/2003.08934 56 | .. _`[3]`: https://arxiv.org/abs/2201.05989 -------------------------------------------------------------------------------- /docs/source/examples/static/nerf.rst: -------------------------------------------------------------------------------- 1 | .. 
_`Vanilla NeRF Example`: 2 | 3 | Vanilla NeRF 4 | ==================== 5 | See code `examples/train_mlp_nerf.py` at our `github repository`_ for details. 6 | 7 | 8 | Radiance Field 9 | -------------- 10 | We follow the original `NeRF`_ paper to implement a 8-layer-MLP radiance field (`examples/radiance_fields/mlp.py`) 11 | with positional encoding. 12 | 13 | .. note:: 14 | The vanilla Nerf paper uses two MLPs for course-to-fine sampling. Instead here we only use a 15 | single MLP with more samples (1024). Both ways share the same spirit to do dense sampling 16 | around the surface. Our fast rendering inheritly skip samples away from the surface 17 | so we can simplly increase the number of samples with a single MLP, to achieve the same goal 18 | with the coarse-to-fine sampling, without runtime or memory issue. 19 | 20 | 21 | Benchmark: Nerf-Synthetic Dataset 22 | --------------------------------- 23 | *updated on 2022-10-08* 24 | 25 | Our experiments are conducted on a single NVIDIA TITAN RTX GPU. 26 | The training memory footprint is about 10GB. 27 | 28 | +----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+ 29 | | PSNR | Lego | Mic |Materials| Chair |Hotdog | Ficus | Drums | Ship | MEAN | 30 | | | | | | | | | | | | 31 | +======================+=======+=======+=========+=======+=======+=======+=======+=======+=======+ 32 | | NeRF (~ days) | 32.54 | 32.91 | 29.62 | 33.00 | 36.18 | 30.13 | 25.01 | 28.65 | 31.00 | 33 | +----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+ 34 | | Ours (~ 1 hr) | 33.69 | 33.76 | 29.73 | 33.32 | 35.80 | 32.52 | 25.39 | 28.18 | 31.55 | 35 | +----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+ 36 | 37 | .. _`github repository`: https://github.com/nerfstudio-project/nerfacc/ 38 | .. 
_`NeRF`: https://arxiv.org/abs/2003.08934 39 | -------------------------------------------------------------------------------- /docs/source/examples/static/ngp.rst: -------------------------------------------------------------------------------- 1 | .. _`Instant-NGP Example`: 2 | 3 | Instant-NGP 4 | ==================== 5 | 6 | See code `examples/train_ngp_nerf_occ.py` and `examples/train_ngp_nerf_prop.py` at our 7 | `github repository`_ for details. 8 | 9 | 10 | Radiance Field 11 | -------------- 12 | We follow the `Instant-NGP`_ paper to implement the radiance field (`examples/radiance_fields/ngp.py`), 13 | and aligns the hyperparameters (e.g., hashencoder, mlp) with the paper. It is build on top of the 14 | `tiny-cuda-nn`_ library. 15 | 16 | 17 | Benchmark: Nerf-Synthetic Dataset 18 | --------------------------------- 19 | *updated on 2023-04-04 with nerfacc==0.5.0* 20 | 21 | Our experiments are conducted on a single NVIDIA TITAN RTX GPU. 22 | The training memory footprint is about 3GB. 23 | 24 | .. note:: 25 | 26 | The Instant-NGP paper makes use of the alpha channel in the images to apply random background 27 | augmentation during training. For fair comparision, we rerun their code with a constant white 28 | background during both training and testing. Also it is worth to mention that we didn't strictly 29 | follow the training receipe in the Instant-NGP paper, such as the learning rate schedule etc, as 30 | the purpose of this benchmark is to showcase instead of reproducing the paper. 
31 | 32 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+ 33 | | PSNR | Lego | Mic |Materials| Chair |Hotdog | Ficus | Drums | Ship | MEAN | 34 | | | | | | | | | | | | 35 | +=======================+=======+=======+=========+=======+=======+=======+=======+=======+=======+ 36 | |Instant-NGP 35k steps | 35.87 | 36.22 | 29.08 | 35.10 | 37.48 | 30.61 | 23.85 | 30.62 | 32.35 | 37 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+ 38 | | training time | 309s | 258s | 256s | 316s | 292s | 207s | 218s | 250s | 263s | 39 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+ 40 | |Ours (occ) 20k steps | 35.67 | 36.85 | 29.60 | 35.71 | 37.37 | 33.95 | 25.44 | 30.29 | 33.11 | 41 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+ 42 | | training time | 288s | 260s | 253s | 326s | 272s | 249s | 252s | 251s | 269s | 43 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+ 44 | |Ours (prop) 20k steps | 34.04 | 34.56 | 28.76 | 34.21 | 36.44 | 31.41 | 24.81 | 29.85 | 31.76 | 45 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+ 46 | | training time | 225s | 235s | 235s | 240s | 239s | 242s | 258s | 247s | 240s | 47 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+ 48 | 49 | 50 | Benchmark: Mip-NeRF 360 Dataset 51 | --------------------------------- 52 | *updated on 2023-04-04 with nerfacc==0.5.0* 53 | 54 | Our experiments are conducted on a single NVIDIA TITAN RTX GPU. 55 | 56 | .. note:: 57 | `Ours (prop)` combines the proposal network (:class:`nerfacc.PropNetEstimator`) with the 58 | Instant-NGP radiance field. This is exactly what the `Nerfacto`_ model is doing in the 59 | `nerfstudio`_ project. 
In fact, the hyperparameters for `Ours (prop)` in this experiment 60 | are aligned with the `Nerfacto`_ model. 61 | 62 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+ 63 | | PSNR |Bicycle| Garden| Stump | Bonsai|Counter|Kitchen| Room | MEAN | 64 | | | | | | | | | | | 65 | +=======================+=======+=======+=========+=======+=======+=======+=======+=======+ 66 | |NeRF++ (~days) | 22.64 | 24.32 | 23.34 | 29.15 | 26.38 | 27.80 | 28.87 | 26.21 | 67 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+ 68 | |Mip-NeRF 360 (~days) | 24.37 | 26.98 | 26.40 | 33.46 | 29.55 | 32.23 | 31.63 | 29.23 | 69 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+ 70 | |Instant-NGP 35k steps | 22.40 | 24.86 | 23.17 | 24.41 | 27.38 | 29.07 | 30.24 | 25.93 | 71 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+ 72 | | training time | 301s | 339s | 295s | 279s | 339s | 366s | 317s | 319s | 73 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+ 74 | |Ours (occ) 20k steps | 22.40 | 23.94 | 22.98 | 30.09 | 26.84 | 28.03 | 30.60 | 26.41 | 75 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+ 76 | | training time | 277s | 302s | 299s | 278s | 315s | 331s | 301s | 300s | 77 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+ 78 | |Ours (prop) 20k steps | 23.23 | 25.42 | 25.24 | 30.71 | 26.74 | 30.70 | 30.99 | 27.58 | 79 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+ 80 | | training time | 276s | 293s | 291s | 291s | 291s | 295s | 287s | 289s | 81 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+ 82 | 83 | 84 | .. _`github repository`: https://github.com/nerfstudio-project/nerfacc/ 85 | .. 
_`Instant-NGP`: https://arxiv.org/abs/2201.05989 86 | .. _`tiny-cuda-nn`: https://github.com/NVlabs/tiny-cuda-nn 87 | .. _`Nerfacto`: https://docs.nerf.studio/nerfology/methods/nerfacto.html 88 | .. _`nerfstudio`: https://docs.nerf.studio/ -------------------------------------------------------------------------------- /docs/source/examples/static/tensorf.rst: -------------------------------------------------------------------------------- 1 | .. _`TensoRF Example`: 2 | 3 | TensoRF 4 | ==================== 5 | 6 | In this example we showcase how to plug the nerfacc library into the *official* codebase 7 | of `TensoRF `_. See 8 | `our forked repo `_ 9 | for details. 10 | 11 | 12 | Benchmark: NeRF-Synthetic Dataset 13 | --------------------------------- 14 | *updated on 2023-04-04 with nerfacc==0.5.0* 15 | 16 | Our experiments are conducted on a single NVIDIA GeForce RTX 2080 Ti. 17 | 18 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+ 19 | | PSNR | Lego | Mic |Materials| Chair |Hotdog | Ficus | Drums | Ship | MEAN | 20 | | | | | | | | | | | | 21 | +=======================+=======+=======+=========+=======+=======+=======+=======+=======+=======+ 22 | | TensoRF | 35.14 | 25.70 | 33.69 | 37.03 | 36.04 | 29.77 | 34.35 | 30.12 | 32.73 | 23 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+ 24 | | training time | 504s | 522s | 633s | 648s | 584s | 824s | 464s | 759s | 617s | 25 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+ 26 | | Ours (occ) | 35.05 | 25.70 | 33.54 | 36.99 | 35.62 | 29.76 | 34.08 | 29.39 | 32.52 | 27 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+ 28 | | training time | 310s | 312s | 463s | 433s | 363s | 750s | 303s | 468s | 425s | 29 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+ 30 | 31 | 
32 | Benchmark: Tanks&Temples Dataset 33 | --------------------------------- 34 | *updated on 2023-04-04 with nerfacc==0.5.0* 35 | 36 | Our experiments are conducted on a single NVIDIA GeForce RTX 2080 Ti. 37 | 38 | +-----------------------+-------+-------------+--------+-------+-------+ 39 | | PSNR | Barn | Caterpillar | Family | Truck | MEAN | 40 | | | | | | | | 41 | +=======================+=======+=============+========+=======+=======+ 42 | | TensoRF | 26.88 | 25.48 | 33.48 | 26.59 | 28.11 | 43 | +-----------------------+-------+-------------+--------+-------+-------+ 44 | | training time | 24min | 19min | 15min | 18min | 19min | 45 | +-----------------------+-------+-------------+--------+-------+-------+ 46 | | Ours (occ) | 26.74 | 25.64 | 33.16 | 26.70 | 28.06 | 47 | +-----------------------+-------+-------------+--------+-------+-------+ 48 | | training time | 19min | 15min | 11min | 13min | 14min | 49 | +-----------------------+-------+-------------+--------+-------+-------+ 50 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | NerfAcc Documentation 2 | =================================== 3 | 4 | NerfAcc is a PyTorch Nerf acceleration toolbox for both training and inference. It focus on 5 | efficient sampling in the volumetric rendering pipeline of radiance fields, which is 6 | universal and plug-and-play for most of the NeRFs. 7 | With minimal modifications to the existing codebases, Nerfacc provides significant speedups 8 | in training various recent NeRF papers. 9 | **And it is pure Python interface with flexible APIs!** 10 | 11 | | 12 | 13 | .. 
image:: _static/images/teaser.jpg 14 | :align: center 15 | 16 | | 17 | 18 | | Github: https://github.com/nerfstudio-project/nerfacc 19 | | Paper: https://arxiv.org/pdf/2305.04966.pdf 20 | | Authors: `Ruilong Li`_, `Hang Gao`_, `Matthew Tancik`_, `Angjoo Kanazawa`_ 21 | 22 | .. note:: 23 | 24 | Though this repo only contains examples for single scene optimization, 25 | we believe generalizable NeRFs across multiple scenes can also be accelerate with our 26 | :class:`nerfacc.PropNetEstimator`. Examples will be added soon. 27 | 28 | 29 | Installation: 30 | ------------- 31 | 32 | **Dependence**: Please install `Pytorch`_ first. 33 | 34 | The easist way is to install from PyPI. In this way it will build the CUDA code **on the first run** (JIT). 35 | 36 | .. code-block:: console 37 | 38 | $ pip install nerfacc 39 | 40 | Or install from source. In this way it will build the CUDA code during installation. 41 | 42 | .. code-block:: console 43 | 44 | $ pip install git+https://github.com/nerfstudio-project/nerfacc.git 45 | 46 | We also provide pre-built wheels covering major combinations of Pytorch + CUDA versions. 47 | See our `Github README`_ for what we support. 48 | 49 | .. code-block:: console 50 | 51 | // e.g. torch 1.13.0 + CUDA 11.7 52 | $ pip install nerfacc -f https://nerfacc-bucket.s3.us-west-2.amazonaws.com/whl/torch-1.13.0_cu117.html 53 | 54 | 55 | Usage: 56 | ------------- 57 | 58 | The idea of NerfAcc is to perform efficient volumetric sampling with a computationally cheap estimator to discover surfaces. 59 | So NerfAcc can work with any user-defined radiance field. To plug the NerfAcc rendering pipeline into your code and enjoy 60 | the acceleration, you only need to define two functions with your radience field. 61 | 62 | - `sigma_fn`: Compute density at each sample. It will be used by the estimator 63 | (e.g., :class:`nerfacc.OccGridEstimator`, :class:`nerfacc.PropNetEstimator`) to discover surfaces. 
64 | - `rgb_sigma_fn`: Compute color and density at each sample. It will be used by 65 | :func:`nerfacc.rendering` to conduct differentiable volumetric rendering. This function 66 | will receive gradients to update your radiance field. 67 | 68 | An simple example is like this: 69 | 70 | .. code-block:: python 71 | 72 | import torch 73 | from torch import Tensor 74 | import nerfacc 75 | 76 | radiance_field = ... # network: a NeRF model 77 | rays_o: Tensor = ... # ray origins. (n_rays, 3) 78 | rays_d: Tensor = ... # ray normalized directions. (n_rays, 3) 79 | optimizer = ... # optimizer 80 | 81 | estimator = nerfacc.OccGridEstimator(...) 82 | 83 | def sigma_fn( 84 | t_starts: Tensor, t_ends:Tensor, ray_indices: Tensor 85 | ) -> Tensor: 86 | """ Define how to query density for the estimator.""" 87 | t_origins = rays_o[ray_indices] # (n_samples, 3) 88 | t_dirs = rays_d[ray_indices] # (n_samples, 3) 89 | positions = t_origins + t_dirs * (t_starts + t_ends)[:, None] / 2.0 90 | sigmas = radiance_field.query_density(positions) 91 | return sigmas # (n_samples,) 92 | 93 | def rgb_sigma_fn( 94 | t_starts: Tensor, t_ends: Tensor, ray_indices: Tensor 95 | ) -> Tuple[Tensor, Tensor]: 96 | """ Query rgb and density values from a user-defined radiance field. """ 97 | t_origins = rays_o[ray_indices] # (n_samples, 3) 98 | t_dirs = rays_d[ray_indices] # (n_samples, 3) 99 | positions = t_origins + t_dirs * (t_starts + t_ends)[:, None] / 2.0 100 | rgbs, sigmas = radiance_field(positions, condition=t_dirs) 101 | return rgbs, sigmas # (n_samples, 3), (n_samples,) 102 | 103 | # Efficient Raymarching: 104 | # ray_indices: (n_samples,). t_starts: (n_samples,). t_ends: (n_samples,). 105 | ray_indices, t_starts, t_ends = estimator.sampling( 106 | rays_o, rays_d, sigma_fn=sigma_fn, near_plane=0.2, far_plane=1.0, 107 | early_stop_eps=1e-4, alpha_thre=1e-2, 108 | ) 109 | 110 | # Differentiable Volumetric Rendering. 111 | # colors: (n_rays, 3). opaicity: (n_rays, 1). depth: (n_rays, 1). 
112 | color, opacity, depth, extras = nerfacc.rendering( 113 | t_starts, t_ends, ray_indices, n_rays=rays_o.shape[0], rgb_sigma_fn=rgb_sigma_fn 114 | ) 115 | 116 | # Optimize: Both the network and rays will receive gradients 117 | optimizer.zero_grad() 118 | loss = F.mse_loss(color, color_gt) 119 | loss.backward() 120 | optimizer.step() 121 | 122 | 123 | Links: 124 | ------------- 125 | 126 | .. toctree:: 127 | :glob: 128 | :maxdepth: 1 129 | :caption: Methodology 130 | 131 | methodology/* 132 | 133 | .. toctree:: 134 | :glob: 135 | :maxdepth: 1 136 | :caption: Python API 137 | 138 | apis/* 139 | 140 | .. toctree:: 141 | :glob: 142 | :maxdepth: 1 143 | :caption: Example Usages and Benchmarks 144 | 145 | examples/* 146 | 147 | .. toctree:: 148 | :maxdepth: 1 149 | :caption: Projects 150 | 151 | nerfstudio 152 | sdfstudio 153 | instant-nsr-pl 154 | 155 | .. _`vanilla Nerf`: https://arxiv.org/abs/2003.08934 156 | .. _`Instant-NGP Nerf`: https://arxiv.org/abs/2201.05989 157 | .. _`D-Nerf`: https://arxiv.org/abs/2011.13961 158 | .. _`MipNerf360`: https://arxiv.org/abs/2111.12077 159 | .. _`pixel-Nerf`: https://arxiv.org/abs/2012.02190 160 | .. _`Nerf++`: https://arxiv.org/abs/2010.07492 161 | 162 | .. _`Ruilong Li`: https://www.liruilong.cn/ 163 | .. _`Hang Gao`: https://hangg7.com/ 164 | .. _`Matthew Tancik`: https://www.matthewtancik.com/ 165 | .. _`Angjoo Kanazawa`: https://people.eecs.berkeley.edu/~kanazawa/ 166 | 167 | .. _`Github README`: https://github.com/nerfstudio-project/nerfacc#readme 168 | 169 | .. _`PyTorch`: https://pytorch.org/get-started/locally/ 170 | -------------------------------------------------------------------------------- /docs/source/methodology/coding.rst: -------------------------------------------------------------------------------- 1 | .. 
_`Efficient Coding`: 2 | 3 | Efficient Coding 4 | ================ 5 | 6 | Monitor the GPU Utilization 7 | ---------------------------- 8 | 9 | The rule of thumb is to maximize the computation power you have. When working with a GPU, 10 | that means maximizing the percentage of GPU kernels running at the same time. This 11 | can be monitored by the GPU utilization (last column) reported by the `nvidia-smi` command: 12 | 13 | .. image:: ../_static/images/coding/gpu_util.png 14 | :align: center 15 | 16 | | 17 | 18 | If the GPU utilization is less than 100%, it means some GPU kernels are idling 19 | from time to time (if not all the time). This is of course not good for performance. 20 | Under the hood, Pytorch will try to use all the GPU kernels to parallelize the computation, 21 | but in most cases you don't see 100%. Why is that? 22 | 23 | 24 | The first reason is simply that there might not be much to parallelize, in which case 25 | we cannot do much other than increase the batch size. For example 26 | 27 | .. code-block:: python 28 | 29 | # Only 1000 threads are running at the same time to create this tensor. 30 | # So we see 28% GPU utilization. 31 | while True: torch.zeros((1000), device="cuda") 32 | # Now we see 100% GPU utilization. 33 | while True: torch.zeros((10000000), device="cuda") 34 | 35 | The second reason, which is more common, is that there is *CPU-GPU synchronization* 36 | happening in the code. For example: 37 | 38 | .. code-block:: python 39 | 40 | data = torch.rand((10000000), device="cuda") 41 | mask = data > 0.5 42 | ids = torch.where(mask)[0] 43 | assert torch.all(data[mask] == data[ids]) 44 | 45 | # 100% GPU utilization. 46 | while True: data[ids] 47 | # 95% GPU utilization. 48 | while True: data[mask] 49 | 50 | Besides, if there are many CPU operations in the pipeline, such as data loading and 51 | preprocessing, they might also cause the GPU utilization to be low.
In this case, you 52 | can try to use `torch.utils.data.DataLoader` to overlap the data-processing time 53 | with the GPU computation time. 54 | 55 | Avoid CPU-GPU Synchronization 56 | ----------------------------- 57 | 58 | In the above example, if you time your code, you will see a significant difference: 59 | 60 | .. code-block:: python 61 | 62 | # 177 µs ± 15.4 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each) 63 | %timeit data[ids] 64 | # 355 µs ± 466 ns per loop (mean ± std. dev. of 7 runs, 1,000 loops each) 65 | %timeit data[mask] 66 | 67 | Explanation: In this case, the mask operation needs to decide the size of the output tensor, which 68 | lives on the CPU, based on the number of `True` values in the mask that lives on the GPU. So 69 | a synchronization is required. The index-selection operation, on the other hand, 70 | already knows the size of the output tensor from the size of `ids`, so no 71 | synchronization is required. 72 | 73 | You may argue that in this case we can't really improve things because the `ids` are computed 74 | from the `mask`, which requires the synchronization anyway (`torch.where`). However, 75 | in many cases we can avoid the synchronization by writing the code carefully. For example: 76 | 77 | .. code-block:: python 78 | 79 | # no sync. 67.3 µs ± 5.01 ns per loop 80 | while True: torch.zeros((10000000), device="cuda") 81 | # sync. 13.7 ms ± 320 µs per loop 82 | while True: torch.zeros((10000000)).to("cuda") 83 | 84 | Operations that require synchronization include `torch.where`, `tensor.item()`, 85 | `print(tensor)`, `tensor.to(device)`, `torch.nonzero`, etc. Just imagine that those functions 86 | have an inherent `torch.cuda.synchronize()` call under the hood. See the 87 | `official guide `_ 88 | if you are interested in more details. 89 | 90 | In our library, :func:`nerfacc.traverse_grids` is a function that requires synchronization, 91 | because it needs to know the size of the output tensor when traversing the grids.
As a result, 92 | sampling with :class:`nerfacc.OccGridEstimator` also requires synchronization. But there is 93 | no workaround in this case, so just be aware of it. 94 | 95 | 96 | Profiling 97 | ----------------------------- 98 | 99 | There are plenty of tools for profiling. My personal favorite is 100 | `line_profiler `_ which will give you *per-line* runtime 101 | of a function with a simple decorator `@profile`. It is very useful for finding where the bottleneck 102 | is in your code. It is worth noting that due to the asynchronous nature of Pytorch code, you would 103 | need to set `CUDA_LAUNCH_BLOCKING=1` when profiling your code (no matter which profiling tool you are using). 104 | This variable forces CPU-GPU synchronization for every torch function (equivalent to adding 105 | `torch.cuda.synchronize()` everywhere), which can reveal the true runtime of each line of code. 106 | And of course, with `CUDA_LAUNCH_BLOCKING=1` you will get a slower total runtime, so don't forget to 107 | remove it when you are done profiling. -------------------------------------------------------------------------------- /docs/source/methodology/sampling.rst: -------------------------------------------------------------------------------- 1 | .. _`Efficient Sampling`: 2 | 3 | Efficient Sampling 4 | =================================== 5 | 6 | Importance Sampling via Transmittance Estimator. 7 | ------------------------------------------------- 8 | 9 | Efficient sampling is a well-explored problem in Graphics, wherein the 10 | emphasis is on identifying regions that make the most significant 11 | contribution to the final rendering. This objective is generally accomplished 12 | through importance sampling, which aims to distribute samples based on the 13 | probability density function (PDF), denoted as :math:`p(t)`, between the range 14 | of :math:`[t_n, t_f]`.
By computing the cumulative distribution function (CDF) 15 | through integration, *i.e.*, :math:`F(t) = \int_{t_n}^{t} p(v)\,dv`, 16 | samples are generated using the inverse transform sampling method: 17 | 18 | .. math:: 19 | 20 | t = F^{-1}(u) \quad \text{where} \quad u \sim \mathcal{U}[0,1]. 21 | 22 | In volumetric rendering, the contribution of each sample to the final 23 | rendering is expressed by the accumulation weights :math:`T(t)\sigma(t)`: 24 | 25 | .. math:: 26 | 27 | C(\mathbf{r}) = \int_{t_n}^{t_f} T(t)\,\sigma(t)\,c(t)\,dt 28 | 29 | Hence, the PDF for volumetric rendering is :math:`p(t) = T(t)\sigma(t)` 30 | and the CDF is: 31 | 32 | .. math:: 33 | 34 | F(t) = \int_{t_n}^{t} T(v)\sigma(v)\,dv = 1 - T(t) 35 | 36 | Therefore, inverse sampling the CDF :math:`F(t)` is equivalent to inverse 37 | sampling the transmittance :math:`T(t)`. A transmittance estimator is sufficient 38 | to determine the optimal samples. We refer readers to the 39 | `SIGGRAPH 2017 Course: Production Volume Rendering`_ for more details about this 40 | concept if within interests. 41 | 42 | Occupancy Grid Estimator. 43 | ---------------------------- 44 | 45 | .. image:: ../_static/images/illustration_occgrid.png 46 | :class: float-right 47 | :width: 200px 48 | 49 | The idea of Occupancy Grid is to cache the density in the scene with a binaraized voxel grid. When 50 | sampling, the ray marches through the grid with a preset step sizes, and skip the empty regions by querying 51 | the voxel grid. Intuitively, the binaraized voxel grid is an *estimator* of the radiance field, with much 52 | faster readout. This technique is proposed in `Instant-NGP`_ with highly optimized CUDA implementations. 53 | More formally, The estimator describes a binaraized density distribution :math:`\hat{\sigma}` along 54 | the ray with a conservative threshold :math:`\tau`: 55 | 56 | .. 
math:: 57 | 58 | \hat{\sigma}(t_i) = \mathbb{1}\big[\sigma(t_i) > \tau\big] 59 | 60 | Consequently, the piece-wise constant PDF can be expressed as 61 | 62 | .. math:: 63 | 64 | p(t_i) = \hat{\sigma}(t_i) / \sum_{j=1}^{n} \hat{\sigma}(t_j) 65 | 66 | and the piece-wise linear transmittance estimator is 67 | 68 | .. math:: 69 | 70 | T(t_i) = 1 - \sum_{j=1}^{i-1}\hat{\sigma}(t_j) / \sum_{j=1}^{n} \hat{\sigma}(t_j) 71 | 72 | See the figure below for an illustration. 73 | 74 | .. rst-class:: clear-both 75 | 76 | .. image:: ../_static/images/plot_occgrid.png 77 | :align: center 78 | 79 | | 80 | 81 | In `nerfacc`, this is implemented via the :class:`nerfacc.OccGridEstimator` class. 82 | 83 | Proposal Network Estimator. 84 | ----------------------------- 85 | 86 | .. image:: ../_static/images/illustration_propnet.png 87 | :class: float-right 88 | :width: 200px 89 | 90 | Another type of approach is to directly estimate the PDF along the ray with discrete samples. 91 | In `vanilla NeRF`_, the coarse MLP is trained using volumetric rendering loss to output a set of 92 | densities :math:`{\sigma(t_i)}`. This allows for the creation of a piece-wise constant PDF: 93 | 94 | .. math:: 95 | 96 | p(t_i) = \sigma(t_i)\exp(-\sigma(t_i)\,dt) 97 | 98 | and a piece-wise linear transmittance estimator: 99 | 100 | .. math:: 101 | 102 | T(t_i) = \exp(-\sum_{j=1}^{i-1}\sigma(t_i)\,dt) 103 | 104 | This approach was further improved in `Mip-NeRF 360`_ with a PDF matching loss, which allows for 105 | the use of a much smaller MLP in the coarse level, namely Proposal Network, to speedup the 106 | PDF construction. 107 | 108 | See the figure below for an illustration. 109 | 110 | .. image:: ../_static/images/plot_propnet.png 111 | :align: center 112 | 113 | | 114 | 115 | In `nerfacc`, this is implemented via the :class:`nerfacc.PropNetEstimator` class. 116 | 117 | Which Estimator to use? 
118 | ----------------------- 119 | - :class:`nerfacc.OccGridEstimator` is a generally more efficient when most of the space in the scene is empty, such as in the case of `NeRF-Synthetic`_ dataset. But it still places samples within occluded areas that contribute little to the final rendering (e.g., the last sample in the above illustration). 120 | 121 | - :class:`nerfacc.PropNetEstimator` generally provide more accurate transmittance estimation, enabling samples to concentrate more on high-contribution areas (e.g., surfaces) and to be more spread out in both empty and occluded regions. Also this method works nicely on unbouned scenes as it does not require a preset bounding box of the scene. Thus datasets like `Mip-NeRF 360`_ are better suited with this estimator. 122 | 123 | .. _`SIGGRAPH 2017 Course: Production Volume Rendering`: https://graphics.pixar.com/library/ProductionVolumeRendering/paper.pdf 124 | .. _`Instant-NGP`: https://arxiv.org/abs/2201.05989 125 | .. _`Mip-NeRF 360`: https://arxiv.org/abs/2111.12077 126 | .. _`vanilla NeRF`: https://arxiv.org/abs/2003.08934 127 | .. _`NeRF-Synthetic`: https://arxiv.org/abs/2003.08934 -------------------------------------------------------------------------------- /examples/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nerfstudio-project/nerfacc/57ccfa14feb94975836ea6913149a86737220f2b/examples/datasets/__init__.py -------------------------------------------------------------------------------- /examples/datasets/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2022 Ruilong Li, UC Berkeley. 
"""
Copyright (c) 2022 Ruilong Li, UC Berkeley.
"""

import collections

# Lightweight per-ray container: ray origins and viewing directions.
Rays = collections.namedtuple("Rays", ("origins", "viewdirs"))


def namedtuple_map(fn, tup):
    """Apply `fn` to each non-None field of `tup` and rebuild `tup`'s namedtuple type."""
    mapped_fields = [None if field is None else fn(field) for field in tup]
    return type(tup)(*mapped_fields)
"""Train a vanilla-NeRF MLP radiance field with an occupancy-grid estimator.

Copyright (c) 2022 Ruilong Li, UC Berkeley.
"""

import argparse
import pathlib
import time

import imageio
import numpy as np
import torch
import torch.nn.functional as F
import tqdm
from datasets.nerf_synthetic import SubjectLoader
from lpips import LPIPS
from radiance_fields.mlp import VanillaNeRFRadianceField

from examples.utils import (
    NERF_SYNTHETIC_SCENES,
    render_image_with_occgrid,
    set_random_seed,
)
from nerfacc.estimators.occ_grid import OccGridEstimator

device = "cuda:0"
set_random_seed(42)

parser = argparse.ArgumentParser()
parser.add_argument(
    "--data_root",
    type=str,
    default=str(pathlib.Path.cwd() / "data/nerf_synthetic"),
    help="the root dir of the dataset",
)
parser.add_argument(
    "--train_split",
    type=str,
    default="train",
    choices=["train", "trainval"],
    help="which train split to use",
)
parser.add_argument(
    "--model_path",
    type=str,
    default=None,
    help="the path of the pretrained model",
)
parser.add_argument(
    "--scene",
    type=str,
    default="lego",
    choices=NERF_SYNTHETIC_SCENES,
    help="which scene to use",
)
parser.add_argument(
    "--test_chunk_size",
    type=int,
    default=4096,
)
args = parser.parse_args()

# training parameters
max_steps = 50000
init_batch_size = 1024
target_sample_batch_size = 1 << 16  # keep ~64k samples per optimization step
# scene parameters
aabb = torch.tensor([-1.5, -1.5, -1.5, 1.5, 1.5, 1.5], device=device)
near_plane = 0.0
far_plane = 1.0e10  # NOTE(review): not passed to the renderer below — confirm intended
# model parameters
grid_resolution = 128
grid_nlvl = 1
# render parameters
render_step_size = 5e-3

# setup the dataset
train_dataset = SubjectLoader(
    subject_id=args.scene,
    root_fp=args.data_root,
    split=args.train_split,
    num_rays=init_batch_size,
    device=device,
)
test_dataset = SubjectLoader(
    subject_id=args.scene,
    root_fp=args.data_root,
    split="test",
    num_rays=None,
    device=device,
)

# Binary occupancy grid used to skip empty space during ray marching.
estimator = OccGridEstimator(
    roi_aabb=aabb, resolution=grid_resolution, levels=grid_nlvl
).to(device)

# setup the radiance field we want to train.
radiance_field = VanillaNeRFRadianceField().to(device)
optimizer = torch.optim.Adam(radiance_field.parameters(), lr=5e-4)
scheduler = torch.optim.lr_scheduler.MultiStepLR(
    optimizer,
    milestones=[
        max_steps // 2,
        max_steps * 3 // 4,
        max_steps * 5 // 6,
        max_steps * 9 // 10,
    ],
    gamma=0.33,
)

# LPIPS expects NCHW images in [-1, 1]; test images come as HWC in [0, 1].
lpips_net = LPIPS(net="vgg").to(device)
lpips_norm_fn = lambda x: x[None, ...].permute(0, 3, 1, 2) * 2 - 1
lpips_fn = lambda x, y: lpips_net(lpips_norm_fn(x), lpips_norm_fn(y)).mean()

if args.model_path is not None:
    # Restore model/optimizer/scheduler/grid state to resume training.
    checkpoint = torch.load(args.model_path)
    radiance_field.load_state_dict(checkpoint["radiance_field_state_dict"])
    optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
    scheduler.load_state_dict(checkpoint["scheduler_state_dict"])
    estimator.load_state_dict(checkpoint["estimator_state_dict"])
    init_step = checkpoint["step"]
else:
    init_step = 0

# training
tic = time.time()
# BUGFIX: start from the checkpoint step instead of 0, so --model_path actually
# resumes training (previously the loaded step was discarded and training
# restarted from scratch). Re-running the saved final step also triggers the
# checkpoint/evaluation branch for a fully-trained model.
for step in range(init_step, max_steps + 1):
    radiance_field.train()
    estimator.train()

    # Sample a random training view.
    i = torch.randint(0, len(train_dataset), (1,)).item()
    data = train_dataset[i]

    render_bkgd = data["color_bkgd"]
    rays = data["rays"]
    pixels = data["pixels"]

    def occ_eval_fn(x):
        # Opacity proxy: density integrated over one marching step.
        density = radiance_field.query_density(x)
        return density * render_step_size

    # update occupancy grid
    estimator.update_every_n_steps(
        step=step,
        occ_eval_fn=occ_eval_fn,
        occ_thre=1e-2,
    )

    # render
    rgb, acc, depth, n_rendering_samples = render_image_with_occgrid(
        radiance_field,
        estimator,
        rays,
        # rendering options
        near_plane=near_plane,
        render_step_size=render_step_size,
        render_bkgd=render_bkgd,
    )
    if n_rendering_samples == 0:
        # All rays missed occupied space (common early on); skip this step.
        continue

    if target_sample_batch_size > 0:
        # dynamic batch size for rays to keep sample batch size constant.
        num_rays = len(pixels)
        num_rays = int(
            num_rays * (target_sample_batch_size / float(n_rendering_samples))
        )
        train_dataset.update_num_rays(num_rays)

    # compute loss
    loss = F.smooth_l1_loss(rgb, pixels)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    scheduler.step()

    if step % 5000 == 0:
        # Periodic progress report; PSNR is computed from MSE, not the
        # smooth-L1 training loss.
        elapsed_time = time.time() - tic
        mse = F.mse_loss(rgb, pixels)
        psnr = -10.0 * torch.log(mse) / np.log(10.0)
        print(
            f"elapsed_time={elapsed_time:.2f}s | step={step} | "
            f"loss={mse:.5f} | psnr={psnr:.2f} | "
            f"n_rendering_samples={n_rendering_samples:d} | num_rays={len(pixels):d} | "
            f"max_depth={depth.max():.3f} | "
        )

    if step > 0 and step % max_steps == 0:
        # Final step: checkpoint everything needed to resume, then evaluate.
        model_save_path = str(pathlib.Path.cwd() / f"mlp_nerf_{step}")
        torch.save(
            {
                "step": step,
                "radiance_field_state_dict": radiance_field.state_dict(),
                "optimizer_state_dict": optimizer.state_dict(),
                "scheduler_state_dict": scheduler.state_dict(),
                "estimator_state_dict": estimator.state_dict(),
            },
            model_save_path,
        )

        # evaluation
        radiance_field.eval()
        estimator.eval()

        psnrs = []
        lpips = []
        with torch.no_grad():
            for i in tqdm.tqdm(range(len(test_dataset))):
                data = test_dataset[i]
                render_bkgd = data["color_bkgd"]
                rays = data["rays"]
                pixels = data["pixels"]

                # rendering
                rgb, acc, depth, _ = render_image_with_occgrid(
                    radiance_field,
                    estimator,
                    rays,
                    # rendering options
                    near_plane=near_plane,
                    render_step_size=render_step_size,
                    render_bkgd=render_bkgd,
                    # test options
                    test_chunk_size=args.test_chunk_size,
                )
                mse = F.mse_loss(rgb, pixels)
                psnr = -10.0 * torch.log(mse) / np.log(10.0)
                psnrs.append(psnr.item())
                lpips.append(lpips_fn(rgb, pixels).item())
                # if i == 0:
                #     imageio.imwrite(
                #         "rgb_test.png",
                #         (rgb.cpu().numpy() * 255).astype(np.uint8),
                #     )
                #     imageio.imwrite(
                #         "rgb_error.png",
                #         (
                #             (rgb - pixels).norm(dim=-1).cpu().numpy() * 255
                #         ).astype(np.uint8),
                #     )
        psnr_avg = sum(psnrs) / len(psnrs)
        lpips_avg = sum(lpips) / len(lpips)
        print(f"evaluation: psnr_avg={psnr_avg}, lpips_avg={lpips_avg}")
3 | """ 4 | 5 | import argparse 6 | import math 7 | import pathlib 8 | import time 9 | 10 | import imageio 11 | import numpy as np 12 | import torch 13 | import torch.nn.functional as F 14 | import tqdm 15 | from datasets.dnerf_synthetic import SubjectLoader 16 | from lpips import LPIPS 17 | from radiance_fields.mlp import TNeRFRadianceField 18 | 19 | from examples.utils import render_image_with_occgrid, set_random_seed 20 | from nerfacc.estimators.occ_grid import OccGridEstimator 21 | 22 | device = "cuda:0" 23 | set_random_seed(42) 24 | 25 | parser = argparse.ArgumentParser() 26 | parser.add_argument( 27 | "--data_root", 28 | type=str, 29 | default=str(pathlib.Path.cwd() / "data/dnerf"), 30 | help="the root dir of the dataset", 31 | ) 32 | parser.add_argument( 33 | "--train_split", 34 | type=str, 35 | default="train", 36 | choices=["train"], 37 | help="which train split to use", 38 | ) 39 | parser.add_argument( 40 | "--scene", 41 | type=str, 42 | default="lego", 43 | choices=[ 44 | # dnerf 45 | "bouncingballs", 46 | "hellwarrior", 47 | "hook", 48 | "jumpingjacks", 49 | "lego", 50 | "mutant", 51 | "standup", 52 | "trex", 53 | ], 54 | help="which scene to use", 55 | ) 56 | parser.add_argument( 57 | "--test_chunk_size", 58 | type=int, 59 | default=4096, 60 | ) 61 | args = parser.parse_args() 62 | 63 | # training parameters 64 | max_steps = 30000 65 | init_batch_size = 1024 66 | target_sample_batch_size = 1 << 16 67 | # scene parameters 68 | aabb = torch.tensor([-1.5, -1.5, -1.5, 1.5, 1.5, 1.5], device=device) 69 | near_plane = 0.0 70 | far_plane = 1.0e10 71 | # model parameters 72 | grid_resolution = 128 73 | grid_nlvl = 1 74 | # render parameters 75 | render_step_size = 5e-3 76 | 77 | # setup the dataset 78 | train_dataset = SubjectLoader( 79 | subject_id=args.scene, 80 | root_fp=args.data_root, 81 | split=args.train_split, 82 | num_rays=init_batch_size, 83 | device=device, 84 | ) 85 | test_dataset = SubjectLoader( 86 | subject_id=args.scene, 87 | 
root_fp=args.data_root, 88 | split="test", 89 | num_rays=None, 90 | device=device, 91 | ) 92 | 93 | estimator = OccGridEstimator( 94 | roi_aabb=aabb, resolution=grid_resolution, levels=grid_nlvl 95 | ).to(device) 96 | 97 | # setup the radiance field we want to train. 98 | radiance_field = TNeRFRadianceField().to(device) 99 | optimizer = torch.optim.Adam(radiance_field.parameters(), lr=5e-4) 100 | scheduler = torch.optim.lr_scheduler.MultiStepLR( 101 | optimizer, 102 | milestones=[ 103 | max_steps // 2, 104 | max_steps * 3 // 4, 105 | max_steps * 5 // 6, 106 | max_steps * 9 // 10, 107 | ], 108 | gamma=0.33, 109 | ) 110 | 111 | lpips_net = LPIPS(net="vgg").to(device) 112 | lpips_norm_fn = lambda x: x[None, ...].permute(0, 3, 1, 2) * 2 - 1 113 | lpips_fn = lambda x, y: lpips_net(lpips_norm_fn(x), lpips_norm_fn(y)).mean() 114 | 115 | # training 116 | tic = time.time() 117 | for step in range(max_steps + 1): 118 | radiance_field.train() 119 | estimator.train() 120 | 121 | i = torch.randint(0, len(train_dataset), (1,)).item() 122 | data = train_dataset[i] 123 | 124 | render_bkgd = data["color_bkgd"] 125 | rays = data["rays"] 126 | pixels = data["pixels"] 127 | timestamps = data["timestamps"] 128 | 129 | # update occupancy grid 130 | estimator.update_every_n_steps( 131 | step=step, 132 | occ_eval_fn=lambda x: radiance_field.query_opacity( 133 | x, timestamps, render_step_size 134 | ), 135 | occ_thre=1e-2, 136 | ) 137 | 138 | # render 139 | rgb, acc, depth, n_rendering_samples = render_image_with_occgrid( 140 | radiance_field, 141 | estimator, 142 | rays, 143 | # rendering options 144 | near_plane=near_plane, 145 | render_step_size=render_step_size, 146 | render_bkgd=render_bkgd, 147 | alpha_thre=0.01 if step > 1000 else 0.00, 148 | # t-nerf options 149 | timestamps=timestamps, 150 | ) 151 | if n_rendering_samples == 0: 152 | continue 153 | 154 | if target_sample_batch_size > 0: 155 | # dynamic batch size for rays to keep sample batch size constant. 
156 | num_rays = len(pixels) 157 | num_rays = int( 158 | num_rays * (target_sample_batch_size / float(n_rendering_samples)) 159 | ) 160 | train_dataset.update_num_rays(num_rays) 161 | 162 | # compute loss 163 | loss = F.smooth_l1_loss(rgb, pixels) 164 | 165 | optimizer.zero_grad() 166 | loss.backward() 167 | optimizer.step() 168 | scheduler.step() 169 | 170 | if step % 5000 == 0: 171 | elapsed_time = time.time() - tic 172 | loss = F.mse_loss(rgb, pixels) 173 | psnr = -10.0 * torch.log(loss) / np.log(10.0) 174 | print( 175 | f"elapsed_time={elapsed_time:.2f}s | step={step} | " 176 | f"loss={loss:.5f} | psnr={psnr:.2f} | " 177 | f"n_rendering_samples={n_rendering_samples:d} | num_rays={len(pixels):d} | " 178 | f"max_depth={depth.max():.3f} | " 179 | ) 180 | 181 | if step > 0 and step % max_steps == 0: 182 | # evaluation 183 | radiance_field.eval() 184 | estimator.eval() 185 | 186 | psnrs = [] 187 | lpips = [] 188 | with torch.no_grad(): 189 | for i in tqdm.tqdm(range(len(test_dataset))): 190 | data = test_dataset[i] 191 | render_bkgd = data["color_bkgd"] 192 | rays = data["rays"] 193 | pixels = data["pixels"] 194 | timestamps = data["timestamps"] 195 | 196 | # rendering 197 | rgb, acc, depth, _ = render_image_with_occgrid( 198 | radiance_field, 199 | estimator, 200 | rays, 201 | # rendering options 202 | near_plane=near_plane, 203 | render_step_size=render_step_size, 204 | render_bkgd=render_bkgd, 205 | alpha_thre=0.01, 206 | # test options 207 | test_chunk_size=args.test_chunk_size, 208 | # t-nerf options 209 | timestamps=timestamps, 210 | ) 211 | mse = F.mse_loss(rgb, pixels) 212 | psnr = -10.0 * torch.log(mse) / np.log(10.0) 213 | psnrs.append(psnr.item()) 214 | lpips.append(lpips_fn(rgb, pixels).item()) 215 | # if i == 0: 216 | # imageio.imwrite( 217 | # "rgb_test.png", 218 | # (rgb.cpu().numpy() * 255).astype(np.uint8), 219 | # ) 220 | # imageio.imwrite( 221 | # "rgb_error.png", 222 | # ( 223 | # (rgb - pixels).norm(dim=-1).cpu().numpy() * 255 224 | # 
"""
Copyright (c) 2022 Ruilong Li, UC Berkeley.
"""

from .data_specs import RayIntervals, RaySamples
from .estimators.occ_grid import OccGridEstimator
from .estimators.prop_net import PropNetEstimator
from .estimators.vdb import VDBEstimator, traverse_vdbs
from .grid import ray_aabb_intersect, traverse_grids
from .losses import distortion
from .pack import pack_info
from .pdf import importance_sampling, searchsorted
from .scan import exclusive_prod, exclusive_sum, inclusive_prod, inclusive_sum
from .version import __version__
from .volrend import (
    accumulate_along_rays,
    render_transmittance_from_alpha,
    render_transmittance_from_density,
    render_visibility_from_alpha,
    render_visibility_from_density,
    render_weight_from_alpha,
    render_weight_from_density,
    rendering,
)

# Public API of the package.
# NOTE: the ``*_cub`` scan variants live in ``nerfacc.cuda`` and are NOT
# imported into this module. Listing them here made
# ``from nerfacc import *`` raise AttributeError, so they were removed.
__all__ = [
    "__version__",
    "inclusive_prod",
    "exclusive_prod",
    "inclusive_sum",
    "exclusive_sum",
    "pack_info",
    "render_visibility_from_alpha",
    "render_visibility_from_density",
    "render_weight_from_alpha",
    "render_weight_from_density",
    "render_transmittance_from_alpha",
    "render_transmittance_from_density",
    "accumulate_along_rays",
    "rendering",
    "importance_sampling",
    "searchsorted",
    "RayIntervals",
    "RaySamples",
    "ray_aabb_intersect",
    "traverse_grids",
    "traverse_vdbs",
    "OccGridEstimator",
    "PropNetEstimator",
    "VDBEstimator",
    "distortion",
]
59 | params: (..., 4) or (4) OpenCV distortion parameters. 60 | 61 | Returns: 62 | (..., 2) undistorted UV coordinates. 63 | """ 64 | assert uv.shape[-1] == 2 65 | assert params.shape[-1] == 4 66 | batch_shape = uv.shape[:-1] 67 | params = torch.broadcast_to(params, batch_shape + (params.shape[-1],)) 68 | 69 | return _C.opencv_lens_undistortion_fisheye( 70 | uv.contiguous(), params.contiguous(), eps, iters 71 | ) 72 | 73 | 74 | def _opencv_lens_distortion(uv: Tensor, params: Tensor) -> Tensor: 75 | """The opencv camera distortion of {k1, k2, p1, p2, k3, k4, k5, k6}. 76 | 77 | See https://docs.opencv.org/3.4/d9/d0c/group__calib3d.html for more details. 78 | """ 79 | k1, k2, p1, p2, k3, k4, k5, k6 = torch.unbind(params, dim=-1) 80 | s1, s2, s3, s4 = 0, 0, 0, 0 81 | u, v = torch.unbind(uv, dim=-1) 82 | r2 = u * u + v * v 83 | r4 = r2**2 84 | r6 = r4 * r2 85 | ratial = (1 + k1 * r2 + k2 * r4 + k3 * r6) / ( 86 | 1 + k4 * r2 + k5 * r4 + k6 * r6 87 | ) 88 | fx = 2 * p1 * u * v + p2 * (r2 + 2 * u * u) + s1 * r2 + s2 * r4 89 | fy = 2 * p2 * u * v + p1 * (r2 + 2 * v * v) + s3 * r2 + s4 * r4 90 | return torch.stack([u * ratial + fx, v * ratial + fy], dim=-1) 91 | 92 | 93 | def _opencv_lens_distortion_fisheye( 94 | uv: Tensor, params: Tensor, eps: float = 1e-10 95 | ) -> Tensor: 96 | """The opencv camera distortion of {k1, k2, k3, p1, p2}. 97 | 98 | See https://docs.opencv.org/4.x/db/d58/group__calib3d__fisheye.html for more details. 99 | 100 | Args: 101 | uv: (..., 2) UV coordinates. 102 | params: (..., 4) or (4) OpenCV distortion parameters. 103 | 104 | Returns: 105 | (..., 2) distorted UV coordinates. 
106 | """ 107 | assert params.shape[-1] == 4, f"Invalid params shape: {params.shape}" 108 | k1, k2, k3, k4 = torch.unbind(params, dim=-1) 109 | u, v = torch.unbind(uv, dim=-1) 110 | r = torch.sqrt(u * u + v * v) 111 | theta = torch.atan(r) 112 | theta_d = theta * ( 113 | 1 114 | + k1 * theta**2 115 | + k2 * theta**4 116 | + k3 * theta**6 117 | + k4 * theta**8 118 | ) 119 | scale = theta_d / torch.clamp(r, min=eps) 120 | return uv * scale[..., None] 121 | 122 | 123 | @torch.jit.script 124 | def _compute_residual_and_jacobian( 125 | x: Tensor, y: Tensor, xd: Tensor, yd: Tensor, params: Tensor 126 | ) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor]: 127 | assert params.shape[-1] == 8 128 | 129 | k1, k2, p1, p2, k3, k4, k5, k6 = torch.unbind(params, dim=-1) 130 | 131 | # let r(x, y) = x^2 + y^2; 132 | # alpha(x, y) = 1 + k1 * r(x, y) + k2 * r(x, y) ^2 + k3 * r(x, y)^3; 133 | # beta(x, y) = 1 + k4 * r(x, y) + k5 * r(x, y) ^2 + k6 * r(x, y)^3; 134 | # d(x, y) = alpha(x, y) / beta(x, y); 135 | r = x * x + y * y 136 | alpha = 1.0 + r * (k1 + r * (k2 + r * k3)) 137 | beta = 1.0 + r * (k4 + r * (k5 + r * k6)) 138 | d = alpha / beta 139 | 140 | # The perfect projection is: 141 | # xd = x * d(x, y) + 2 * p1 * x * y + p2 * (r(x, y) + 2 * x^2); 142 | # yd = y * d(x, y) + 2 * p2 * x * y + p1 * (r(x, y) + 2 * y^2); 143 | # 144 | # Let's define 145 | # 146 | # fx(x, y) = x * d(x, y) + 2 * p1 * x * y + p2 * (r(x, y) + 2 * x^2) - xd; 147 | # fy(x, y) = y * d(x, y) + 2 * p2 * x * y + p1 * (r(x, y) + 2 * y^2) - yd; 148 | # 149 | # We are looking for a solution that satisfies 150 | # fx(x, y) = fy(x, y) = 0; 151 | fx = d * x + 2 * p1 * x * y + p2 * (r + 2 * x * x) - xd 152 | fy = d * y + 2 * p2 * x * y + p1 * (r + 2 * y * y) - yd 153 | 154 | # Compute derivative of alpha, beta over r. 
155 | alpha_r = k1 + r * (2.0 * k2 + r * (3.0 * k3)) 156 | beta_r = k4 + r * (2.0 * k5 + r * (3.0 * k6)) 157 | 158 | # Compute derivative of d over [x, y] 159 | d_r = (alpha_r * beta - alpha * beta_r) / (beta * beta) 160 | d_x = 2.0 * x * d_r 161 | d_y = 2.0 * y * d_r 162 | 163 | # Compute derivative of fx over x and y. 164 | fx_x = d + d_x * x + 2.0 * p1 * y + 6.0 * p2 * x 165 | fx_y = d_y * x + 2.0 * p1 * x + 2.0 * p2 * y 166 | 167 | # Compute derivative of fy over x and y. 168 | fy_x = d_x * y + 2.0 * p2 * y + 2.0 * p1 * x 169 | fy_y = d + d_y * y + 2.0 * p2 * x + 6.0 * p1 * y 170 | 171 | return fx, fy, fx_x, fx_y, fy_x, fy_y 172 | 173 | 174 | @torch.jit.script 175 | def _opencv_lens_undistortion( 176 | uv: Tensor, params: Tensor, eps: float = 1e-6, iters: int = 10 177 | ) -> Tensor: 178 | """Same as opencv_lens_undistortion(), but native PyTorch. 179 | 180 | Took from with bug fix and modification. 181 | https://github.com/nerfstudio-project/nerfstudio/blob/ec603634edbd61b13bdf2c598fda8c993370b8f7/nerfstudio/cameras/camera_utils.py 182 | """ 183 | assert uv.shape[-1] == 2 184 | assert params.shape[-1] in [0, 1, 2, 4, 8] 185 | 186 | if params.shape[-1] == 0: 187 | return uv 188 | elif params.shape[-1] < 8: 189 | params = F.pad(params, (0, 8 - params.shape[-1]), "constant", 0.0) 190 | assert params.shape[-1] == 8 191 | 192 | # Initialize from the distorted point. 
193 | x, y = x0, y0 = torch.unbind(uv, dim=-1) 194 | 195 | zeros = torch.zeros_like(x) 196 | for _ in range(iters): 197 | fx, fy, fx_x, fx_y, fy_x, fy_y = _compute_residual_and_jacobian( 198 | x=x, y=y, xd=x0, yd=y0, params=params 199 | ) 200 | denominator = fy_x * fx_y - fx_x * fy_y 201 | mask = torch.abs(denominator) > eps 202 | 203 | x_numerator = fx * fy_y - fy * fx_y 204 | y_numerator = fy * fx_x - fx * fy_x 205 | step_x = torch.where(mask, x_numerator / denominator, zeros) 206 | step_y = torch.where(mask, y_numerator / denominator, zeros) 207 | 208 | x = x + step_x 209 | y = y + step_y 210 | 211 | return torch.stack([x, y], dim=-1) 212 | -------------------------------------------------------------------------------- /nerfacc/cuda/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2022 Ruilong Li, UC Berkeley. 3 | """ 4 | 5 | from typing import Any, Callable 6 | 7 | 8 | def _make_lazy_cuda_func(name: str) -> Callable: 9 | def call_cuda(*args, **kwargs): 10 | # pylint: disable=import-outside-toplevel 11 | from ._backend import _C 12 | 13 | return getattr(_C, name)(*args, **kwargs) 14 | 15 | return call_cuda 16 | 17 | 18 | # data specs 19 | RaySegmentsSpec = _make_lazy_cuda_func("RaySegmentsSpec") 20 | 21 | # grid 22 | ray_aabb_intersect = _make_lazy_cuda_func("ray_aabb_intersect") 23 | traverse_grids = _make_lazy_cuda_func("traverse_grids") 24 | 25 | # scan 26 | inclusive_sum = _make_lazy_cuda_func("inclusive_sum") 27 | exclusive_sum = _make_lazy_cuda_func("exclusive_sum") 28 | inclusive_prod_forward = _make_lazy_cuda_func("inclusive_prod_forward") 29 | inclusive_prod_backward = _make_lazy_cuda_func("inclusive_prod_backward") 30 | exclusive_prod_forward = _make_lazy_cuda_func("exclusive_prod_forward") 31 | exclusive_prod_backward = _make_lazy_cuda_func("exclusive_prod_backward") 32 | 33 | is_cub_available = _make_lazy_cuda_func("is_cub_available") 34 | inclusive_sum_cub = 
_make_lazy_cuda_func("inclusive_sum_cub") 35 | exclusive_sum_cub = _make_lazy_cuda_func("exclusive_sum_cub") 36 | inclusive_prod_cub_forward = _make_lazy_cuda_func("inclusive_prod_cub_forward") 37 | inclusive_prod_cub_backward = _make_lazy_cuda_func( 38 | "inclusive_prod_cub_backward" 39 | ) 40 | exclusive_prod_cub_forward = _make_lazy_cuda_func("exclusive_prod_cub_forward") 41 | exclusive_prod_cub_backward = _make_lazy_cuda_func( 42 | "exclusive_prod_cub_backward" 43 | ) 44 | 45 | # pdf 46 | importance_sampling = _make_lazy_cuda_func("importance_sampling") 47 | searchsorted = _make_lazy_cuda_func("searchsorted") 48 | 49 | # camera 50 | opencv_lens_undistortion = _make_lazy_cuda_func("opencv_lens_undistortion") 51 | opencv_lens_undistortion_fisheye = _make_lazy_cuda_func( 52 | "opencv_lens_undistortion_fisheye" 53 | ) 54 | -------------------------------------------------------------------------------- /nerfacc/cuda/_backend.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2022 Ruilong Li, UC Berkeley. 
3 | """ 4 | 5 | import glob 6 | import json 7 | import os 8 | import shutil 9 | from subprocess import DEVNULL, call 10 | 11 | from rich.console import Console 12 | from torch.utils.cpp_extension import _get_build_directory, load 13 | 14 | PATH = os.path.dirname(os.path.abspath(__file__)) 15 | 16 | 17 | def cuda_toolkit_available(): 18 | """Check if the nvcc is avaiable on the machine.""" 19 | try: 20 | call(["nvcc"], stdout=DEVNULL, stderr=DEVNULL) 21 | return True 22 | except FileNotFoundError: 23 | return False 24 | 25 | 26 | def cuda_toolkit_version(): 27 | """Get the cuda toolkit version.""" 28 | cuda_home = os.path.join(os.path.dirname(shutil.which("nvcc")), "..") 29 | if os.path.exists(os.path.join(cuda_home, "version.txt")): 30 | with open(os.path.join(cuda_home, "version.txt")) as f: 31 | cuda_version = f.read().strip().split()[-1] 32 | elif os.path.exists(os.path.join(cuda_home, "version.json")): 33 | with open(os.path.join(cuda_home, "version.json")) as f: 34 | cuda_version = json.load(f)["cuda"]["version"] 35 | else: 36 | raise RuntimeError("Cannot find the cuda version.") 37 | return cuda_version 38 | 39 | 40 | name = "nerfacc_cuda" 41 | build_dir = _get_build_directory(name, verbose=False) 42 | extra_include_paths = [] 43 | extra_cflags = ["-O3"] 44 | extra_cuda_cflags = ["-O3"] 45 | 46 | _C = None 47 | sources = list(glob.glob(os.path.join(PATH, "csrc/*.cu"))) + list( 48 | glob.glob(os.path.join(PATH, "csrc/*.cpp")) 49 | ) 50 | 51 | try: 52 | # try to import the compiled module (via setup.py) 53 | from nerfacc import csrc as _C 54 | except ImportError: 55 | # if failed, try with JIT compilation 56 | if cuda_toolkit_available(): 57 | if os.listdir(build_dir) != []: 58 | # If the build exists, we assume the extension has been built 59 | # and we can load it. 
// Undistort UV coordinates with the 4-parameter OpenCV fisheye model.
//
// uv:      [N, 2] interleaved (u, v) input coordinates.
// params:  [N, 4] per-point coefficients {k1, k2, k3, k4} (indexed tid * 4).
// uv_out:  [N, 2] undistorted output, written by the iterative solver.
// success: [N] per-point convergence flag returned by the solver.
//
// The actual iteration lives in iterative_opencv_lens_undistortion_fisheye
// (utils_camera.cuh); criteria_iters / criteria_eps are forwarded to it —
// exact convergence semantics are defined there.
__global__ void opencv_lens_undistortion_fisheye(
    const int64_t N,
    const float* uv,
    const float* params,
    const int criteria_iters,
    const float criteria_eps,
    float* uv_out,
    bool* success)
{
    // parallelize over outputs (grid-stride loop so any launch shape covers N)
    for (int64_t tid = blockIdx.x * blockDim.x + threadIdx.x; tid < N; tid += blockDim.x * gridDim.x)
    {
        success[tid] = iterative_opencv_lens_undistortion_fisheye(
            uv[tid * 2 + 0],
            uv[tid * 2 + 1],
            params[tid * 4 + 0], // k1
            params[tid * 4 + 1], // k2
            params[tid * 4 + 2], // k3
            params[tid * 4 + 3], // k4
            criteria_iters,
            criteria_eps,
            uv_out[tid * 2 + 0],
            uv_out[tid * 2 + 1]
        );
    }
}
const int64_t n_params, 40 | const float* uv, 41 | const float* params, 42 | const float eps, 43 | const int max_iterations, 44 | float* uv_out) 45 | { 46 | // parallelize over outputs 47 | for (int64_t tid = blockIdx.x * blockDim.x + threadIdx.x; tid < N; tid += blockDim.x * gridDim.x) 48 | { 49 | if (n_params == 5) { 50 | radial_and_tangential_undistort( 51 | uv[tid * 2 + 0], 52 | uv[tid * 2 + 1], 53 | params[tid * n_params + 0], // k1 54 | params[tid * n_params + 1], // k2 55 | params[tid * n_params + 4], // k3 56 | 0.f, // k4 57 | 0.f, // k5 58 | 0.f, // k6 59 | params[tid * n_params + 2], // p1 60 | params[tid * n_params + 3], // p2 61 | eps, 62 | max_iterations, 63 | uv_out[tid * 2 + 0], 64 | uv_out[tid * 2 + 1]); 65 | } else if (n_params == 8) { 66 | radial_and_tangential_undistort( 67 | uv[tid * 2 + 0], 68 | uv[tid * 2 + 1], 69 | params[tid * n_params + 0], // k1 70 | params[tid * n_params + 1], // k2 71 | params[tid * n_params + 4], // k3 72 | params[tid * n_params + 5], // k4 73 | params[tid * n_params + 6], // k5 74 | params[tid * n_params + 7], // k6 75 | params[tid * n_params + 2], // p1 76 | params[tid * n_params + 3], // p2 77 | eps, 78 | max_iterations, 79 | uv_out[tid * 2 + 0], 80 | uv_out[tid * 2 + 1]); 81 | } else if (n_params == 12) { 82 | bool success = iterative_opencv_lens_undistortion( 83 | uv[tid * 2 + 0], 84 | uv[tid * 2 + 1], 85 | params[tid * 12 + 0], // k1 86 | params[tid * 12 + 1], // k2 87 | params[tid * 12 + 2], // k3 88 | params[tid * 12 + 3], // k4 89 | params[tid * 12 + 4], // k5 90 | params[tid * 12 + 5], // k6 91 | params[tid * 12 + 6], // p1 92 | params[tid * 12 + 7], // p2 93 | params[tid * 12 + 8], // s1 94 | params[tid * 12 + 9], // s2 95 | params[tid * 12 + 10], // s3 96 | params[tid * 12 + 11], // s4 97 | max_iterations, 98 | uv_out[tid * 2 + 0], 99 | uv_out[tid * 2 + 1] 100 | ); 101 | if (!success) { 102 | uv_out[tid * 2 + 0] = uv[tid * 2 + 0]; 103 | uv_out[tid * 2 + 1] = uv[tid * 2 + 1]; 104 | } 105 | } 106 | } 107 | } 
// Undistort UV coordinates with the OpenCV pinhole distortion model.
//
// Args:
//   uv:     [..., 2] distorted UV coordinates (float32, CUDA, contiguous).
//   params: [..., 5], [..., 8] or [..., 12] distortion coefficients with the
//           same number of dimensions as `uv`.
//   eps:            convergence threshold forwarded to the kernel.
//   max_iterations: iteration cap forwarded to the kernel.
//
// Returns: [..., 2] undistorted UV coordinates (same shape as `uv`).
torch::Tensor opencv_lens_undistortion(
    const torch::Tensor& uv,     // [..., 2]
    const torch::Tensor& params, // [..., 5], [..., 8] or [..., 12]
    const float eps,
    const int max_iterations)
{
    DEVICE_GUARD(uv);
    CHECK_INPUT(uv);
    CHECK_INPUT(params);
    TORCH_CHECK(uv.ndimension() == params.ndimension());
    TORCH_CHECK(uv.size(-1) == 2, "uv must have shape [..., 2]");
    TORCH_CHECK(params.size(-1) == 5 || params.size(-1) == 8 || params.size(-1) == 12);

    int64_t N = uv.numel() / 2;
    int64_t n_params = params.size(-1);

    auto uv_out = torch::empty_like(uv);
    // Fix: with an empty input, threads.x below would be 0, which divides by
    // zero in ceil_div and yields an invalid kernel launch configuration.
    if (N == 0) {
        return uv_out;
    }

    at::cuda::CUDAStream stream = at::cuda::getCurrentCUDAStream();
    int64_t max_threads = 512;
    int64_t max_blocks = 65535;
    dim3 threads = dim3(min(max_threads, N));
    dim3 blocks = dim3(min(max_blocks, ceil_div(N, threads.x)));

    device::opencv_lens_undistortion<<<blocks, threads, 0, stream>>>(
        N,
        n_params,
        uv.data_ptr<float>(),
        params.data_ptr<float>(),
        eps,
        max_iterations,
        uv_out.data_ptr<float>());

    return uv_out;
}
// Container for per-ray segment samples in one of two layouts:
//   * "batched":   vals is [n_rays, n_edges_per_ray] and the bookkeeping
//                  tensors are unused;
//   * "flattened": vals is [n_edges] and chunk_starts / chunk_cnts map each
//                  ray to its slice of the flat edge arrays.
struct RaySegmentsSpec {
  torch::Tensor vals; // [n_edges] or [n_rays, n_edges_per_ray]
  // for flattened tensor
  torch::Tensor chunk_starts; // [n_rays] offset of each ray's first edge in `vals`
  torch::Tensor chunk_cnts;   // [n_rays] number of edges belonging to each ray
  torch::Tensor ray_indices;  // [n_edges] owning ray of each edge
  torch::Tensor is_left;      // [n_edges] have n_bins true values
  torch::Tensor is_right;     // [n_edges] have n_bins true values
  torch::Tensor is_valid;     // [n_edges] have n_bins true values

  // Validate whichever layout is populated (devices, dims, matching sizes).
  // For the batched layout only `vals` is checked; for the flattened layout
  // the optional per-edge tensors are checked only when defined.
  inline void check() {
    CHECK_INPUT(vals);
    TORCH_CHECK(vals.defined());

    // batched tensor [..., n_edges_per_ray]
    if (vals.ndimension() > 1) return;

    // flattend tensor [n_edges]
    CHECK_INPUT(chunk_starts);
    CHECK_INPUT(chunk_cnts);
    TORCH_CHECK(chunk_starts.defined());
    TORCH_CHECK(chunk_cnts.defined());
    TORCH_CHECK(chunk_starts.ndimension() == 1);
    TORCH_CHECK(chunk_cnts.ndimension() == 1);
    TORCH_CHECK(chunk_starts.numel() == chunk_cnts.numel());
    if (ray_indices.defined()) {
      CHECK_INPUT(ray_indices);
      TORCH_CHECK(ray_indices.ndimension() == 1);
      TORCH_CHECK(vals.numel() == ray_indices.numel());
    }
    if (is_left.defined()) {
      CHECK_INPUT(is_left);
      TORCH_CHECK(is_left.ndimension() == 1);
      TORCH_CHECK(vals.numel() == is_left.numel());
    }
    if (is_right.defined()) {
      CHECK_INPUT(is_right);
      TORCH_CHECK(is_right.ndimension() == 1);
      TORCH_CHECK(vals.numel() == is_right.numel());
    }
    if (is_valid.defined()) {
      CHECK_INPUT(is_valid);
      TORCH_CHECK(is_valid.ndimension() == 1);
      TORCH_CHECK(vals.numel() == is_valid.numel());
    }
  }

  // Allocate chunk_cnts ([n_rays], int64) on the device described by
  // `options`. Must be called before the memalloc_data* helpers.
  inline void memalloc_cnts(int32_t n_rays, at::TensorOptions options, bool zero_init = true) {
    TORCH_CHECK(!chunk_cnts.defined());
    if (zero_init) {
      chunk_cnts = torch::zeros({n_rays}, options.dtype(torch::kLong));
    } else {
      chunk_cnts = torch::empty({n_rays}, options.dtype(torch::kLong));
    }
  }

  // Allocate the flat per-edge tensors (`vals`, `ray_indices`, optionally the
  // left/right masks and `is_valid`) with `size` elements, on the same device
  // as chunk_cnts. NOTE(review): `size` is int32_t while edge counts elsewhere
  // are int64_t — presumably total edge counts never exceed 2^31; confirm.
  inline void memalloc_data(int32_t size, bool alloc_masks = true, bool zero_init = true, bool alloc_valid = false) {
    TORCH_CHECK(chunk_cnts.defined());
    TORCH_CHECK(!vals.defined());

    if (zero_init) {
      vals = torch::zeros({size}, chunk_cnts.options().dtype(torch::kFloat32));
      ray_indices = torch::zeros({size}, chunk_cnts.options().dtype(torch::kLong));
      if (alloc_masks) {
        is_left = torch::zeros({size}, chunk_cnts.options().dtype(torch::kBool));
        is_right = torch::zeros({size}, chunk_cnts.options().dtype(torch::kBool));
      }
    } else {
      vals = torch::empty({size}, chunk_cnts.options().dtype(torch::kFloat32));
      ray_indices = torch::empty({size}, chunk_cnts.options().dtype(torch::kLong));
      if (alloc_masks) {
        is_left = torch::empty({size}, chunk_cnts.options().dtype(torch::kBool));
        is_right = torch::empty({size}, chunk_cnts.options().dtype(torch::kBool));
      }
    }
    if (alloc_valid) {
      // is_valid is always zero-initialized, regardless of zero_init.
      is_valid = torch::zeros({size}, chunk_cnts.options().dtype(torch::kBool));
    }
  }

  // Derive chunk_starts (exclusive prefix sum of chunk_cnts) and allocate the
  // per-edge tensors sized to the total edge count. Always returns 1.
  inline int64_t memalloc_data_from_chunk(bool alloc_masks = true, bool zero_init = true, bool alloc_valid = false) {
    TORCH_CHECK(chunk_cnts.defined());
    TORCH_CHECK(!chunk_starts.defined());

    torch::Tensor cumsum = torch::cumsum(chunk_cnts, 0, chunk_cnts.scalar_type());
    int64_t n_edges = cumsum[-1].item<int64_t>();

    // exclusive prefix sum: start of each ray's chunk.
    chunk_starts = cumsum - chunk_cnts;
    memalloc_data(n_edges, alloc_masks, zero_init, alloc_valid);
    return 1;
  }

  // compute the chunk_start from chunk_cnts
  // (unlike memalloc_data_from_chunk, silently overwrites any existing
  // chunk_starts — the defined() check is intentionally commented out).
  // Always returns 1.
  inline int64_t compute_chunk_start() {
    TORCH_CHECK(chunk_cnts.defined());
    // TORCH_CHECK(!chunk_starts.defined());

    torch::Tensor cumsum = torch::cumsum(chunk_cnts, 0, chunk_cnts.scalar_type());
    chunk_starts = cumsum - chunk_cnts;
    return 1;
  }
};
spec.vals.size(-1) : 0) // for batched tensor 25 | { } 26 | 27 | float* vals; 28 | bool is_batched; 29 | 30 | int64_t* chunk_starts; 31 | int64_t* chunk_cnts; 32 | int64_t* ray_indices; 33 | bool* is_left; 34 | bool* is_right; 35 | bool* is_valid; 36 | 37 | int64_t n_edges; 38 | int32_t n_rays; 39 | int32_t n_edges_per_ray; 40 | }; 41 | 42 | 43 | struct SingleRaySpec { 44 | // TODO: check inv_dir if dir is zero. 45 | __device__ SingleRaySpec( 46 | float *rays_o, float *rays_d, float tmin, float tmax) : 47 | origin{rays_o[0], rays_o[1], rays_o[2]}, 48 | dir{rays_d[0], rays_d[1], rays_d[2]}, 49 | inv_dir{1.0f/rays_d[0], 1.0f/rays_d[1], 1.0f/rays_d[2]}, 50 | tmin{tmin}, 51 | tmax{tmax} 52 | { } 53 | 54 | float3 origin; 55 | float3 dir; 56 | float3 inv_dir; 57 | float tmin; 58 | float tmax; 59 | }; 60 | 61 | struct AABBSpec { 62 | __device__ AABBSpec(float *aabb) : 63 | min{aabb[0], aabb[1], aabb[2]}, 64 | max{aabb[3], aabb[4], aabb[5]} 65 | { } 66 | __device__ AABBSpec(float3 min, float3 max) : 67 | min{min.x, min.y, min.z}, 68 | max{max.x, max.y, max.z} 69 | { } 70 | float3 min; 71 | float3 max; 72 | }; 73 | 74 | 75 | } // namespace device 76 | } // namespace -------------------------------------------------------------------------------- /nerfacc/cuda/csrc/include/utils.cub.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2022 Ruilong Li, UC Berkeley. 3 | * Modified from aten/src/ATen/cuda/cub_definitions.cuh in PyTorch. 
4 | */ 5 | 6 | #pragma once 7 | 8 | #include // for CUDA_VERSION 9 | 10 | #if defined(CUDA_VERSION) && CUDA_VERSION >= 11000 11 | #include 12 | #else 13 | #define CUB_VERSION 0 14 | #endif 15 | 16 | // cub support for scan by key is added to cub 1.15 17 | // in https://github.com/NVIDIA/cub/pull/376 18 | #if CUB_VERSION >= 101500 19 | #define CUB_SUPPORTS_SCAN_BY_KEY() 1 20 | #else 21 | #define CUB_SUPPORTS_SCAN_BY_KEY() 0 22 | #endif 23 | 24 | // https://github.com/pytorch/pytorch/blob/233305a852e1cd7f319b15b5137074c9eac455f6/aten/src/ATen/cuda/cub.cuh#L38-L46 25 | #define CUB_WRAPPER(func, ...) do { \ 26 | size_t temp_storage_bytes = 0; \ 27 | func(nullptr, temp_storage_bytes, __VA_ARGS__); \ 28 | auto& caching_allocator = *::c10::cuda::CUDACachingAllocator::get(); \ 29 | auto temp_storage = caching_allocator.allocate(temp_storage_bytes); \ 30 | func(temp_storage.get(), temp_storage_bytes, __VA_ARGS__); \ 31 | AT_CUDA_CHECK(cudaGetLastError()); \ 32 | } while (false) -------------------------------------------------------------------------------- /nerfacc/cuda/csrc/include/utils_camera.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2022 Ruilong Li, UC Berkeley. 
 */

#include "utils_cuda.cuh"

#define PI 3.14159265358979323846

namespace {
namespace device {

// https://github.com/JamesPerlman/TurboNeRF/blob/75f1228d41b914b0a768a876d2a851f3b3213a58/src/utils/camera-kernels.cuh
// Residual f(x, y) = distort(x, y) - (xd, yd) of the OpenCV rational
// (k1..k6) + tangential (p1, p2) distortion model, together with its 2x2
// Jacobian. Consumed by the Newton solver below.
inline __device__ void _compute_residual_and_jacobian(
    // inputs
    float x, float y,
    float xd, float yd,
    float k1, float k2, float k3, float k4, float k5, float k6,
    float p1, float p2,
    // outputs
    float& fx, float& fy,
    float& fx_x, float& fx_y,
    float& fy_x, float& fy_y
) {
    // let r(x, y) = x^2 + y^2;
    // alpha(x, y) = 1 + k1 * r(x, y) + k2 * r(x, y) ^2 + k3 * r(x, y)^3;
    // beta(x, y) = 1 + k4 * r(x, y) + k5 * r(x, y) ^2 + k6 * r(x, y)^3;
    // d(x, y) = alpha(x, y) / beta(x, y);
    const float r = x * x + y * y;
    const float alpha = 1.0f + r * (k1 + r * (k2 + r * k3));
    const float beta = 1.0f + r * (k4 + r * (k5 + r * k6));
    const float d = alpha / beta;

    // The perfect projection is:
    // xd = x * d(x, y) + 2 * p1 * x * y + p2 * (r(x, y) + 2 * x^2);
    // yd = y * d(x, y) + 2 * p2 * x * y + p1 * (r(x, y) + 2 * y^2);

    // Let's define
    // fx(x, y) = x * d(x, y) + 2 * p1 * x * y + p2 * (r(x, y) + 2 * x^2) - xd;
    // fy(x, y) = y * d(x, y) + 2 * p2 * x * y + p1 * (r(x, y) + 2 * y^2) - yd;

    // We are looking for a solution that satisfies
    // fx(x, y) = fy(x, y) = 0;

    fx = d * x + 2 * p1 * x * y + p2 * (r + 2 * x * x) - xd;
    fy = d * y + 2 * p2 * x * y + p1 * (r + 2 * y * y) - yd;

    // Compute derivative of alpha, beta over r.
    const float alpha_r = k1 + r * (2.0 * k2 + r * (3.0 * k3));
    const float beta_r = k4 + r * (2.0 * k5 + r * (3.0 * k6));

    // Compute derivative of d over [x, y]
    const float d_r = (alpha_r * beta - alpha * beta_r) / (beta * beta);
    const float d_x = 2.0 * x * d_r;
    const float d_y = 2.0 * y * d_r;

    // Compute derivative of fx over x and y.
    fx_x = d + d_x * x + 2.0 * p1 * y + 6.0 * p2 * x;
    fx_y = d_y * x + 2.0 * p1 * x + 2.0 * p2 * y;

    // Compute derivative of fy over x and y.
    fy_x = d_x * y + 2.0 * p2 * y + 2.0 * p1 * x;
    fy_y = d + d_y * y + 2.0 * p2 * x + 6.0 * p1 * y;
}

// https://github.com/JamesPerlman/TurboNeRF/blob/75f1228d41b914b0a768a876d2a851f3b3213a58/src/utils/camera-kernels.cuh
// Invert the distortion model with Newton's method: given a distorted
// point (xd, yd), find (x, y) such that distort(x, y) = (xd, yd).
inline __device__ void radial_and_tangential_undistort(
    float xd, float yd,
    float k1, float k2, float k3, float k4, float k5, float k6,
    float p1, float p2,
    const float& eps,
    const int& max_iterations,
    float& x, float& y
) {
    // Initial guess.
    x = xd;
    y = yd;

    // Newton's method.
    for (int i = 0; i < max_iterations; ++i) {
        float fx, fy, fx_x, fx_y, fy_x, fy_y;

        _compute_residual_and_jacobian(
            x, y,
            xd, yd,
            k1, k2, k3, k4, k5, k6,
            p1, p2,
            fx, fy,
            fx_x, fx_y, fy_x, fy_y
        );

        // Compute the Jacobian.
        // NOTE: this is the *negated* determinant of [[fx_x, fx_y],
        // [fy_x, fy_y]]; the update formulas below are written with a
        // consistent sign, so the overall step is the usual -J^{-1} f.
        const float det = fx_y * fy_x - fx_x * fy_y;
        if (fabs(det) < eps) {
            // Jacobian (near-)singular: bail out rather than divide by ~0.
            break;
        }

        // Compute the update.
        const float dx = (fx * fy_y - fy * fx_y) / det;
        const float dy = (fy * fx_x - fx * fy_x) / det;

        // Update the solution.
        x += dx;
        y += dy;

        // Check for convergence.
        if (fabs(dx) < eps && fabs(dy) < eps) {
            break;
        }
    }
}

// not good
// https://github.com/opencv/opencv/blob/8d0fbc6a1e9f20c822921e8076551a01e58cd632/modules/calib3d/src/undistort.dispatch.cpp#L578
// Fixed-point iteration used by OpenCV's undistortPoints (rational +
// tangential + thin-prism terms). Returns false if the inverse
// distortion factor goes negative (model breakdown).
inline __device__ bool iterative_opencv_lens_undistortion(
    float u, float v,
    float k1, float k2, float k3, float k4, float k5, float k6,
    float p1, float p2, float s1, float s2, float s3, float s4,
    int iters,
    // outputs
    float& x, float& y)
{
    x = u;
    y = v;
    for(int i = 0; i < iters; i++)
    {
        float r2 = x*x + y*y;
        float icdist = (1 + ((k6*r2 + k5)*r2 + k4)*r2) / (1 + ((k3*r2 + k2)*r2 + k1)*r2);
        if (icdist < 0) return false;
        float deltaX = 2*p1*x*y + p2*(r2 + 2*x*x) + s1*r2 + s2*r2*r2;
        float deltaY = p1*(r2 + 2*y*y) + 2*p2*x*y + s3*r2 + s4*r2*r2;
        x = (u - deltaX) * icdist;
        y = (v - deltaY) * icdist;
    }
    return true;
}

// https://github.com/opencv/opencv/blob/master/modules/calib3d/src/fisheye.cpp#L321
// Fisheye (equidistant) undistortion: solves theta * (1 + k1*theta^2 +
// ... + k4*theta^8) = theta_d for theta by Newton iteration, then rescales
// the input point. Outputs are written only on clean convergence.
inline __device__ bool iterative_opencv_lens_undistortion_fisheye(
    float u, float v,
    float k1, float k2, float k3, float k4,
    int criteria_iters,
    float criteria_eps,
    // outputs
    float& u_out, float& v_out)
{
    // image point (u, v) to world point (x, y)
    float theta_d = sqrt(u * u + v * v);

    // the current camera model is only valid up to 180 FOV
    // for larger FOV the loop below does not converge
    // clip values so we still get plausible results for super fisheye images > 180 grad
    theta_d = min(max(-PI/2., theta_d), PI/2.);

    bool converged = false;
    float theta = theta_d;

    float scale = 0.0;

    if (fabs(theta_d) > criteria_eps)
    {
        // compensate distortion iteratively using Newton method
        for (int j = 0; j < criteria_iters; j++)
        {
            double theta2 = theta*theta, theta4 = theta2*theta2, theta6 = theta4*theta2, theta8 = theta6*theta2;
            double k0_theta2 = k1 * theta2, k1_theta4 = k2 * theta4, k2_theta6 = k3 * theta6, k3_theta8 = k4 * theta8;
            /* new_theta = theta - theta_fix, theta_fix = f0(theta) / f0'(theta) */
            double theta_fix = (theta * (1 + k0_theta2 + k1_theta4 + k2_theta6 + k3_theta8) - theta_d) /
                (1 + 3*k0_theta2 + 5*k1_theta4 + 7*k2_theta6 + 9*k3_theta8);
            theta = theta - theta_fix;

            if (fabs(theta_fix) < criteria_eps)
            {
                converged = true;
                break;
            }
        }

        scale = std::tan(theta) / theta_d;
    }
    else
    {
        converged = true;
    }

    // theta is monotonously increasing or decreasing depending on the sign of theta
    // if theta has flipped, it might converge due to symmetry but on the opposite of the camera center
    // so we can check whether theta has changed the sign during the optimization
    bool theta_flipped = ((theta_d < 0 && theta > 0) || (theta_d > 0 && theta < 0));

    if (converged && !theta_flipped)
    {
        u_out = u * scale;
        v_out = v * scale;
    }

    return converged;
}


} // namespace device
} // namespace
-------------------------------------------------------------------------------- /nerfacc/cuda/csrc/include/utils_contraction.cuh: --------------------------------------------------------------------------------
/*
 * Copyright (c) 2022 Ruilong Li, UC Berkeley.
3 | */ 4 | 5 | #pragma once 6 | 7 | #include "utils_math.cuh" 8 | 9 | namespace { 10 | namespace device { 11 | 12 | enum ContractionType 13 | { 14 | AABB = 0, 15 | UN_BOUNDED_TANH = 1, 16 | UN_BOUNDED_SPHERE = 2, 17 | }; 18 | 19 | inline __device__ __host__ float3 roi_to_unit( 20 | const float3 xyz, const float3 roi_min, const float3 roi_max) 21 | { 22 | // roi -> [0, 1]^3 23 | return (xyz - roi_min) / (roi_max - roi_min); 24 | } 25 | 26 | inline __device__ __host__ float3 unit_to_roi( 27 | const float3 xyz, const float3 roi_min, const float3 roi_max) 28 | { 29 | // [0, 1]^3 -> roi 30 | return xyz * (roi_max - roi_min) + roi_min; 31 | } 32 | 33 | inline __device__ __host__ float3 inf_to_unit_tanh( 34 | const float3 xyz, float3 roi_min, const float3 roi_max) 35 | { 36 | /** 37 | [-inf, inf]^3 -> [0, 1]^3 38 | roi -> cube of [0.25, 0.75]^3 39 | **/ 40 | float3 xyz_unit = roi_to_unit(xyz, roi_min, roi_max); // roi -> [0, 1]^3 41 | xyz_unit = xyz_unit - 0.5f; // roi -> [-0.5, 0.5]^3 42 | return make_float3(tanhf(xyz_unit.x), tanhf(xyz_unit.y), tanhf(xyz_unit.z)) * 0.5f + 0.5f; 43 | } 44 | 45 | inline __device__ __host__ float3 unit_to_inf_tanh( 46 | const float3 xyz, float3 roi_min, const float3 roi_max) 47 | { 48 | /** 49 | [0, 1]^3 -> [-inf, inf]^3 50 | cube of [0.25, 0.75]^3 -> roi 51 | **/ 52 | float3 xyz_unit = clamp( 53 | make_float3( 54 | atanhf(xyz.x * 2.0f - 1.0f), 55 | atanhf(xyz.y * 2.0f - 1.0f), 56 | atanhf(xyz.z * 2.0f - 1.0f)), 57 | -1e10f, 58 | 1e10f); 59 | xyz_unit = xyz_unit + 0.5f; 60 | xyz_unit = unit_to_roi(xyz_unit, roi_min, roi_max); 61 | return xyz_unit; 62 | } 63 | 64 | inline __device__ __host__ float3 inf_to_unit_sphere( 65 | const float3 xyz, const float3 roi_min, const float3 roi_max) 66 | { 67 | /** From MipNeRF360 68 | [-inf, inf]^3 -> sphere of [0, 1]^3; 69 | roi -> sphere of [0.25, 0.75]^3 70 | **/ 71 | float3 xyz_unit = roi_to_unit(xyz, roi_min, roi_max); // roi -> [0, 1]^3 72 | xyz_unit = xyz_unit * 2.0f - 1.0f; // roi -> [-1, 1]^3 73 
| 74 | float norm_sq = dot(xyz_unit, xyz_unit); 75 | float norm = sqrt(norm_sq); 76 | if (norm > 1.0f) 77 | { 78 | xyz_unit = (2.0f - 1.0f / norm) * (xyz_unit / norm); 79 | } 80 | xyz_unit = xyz_unit * 0.25f + 0.5f; // [-1, 1]^3 -> [0.25, 0.75]^3 81 | return xyz_unit; 82 | } 83 | 84 | inline __device__ __host__ float3 unit_sphere_to_inf( 85 | const float3 xyz, const float3 roi_min, const float3 roi_max) 86 | { 87 | /** From MipNeRF360 88 | sphere of [0, 1]^3 -> [-inf, inf]^3; 89 | sphere of [0.25, 0.75]^3 -> roi 90 | **/ 91 | float3 xyz_unit = (xyz - 0.5f) * 4.0f; // [0.25, 0.75]^3 -> [-1, 1]^3 92 | 93 | float norm_sq = dot(xyz_unit, xyz_unit); 94 | float norm = sqrt(norm_sq); 95 | if (norm > 1.0f) 96 | { 97 | xyz_unit = xyz_unit / fmaxf((2.0f * norm - 1.0f * norm_sq), 1e-10f); 98 | } 99 | xyz_unit = xyz_unit * 0.5f + 0.5f; // [-1, 1]^3 -> [0, 1]^3 100 | xyz_unit = unit_to_roi(xyz_unit, roi_min, roi_max); // [0, 1]^3 -> roi 101 | return xyz_unit; 102 | } 103 | 104 | inline __device__ __host__ float3 apply_contraction( 105 | const float3 xyz, const float3 roi_min, const float3 roi_max, 106 | const ContractionType type) 107 | { 108 | switch (type) 109 | { 110 | case AABB: 111 | return roi_to_unit(xyz, roi_min, roi_max); 112 | case UN_BOUNDED_TANH: 113 | return inf_to_unit_tanh(xyz, roi_min, roi_max); 114 | case UN_BOUNDED_SPHERE: 115 | return inf_to_unit_sphere(xyz, roi_min, roi_max); 116 | } 117 | } 118 | 119 | inline __device__ __host__ float3 apply_contraction_inv( 120 | const float3 xyz, const float3 roi_min, const float3 roi_max, 121 | const ContractionType type) 122 | { 123 | switch (type) 124 | { 125 | case AABB: 126 | return unit_to_roi(xyz, roi_min, roi_max); 127 | case UN_BOUNDED_TANH: 128 | return unit_to_inf_tanh(xyz, roi_min, roi_max); 129 | case UN_BOUNDED_SPHERE: 130 | return unit_sphere_to_inf(xyz, roi_min, roi_max); 131 | } 132 | } 133 | 134 | } // namespace device 135 | } // namespace 
-------------------------------------------------------------------------------- /nerfacc/cuda/csrc/include/utils_cuda.cuh: --------------------------------------------------------------------------------
/*
 * Copyright (c) 2022 Ruilong Li, UC Berkeley.
 */

#pragma once

// NOTE(review): the three bracketed include targets below were lost in
// extraction (presumably torch/ATen/c10 CUDA headers); restore before
// building.
#include
#include
#include


// Input-validation helpers shared by all CUDA entry points.
#define CHECK_CUDA(x) TORCH_CHECK(x.is_cuda(), #x " must be a CUDA tensor")
#define CHECK_CONTIGUOUS(x) \
    TORCH_CHECK(x.is_contiguous(), #x " must be contiguous")
#define CHECK_INPUT(x) \
    CHECK_CUDA(x);     \
    CHECK_CONTIGUOUS(x)
// Compute the global thread id; threads beyond the workload Q return early.
#define CUDA_GET_THREAD_ID(tid, Q)                         \
    const int tid = blockIdx.x * blockDim.x + threadIdx.x; \
    if (tid >= Q)                                          \
    return
// Number of blocks needed to cover Q items (ceiling division).
#define CUDA_N_BLOCKS_NEEDED(Q, CUDA_N_THREADS) ((Q - 1) / CUDA_N_THREADS + 1)
#define DEVICE_GUARD(_ten) \
    const at::cuda::OptionalCUDAGuard device_guard(device_of(_ten));

// Integer division rounding up.
// NOTE(review): the template parameter list was lost in extraction —
// presumably template <typename scalar_t>.
template
inline __device__ __host__ scalar_t ceil_div(scalar_t a, scalar_t b)
{
    return (a + b - 1) / b;
}
-------------------------------------------------------------------------------- /nerfacc/cuda/csrc/include/utils_grid.cuh: --------------------------------------------------------------------------------
#pragma once

#include "data_spec_packed.cuh"
#include "utils_contraction.cuh"
#include "utils_math.cuh"

namespace {
namespace device {

// Slab-method ray/AABB intersection. On hit, writes the entry/exit
// distances (clamped to the ray's own [tmin, tmax]) and returns true.
inline __device__ bool ray_aabb_intersect(
    SingleRaySpec ray, AABBSpec aabb,
    // outputs
    float& tmin, float& tmax)
{
    float tmin_temp{};
    float tmax_temp{};

    // X slab: pick near/far planes by the sign of the direction.
    if (ray.inv_dir.x >= 0) {
        tmin = (aabb.min.x - ray.origin.x) * ray.inv_dir.x;
        tmax = (aabb.max.x - ray.origin.x) * ray.inv_dir.x;
    } else {
        tmin = (aabb.max.x - ray.origin.x) * ray.inv_dir.x;
        tmax = (aabb.min.x - ray.origin.x) * ray.inv_dir.x;
    }

    // Y slab.
    if (ray.inv_dir.y >= 0) {
        tmin_temp = (aabb.min.y - ray.origin.y) * ray.inv_dir.y;
        tmax_temp = (aabb.max.y - ray.origin.y) * ray.inv_dir.y;
    } else {
        tmin_temp = (aabb.max.y - ray.origin.y) * ray.inv_dir.y;
        tmax_temp = (aabb.min.y - ray.origin.y) * ray.inv_dir.y;
    }

    if (tmin > tmax_temp || tmin_temp > tmax) return false;
    if (tmin_temp > tmin) tmin = tmin_temp;
    if (tmax_temp < tmax) tmax = tmax_temp;

    // Z slab.
    if (ray.inv_dir.z >= 0) {
        tmin_temp = (aabb.min.z - ray.origin.z) * ray.inv_dir.z;
        tmax_temp = (aabb.max.z - ray.origin.z) * ray.inv_dir.z;
    } else {
        tmin_temp = (aabb.max.z - ray.origin.z) * ray.inv_dir.z;
        tmax_temp = (aabb.min.z - ray.origin.z) * ray.inv_dir.z;
    }

    if (tmin > tmax_temp || tmin_temp > tmax) return false;
    if (tmin_temp > tmin) tmin = tmin_temp;
    if (tmax_temp < tmax) tmax = tmax_temp;

    // Box entirely behind the ray origin.
    if (tmax <= 0) return false;

    tmin = fmaxf(tmin, ray.tmin);
    tmax = fminf(tmax, ray.tmax);
    return true;
}


// Initialize per-axis state for voxel marching (3D DDA-style traversal)
// through a uniform grid: for each axis, the distance to the next voxel
// boundary (tdist), the per-voxel t increment (delta), the step direction,
// and the start/end voxel indices. Axes with zero direction are disabled
// by setting their tdist/delta to tmax so they never win the min-compare.
inline __device__ void setup_traversal(
    SingleRaySpec ray, float tmin, float tmax, float eps,
    AABBSpec aabb, int3 resolution,
    // outputs
    float3 &delta, float3 &tdist,
    int3 &step_index, int3 &current_index, int3 &final_index)
{
    const float3 res = make_float3(resolution);
    const float3 voxel_size = (aabb.max - aabb.min) / res;
    // eps nudges both endpoints inside the box to avoid landing exactly
    // on a voxel boundary.
    const float3 ray_start = ray.origin + ray.dir * (tmin + eps);
    const float3 ray_end = ray.origin + ray.dir * (tmax - eps);

    // get voxel index of start and end within grid
    // TODO: check float error here!
    current_index = make_int3(
        apply_contraction(ray_start, aabb.min, aabb.max, ContractionType::AABB)
        * res
    );
    current_index = clamp(current_index, make_int3(0, 0, 0), resolution - 1);

    final_index = make_int3(
        apply_contraction(ray_end, aabb.min, aabb.max, ContractionType::AABB)
        * res
    );
    final_index = clamp(final_index, make_int3(0, 0, 0), resolution - 1);

    // Index of the voxel boundary the ray crosses first on each axis.
    const int3 index_delta = make_int3(
        ray.dir.x > 0 ? 1 : 0, ray.dir.y > 0 ? 1 : 0, ray.dir.z > 0 ? 1 : 0
    );
    const int3 start_index = current_index + index_delta;
    const float3 tmax_xyz = ((aabb.min +
        ((make_float3(start_index) * voxel_size) - ray_start)) * ray.inv_dir) + tmin;

    tdist = make_float3(
        (ray.dir.x == 0.0f) ? tmax : tmax_xyz.x,
        (ray.dir.y == 0.0f) ? tmax : tmax_xyz.y,
        (ray.dir.z == 0.0f) ? tmax : tmax_xyz.z
    );
    // printf("tdist: %f %f %f\n", tdist.x, tdist.y, tdist.z);

    const float3 step_float = make_float3(
        (ray.dir.x == 0.0f) ? 0.0f : (ray.dir.x > 0.0f ? 1.0f : -1.0f),
        (ray.dir.y == 0.0f) ? 0.0f : (ray.dir.y > 0.0f ? 1.0f : -1.0f),
        (ray.dir.z == 0.0f) ? 0.0f : (ray.dir.z > 0.0f ? 1.0f : -1.0f)
    );
    step_index = make_int3(step_float);
    // printf("step_index: %d %d %d\n", step_index.x, step_index.y, step_index.z);

    const float3 delta_temp = voxel_size * ray.inv_dir * step_float;
    delta = make_float3(
        (ray.dir.x == 0.0f) ? tmax : delta_temp.x,
        (ray.dir.y == 0.0f) ? tmax : delta_temp.y,
        (ray.dir.z == 0.0f) ? tmax : delta_temp.z
    );
    // printf("delta: %f %f %f\n", delta.x, delta.y, delta.z);
}

// Advance to the next voxel along the axis with the smallest tdist.
// Returns false when the traversal steps past `overflow_index` (i.e. the
// grid has been exited on that axis).
inline __device__ bool single_traversal(
    float3& tdist, int3& current_index,
    const int3 overflow_index, const int3 step_index, const float3 delta) {
    if ((tdist.x < tdist.y) && (tdist.x < tdist.z)) {
        // X-axis traversal.
        current_index.x += step_index.x;
        tdist.x += delta.x;
        if (current_index.x == overflow_index.x) {
            return false;
        }
    } else if (tdist.y < tdist.z) {
        // Y-axis traversal.
        current_index.y += step_index.y;
        tdist.y += delta.y;
        if (current_index.y == overflow_index.y) {
            return false;
        }
    } else {
        // Z-axis traversal.
        current_index.z += step_index.z;
        tdist.z += delta.z;
        if (current_index.z == overflow_index.z) {
            return false;
        }
    }
    return true;
}


} // namespace device
} // namespace
-------------------------------------------------------------------------------- /nerfacc/cuda/csrc/nerfacc.cpp: --------------------------------------------------------------------------------
// This file contains only Python bindings
#include "include/data_spec.hpp"

// NOTE(review): the bracketed include target was lost in extraction
// (presumably <torch/extension.h>); restore before building.
#include


// scan
// Forward declarations; the implementations live in the .cu translation
// units (scan.cu / scan_cub.cu / grid.cu / pdf.cu / camera.cu).
torch::Tensor inclusive_sum(
    torch::Tensor chunk_starts,
    torch::Tensor chunk_cnts,
    torch::Tensor inputs,
    bool normalize,
    bool backward);
torch::Tensor exclusive_sum(
    torch::Tensor chunk_starts,
    torch::Tensor chunk_cnts,
    torch::Tensor inputs,
    bool normalize,
    bool backward);
torch::Tensor inclusive_prod_forward(
    torch::Tensor chunk_starts,
    torch::Tensor chunk_cnts,
    torch::Tensor inputs);
torch::Tensor inclusive_prod_backward(
    torch::Tensor chunk_starts,
    torch::Tensor chunk_cnts,
    torch::Tensor inputs,
    torch::Tensor outputs,
    torch::Tensor grad_outputs);
torch::Tensor exclusive_prod_forward(
    torch::Tensor chunk_starts,
    torch::Tensor chunk_cnts,
    torch::Tensor inputs);
torch::Tensor exclusive_prod_backward(
    torch::Tensor chunk_starts,
    torch::Tensor chunk_cnts,
    torch::Tensor inputs,
    torch::Tensor outputs,
    torch::Tensor grad_outputs);

bool is_cub_available();
torch::Tensor
inclusive_sum_cub(
    torch::Tensor ray_indices,
    torch::Tensor inputs,
    bool backward);
torch::Tensor exclusive_sum_cub(
    torch::Tensor indices,
    torch::Tensor inputs,
    bool backward);
torch::Tensor inclusive_prod_cub_forward(
    torch::Tensor indices,
    torch::Tensor inputs);
torch::Tensor inclusive_prod_cub_backward(
    torch::Tensor indices,
    torch::Tensor inputs,
    torch::Tensor outputs,
    torch::Tensor grad_outputs);
torch::Tensor exclusive_prod_cub_forward(
    torch::Tensor indices,
    torch::Tensor inputs);
torch::Tensor exclusive_prod_cub_backward(
    torch::Tensor indices,
    torch::Tensor inputs,
    torch::Tensor outputs,
    torch::Tensor grad_outputs);

// grid
// NOTE(review): the template arguments of std::vector / std::tuple below
// were lost in extraction (presumably std::vector<torch::Tensor> etc.);
// restore before building.
std::vector ray_aabb_intersect(
    const torch::Tensor rays_o, // [n_rays, 3]
    const torch::Tensor rays_d, // [n_rays, 3]
    const torch::Tensor aabbs,  // [n_aabbs, 6]
    const float near_plane,
    const float far_plane,
    const float miss_value);
std::tuple traverse_grids(
    // rays
    const torch::Tensor rays_o, // [n_rays, 3]
    const torch::Tensor rays_d, // [n_rays, 3]
    const torch::Tensor rays_mask,  // [n_rays]
    // grids
    const torch::Tensor binaries,   // [n_grids, resx, resy, resz]
    const torch::Tensor aabbs,      // [n_grids, 6]
    // intersections
    const torch::Tensor t_sorted,   // [n_rays, n_grids * 2]
    const torch::Tensor t_indices,  // [n_rays, n_grids * 2]
    const torch::Tensor hits,       // [n_rays, n_grids]
    // options
    const torch::Tensor near_planes,
    const torch::Tensor far_planes,
    const float step_size,
    const float cone_angle,
    const bool compute_intervals,
    const bool compute_samples,
    const bool compute_terminate_planes,
    const int32_t traverse_steps_limit, // <= 0 means no limit
    const bool over_allocate); // over allocate the memory for intervals and samples

// pdf
std::vector importance_sampling(
    RaySegmentsSpec ray_segments,
    torch::Tensor cdfs,
    torch::Tensor n_intervels_per_ray,
    bool stratified);
std::vector importance_sampling(
    RaySegmentsSpec ray_segments,
    torch::Tensor cdfs,
    int64_t n_intervels_per_ray,
    bool stratified);
std::vector searchsorted(
    RaySegmentsSpec query,
    RaySegmentsSpec key);

// cameras
torch::Tensor opencv_lens_undistortion(
    const torch::Tensor& uv,     // [..., 2]
    const torch::Tensor& params, // [..., 6]
    const float eps,
    const int max_iterations);
torch::Tensor opencv_lens_undistortion_fisheye(
    const torch::Tensor& uv,     // [..., 2]
    const torch::Tensor& params, // [..., 4]
    const float criteria_eps,
    const int criteria_iters);


PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
// Register each free function under its own name.
#define _REG_FUNC(funname) m.def(#funname, &funname)
    _REG_FUNC(inclusive_sum);
    _REG_FUNC(exclusive_sum);
    _REG_FUNC(inclusive_prod_forward);
    _REG_FUNC(inclusive_prod_backward);
    _REG_FUNC(exclusive_prod_forward);
    _REG_FUNC(exclusive_prod_backward);

    _REG_FUNC(is_cub_available);
    _REG_FUNC(inclusive_sum_cub);
    _REG_FUNC(exclusive_sum_cub);
    _REG_FUNC(inclusive_prod_cub_forward);
    _REG_FUNC(inclusive_prod_cub_backward);
    _REG_FUNC(exclusive_prod_cub_forward);
    _REG_FUNC(exclusive_prod_cub_backward);

    _REG_FUNC(ray_aabb_intersect);
    _REG_FUNC(traverse_grids);
    _REG_FUNC(searchsorted);

    _REG_FUNC(opencv_lens_undistortion);
    _REG_FUNC(opencv_lens_undistortion_fisheye); // TODO: check this function.
#undef _REG_FUNC

    // NOTE(review): the template arguments of py::overload_cast and
    // py::class_ below were lost in extraction; they disambiguate the two
    // importance_sampling overloads and bind RaySegmentsSpec. Restore
    // before building.
    m.def("importance_sampling", py::overload_cast(&importance_sampling));
    m.def("importance_sampling", py::overload_cast(&importance_sampling));

    py::class_(m, "RaySegmentsSpec")
        .def(py::init<>())
        .def_readwrite("vals", &RaySegmentsSpec::vals)
        .def_readwrite("is_left", &RaySegmentsSpec::is_left)
        .def_readwrite("is_right", &RaySegmentsSpec::is_right)
        .def_readwrite("is_valid", &RaySegmentsSpec::is_valid)
        .def_readwrite("chunk_starts", &RaySegmentsSpec::chunk_starts)
        .def_readwrite("chunk_cnts", &RaySegmentsSpec::chunk_cnts)
        .def_readwrite("ray_indices", &RaySegmentsSpec::ray_indices);
}
-------------------------------------------------------------------------------- /nerfacc/data_specs.py: --------------------------------------------------------------------------------
"""
Copyright (c) 2022 Ruilong Li, UC Berkeley.
"""
from dataclasses import dataclass
from typing import Optional

import torch

from . import cuda as _C


@dataclass
class RaySamples:
    """Ray samples that supports batched and flattened data.

    Note:
        When `vals` is flattened, either `packed_info` or `ray_indices` must
        be provided.

    Args:
        vals: Batched data with shape (n_rays, n_samples) or flattened data
            with shape (all_samples,)
        packed_info: Optional. A tensor of shape (n_rays, 2) that specifies
            the start and count of each chunk in flattened `vals`, with in
            total n_rays chunks. Only needed when `vals` is flattened.
        ray_indices: Optional. A tensor of shape (all_samples,) that specifies
            the ray index of each sample. Only needed when `vals` is flattened.

    Examples:

    ..
code-block:: python 32 | 33 | >>> # Batched data 34 | >>> ray_samples = RaySamples(torch.rand(10, 100)) 35 | >>> # Flattened data 36 | >>> ray_samples = RaySamples( 37 | >>> torch.rand(1000), 38 | >>> packed_info=torch.tensor([[0, 100], [100, 200], [300, 700]]), 39 | >>> ) 40 | 41 | """ 42 | 43 | vals: torch.Tensor 44 | packed_info: Optional[torch.Tensor] = None 45 | ray_indices: Optional[torch.Tensor] = None 46 | is_valid: Optional[torch.Tensor] = None 47 | 48 | def _to_cpp(self): 49 | """ 50 | Generate object to pass to C++ 51 | """ 52 | 53 | spec = _C.RaySegmentsSpec() 54 | spec.vals = self.vals.contiguous() 55 | if self.packed_info is not None: 56 | spec.chunk_starts = self.packed_info[:, 0].contiguous() 57 | if self.chunk_cnts is not None: 58 | spec.chunk_cnts = self.packed_info[:, 1].contiguous() 59 | if self.ray_indices is not None: 60 | spec.ray_indices = self.ray_indices.contiguous() 61 | return spec 62 | 63 | @classmethod 64 | def _from_cpp(cls, spec): 65 | """ 66 | Generate object from C++ 67 | """ 68 | if spec.chunk_starts is not None and spec.chunk_cnts is not None: 69 | packed_info = torch.stack([spec.chunk_starts, spec.chunk_cnts], -1) 70 | else: 71 | packed_info = None 72 | ray_indices = spec.ray_indices 73 | if spec.is_valid is not None: 74 | is_valid = spec.is_valid 75 | else: 76 | is_valid = None 77 | vals = spec.vals 78 | return cls( 79 | vals=vals, 80 | packed_info=packed_info, 81 | ray_indices=ray_indices, 82 | is_valid=is_valid, 83 | ) 84 | 85 | @property 86 | def device(self) -> torch.device: 87 | return self.vals.device 88 | 89 | 90 | @dataclass 91 | class RayIntervals: 92 | """Ray intervals that supports batched and flattened data. 93 | 94 | Each interval is defined by two edges (left and right). The attribute `vals` 95 | stores the edges of all intervals along the rays. The attributes `is_left` 96 | and `is_right` are for indicating whether each edge is a left or right edge. 
    This class unifies the representation of both continuous and non-continuous ray
    intervals.

    Note:
        When `vals` is flattened, either `packed_info` or `ray_indices` must
        be provided. Also both `is_left` and `is_right` must be provided.

    Args:
        vals: Batched data with shape (n_rays, n_edges) or flattened data
            with shape (all_edges,)
        packed_info: Optional. A tensor of shape (n_rays, 2) that specifies
            the start and count of each chunk in flattened `vals`, with in
            total n_rays chunks. Only needed when `vals` is flattened.
        ray_indices: Optional. A tensor of shape (all_edges,) that specifies
            the ray index of each edge. Only needed when `vals` is flattened.
        is_left: Optional. A boolean tensor of shape (all_edges,) that specifies
            whether each edge is a left edge. Only needed when `vals` is flattened.
        is_right: Optional. A boolean tensor of shape (all_edges,) that specifies
            whether each edge is a right edge. Only needed when `vals` is flattened.

    Examples:

    ..
code-block:: python 120 | 121 | >>> # Batched data 122 | >>> ray_intervals = RayIntervals(torch.rand(10, 100)) 123 | >>> # Flattened data 124 | >>> ray_intervals = RayIntervals( 125 | >>> torch.rand(6), 126 | >>> packed_info=torch.tensor([[0, 2], [2, 0], [2, 4]]), 127 | >>> is_left=torch.tensor([True, False, True, True, True, False]), 128 | >>> is_right=torch.tensor([False, True, False, True, True, True]), 129 | >>> ) 130 | 131 | """ 132 | 133 | vals: torch.Tensor 134 | packed_info: Optional[torch.Tensor] = None 135 | ray_indices: Optional[torch.Tensor] = None 136 | is_left: Optional[torch.Tensor] = None 137 | is_right: Optional[torch.Tensor] = None 138 | 139 | def _to_cpp(self): 140 | """ 141 | Generate object to pass to C++ 142 | """ 143 | 144 | spec = _C.RaySegmentsSpec() 145 | spec.vals = self.vals.contiguous() 146 | if self.packed_info is not None: 147 | spec.chunk_starts = self.packed_info[:, 0].contiguous() 148 | if self.packed_info is not None: 149 | spec.chunk_cnts = self.packed_info[:, 1].contiguous() 150 | if self.ray_indices is not None: 151 | spec.ray_indices = self.ray_indices.contiguous() 152 | if self.is_left is not None: 153 | spec.is_left = self.is_left.contiguous() 154 | if self.is_right is not None: 155 | spec.is_right = self.is_right.contiguous() 156 | return spec 157 | 158 | @classmethod 159 | def _from_cpp(cls, spec): 160 | """ 161 | Generate object from C++ 162 | """ 163 | if spec.chunk_starts is not None and spec.chunk_cnts is not None: 164 | packed_info = torch.stack([spec.chunk_starts, spec.chunk_cnts], -1) 165 | else: 166 | packed_info = None 167 | ray_indices = spec.ray_indices 168 | is_left = spec.is_left 169 | is_right = spec.is_right 170 | return cls( 171 | vals=spec.vals, 172 | packed_info=packed_info, 173 | ray_indices=ray_indices, 174 | is_left=is_left, 175 | is_right=is_right, 176 | ) 177 | 178 | @property 179 | def device(self) -> torch.device: 180 | return self.vals.device 181 | 
from typing import Any, Optional

import torch
import torch.nn as nn
from torch import Tensor


class AbstractEstimator(nn.Module):
    """Abstract transmittance estimator used for sampling along rays."""

    def __init__(self) -> None:
        super().__init__()
        # Empty non-persistent buffer; it exists purely so `.device` tracks
        # wherever the module is moved.
        self.register_buffer("_dummy", torch.empty(0), persistent=False)

    @property
    def device(self) -> torch.device:
        """Device this estimator currently lives on."""
        return self._dummy.device

    def sampling(self, *args, **kwargs) -> Any:
        raise NotImplementedError

    def update_every_n_steps(self, *args, **kwargs) -> None:
        raise NotImplementedError


def distortion(
    weights: Tensor,
    t_starts: Tensor,
    t_ends: Tensor,
    ray_indices: Tensor,
    n_rays: int,
) -> Tensor:
    """Distortion Regularization proposed in Mip-NeRF 360.

    Args:
        weights: The flattened weights of the samples. Shape (n_samples,)
        t_starts: The start points of the samples. Shape (n_samples,)
        t_ends: The end points of the samples. Shape (n_samples,)
        ray_indices: The ray indices of the samples. LongTensor with shape (n_samples,)
        n_rays: The total number of rays.

    Returns:
        The per-ray distortion loss with the shape (n_rays, 1).
    """
    from .scan import exclusive_sum
    from .volrend import accumulate_along_rays

    assert (
        weights.shape == t_starts.shape == t_ends.shape == ray_indices.shape
    ), (
        f"the shape of the inputs are not the same: "
        f"weights {weights.shape}, t_starts {t_starts.shape}, "
        f"t_ends {t_ends.shape}, ray_indices {ray_indices.shape}"
    )
    midpoints = 0.5 * (t_starts + t_ends)
    widths = t_ends - t_starts
    # Intra-sample ("uni") term plus the pairwise ("bi") term, the latter
    # computed in O(n) via exclusive prefix sums over each ray's samples.
    loss_uni = (1 / 3) * (widths * weights.pow(2))
    w_prefix = exclusive_sum(weights, indices=ray_indices)
    wm_prefix = exclusive_sum(weights * midpoints, indices=ray_indices)
    loss_bi = 2 * (weights * midpoints * w_prefix - weights * wm_prefix)
    return accumulate_along_rays(loss_uni + loss_bi, None, ray_indices, n_rays)


@torch.no_grad()
def pack_info(ray_indices: Tensor, n_rays: Optional[int] = None) -> Tensor:
    """Pack `ray_indices` to `packed_info`. Useful for converting per sample data to per ray data.

    Note:
        this function is not differentiable to any inputs.

    Args:
        ray_indices: Ray indices of the samples. LongTensor with shape (n_sample).
        n_rays: Number of rays. If None, it is inferred from `ray_indices`. Default is None.

    Returns:
        A LongTensor of shape (n_rays, 2) that specifies the start and count
        of each chunk in the flattened input tensor, with in total n_rays chunks.

    Example:

    .. code-block:: python

        >>> ray_indices = torch.tensor([0, 0, 1, 1, 1, 2, 2, 2, 2], device="cuda")
        >>> packed_info = pack_info(ray_indices, n_rays=3)
        >>> packed_info
        tensor([[0, 2], [2, 3], [5, 4]], device='cuda:0')

    """
    assert (
        ray_indices.dim() == 1
    ), "ray_indices must be a 1D tensor with shape (n_samples)."
    if not ray_indices.is_cuda:
        raise NotImplementedError("Only support cuda inputs.")
    device, dtype = ray_indices.device, ray_indices.dtype
    if n_rays is None:
        n_rays = ray_indices.max().item() + 1
    # Histogram of samples per ray, then an exclusive cumsum gives each
    # ray's chunk start offset.
    counts = torch.zeros((n_rays,), device=device, dtype=dtype)
    counts.index_add_(0, ray_indices, torch.ones_like(ray_indices))
    starts = counts.cumsum(dim=0, dtype=dtype) - counts
    return torch.stack([starts, counts], dim=-1)
27 | 28 | Args: 29 | sorted_sequence: A :class:`RayIntervals` or :class:`RaySamples` object. We assume 30 | the `sorted_sequence.vals` is acendingly sorted for each ray. 31 | values: A :class:`RayIntervals` or :class:`RaySamples` object. 32 | 33 | Returns: 34 | A tuple of LongTensor: 35 | 36 | - **ids_left**: A LongTensor with the same shape as `values.vals`. 37 | - **ids_right**: A LongTensor with the same shape as `values.vals`. 38 | 39 | Example: 40 | >>> sorted_sequence = RayIntervals( 41 | ... vals=torch.tensor([0.0, 1.0, 0.0, 1.0, 2.0], device="cuda"), 42 | ... packed_info=torch.tensor([[0, 2], [2, 3]], device="cuda"), 43 | ... ) 44 | >>> values = RayIntervals( 45 | ... vals=torch.tensor([0.5, 1.5, 2.5], device="cuda"), 46 | ... packed_info=torch.tensor([[0, 1], [1, 2]], device="cuda"), 47 | ... ) 48 | >>> ids_left, ids_right = searchsorted(sorted_sequence, values) 49 | >>> ids_left 50 | tensor([0, 3, 3], device='cuda:0') 51 | >>> ids_right 52 | tensor([1, 4, 4], device='cuda:0') 53 | >>> sorted_sequence.vals.gather(-1, ids_left) 54 | tensor([0., 1., 1.], device='cuda:0') 55 | >>> sorted_sequence.vals.gather(-1, ids_right) 56 | tensor([1., 2., 2.], device='cuda:0') 57 | """ 58 | 59 | ids_left, ids_right = _C.searchsorted( 60 | values._to_cpp(), sorted_sequence._to_cpp() 61 | ) 62 | return ids_left, ids_right 63 | 64 | 65 | def importance_sampling( 66 | intervals: RayIntervals, 67 | cdfs: Tensor, 68 | n_intervals_per_ray: Union[Tensor, int], 69 | stratified: bool = False, 70 | ) -> Tuple[RayIntervals, RaySamples]: 71 | """Importance sampling that supports flattened tensor. 72 | 73 | Given a set of intervals and the corresponding CDFs at the interval edges, 74 | this function performs inverse transform sampling to create a new set of 75 | intervals and samples. Stratified sampling is also supported. 76 | 77 | Args: 78 | intervals: A :class:`RayIntervals` object that specifies the edges of the 79 | intervals along the rays. 
80 | cdfs: The CDFs at the interval edges. It has the same shape as 81 | `intervals.vals`. 82 | n_intervals_per_ray: Resample each ray to have this many intervals. 83 | If it is a tensor, it must be of shape (n_rays,). If it is an int, 84 | it is broadcasted to all rays. 85 | stratified: If True, perform stratified sampling. 86 | 87 | Returns: 88 | A tuple of {:class:`RayIntervals`, :class:`RaySamples`}: 89 | 90 | - **intervals**: A :class:`RayIntervals` object. If `n_intervals_per_ray` is an int, \ 91 | `intervals.vals` will has the shape of (n_rays, n_intervals_per_ray + 1). \ 92 | If `n_intervals_per_ray` is a tensor, we assume each ray results \ 93 | in a different number of intervals. In this case, `intervals.vals` \ 94 | will has the shape of (all_edges,), the attributes `packed_info`, \ 95 | `ray_indices`, `is_left` and `is_right` will be accessable. 96 | 97 | - **samples**: A :class:`RaySamples` object. If `n_intervals_per_ray` is an int, \ 98 | `samples.vals` will has the shape of (n_rays, n_intervals_per_ray). \ 99 | If `n_intervals_per_ray` is a tensor, we assume each ray results \ 100 | in a different number of intervals. In this case, `samples.vals` \ 101 | will has the shape of (all_samples,), the attributes `packed_info` and \ 102 | `ray_indices` will be accessable. 103 | 104 | Example: 105 | 106 | .. code-block:: python 107 | 108 | >>> intervals = RayIntervals( 109 | ... vals=torch.tensor([0.0, 1.0, 0.0, 1.0, 2.0], device="cuda"), 110 | ... packed_info=torch.tensor([[0, 2], [2, 3]], device="cuda"), 111 | ... 
) 112 | >>> cdfs = torch.tensor([0.0, 0.5, 0.0, 0.5, 1.0], device="cuda") 113 | >>> n_intervals_per_ray = 2 114 | >>> intervals, samples = importance_sampling(intervals, cdfs, n_intervals_per_ray) 115 | >>> intervals.vals 116 | tensor([[0.0000, 0.5000, 1.0000], 117 | [0.0000, 1.0000, 2.0000]], device='cuda:0') 118 | >>> samples.vals 119 | tensor([[0.2500, 0.7500], 120 | [0.5000, 1.5000]], device='cuda:0') 121 | 122 | """ 123 | if isinstance(n_intervals_per_ray, Tensor): 124 | n_intervals_per_ray = n_intervals_per_ray.contiguous() 125 | intervals, samples = _C.importance_sampling( 126 | intervals._to_cpp(), 127 | cdfs.contiguous(), 128 | n_intervals_per_ray, 129 | stratified, 130 | ) 131 | return RayIntervals._from_cpp(intervals), RaySamples._from_cpp(samples) 132 | 133 | 134 | def _sample_from_weighted( 135 | bins: Tensor, 136 | weights: Tensor, 137 | num_samples: int, 138 | stratified: bool = False, 139 | vmin: float = -torch.inf, 140 | vmax: float = torch.inf, 141 | ) -> Tuple[Tensor, Tensor]: 142 | import torch.nn.functional as F 143 | 144 | """ 145 | Args: 146 | bins: (..., B + 1). 147 | weights: (..., B). 148 | 149 | Returns: 150 | samples: (..., S + 1). 151 | """ 152 | B = weights.shape[-1] 153 | S = num_samples 154 | assert bins.shape[-1] == B + 1 155 | 156 | dtype, device = bins.dtype, bins.device 157 | eps = torch.finfo(weights.dtype).eps 158 | 159 | # (..., B). 160 | pdf = F.normalize(weights, p=1, dim=-1) 161 | # (..., B + 1). 162 | cdf = torch.cat( 163 | [ 164 | torch.zeros_like(pdf[..., :1]), 165 | torch.cumsum(pdf[..., :-1], dim=-1), 166 | torch.ones_like(pdf[..., :1]), 167 | ], 168 | dim=-1, 169 | ) 170 | 171 | # (..., S). Sample positions between [0, 1). 172 | if not stratified: 173 | pad = 1 / (2 * S) 174 | # Get the center of each pdf bins. 175 | u = torch.linspace(pad, 1 - pad - eps, S, dtype=dtype, device=device) 176 | u = u.broadcast_to(bins.shape[:-1] + (S,)) 177 | else: 178 | # `u` is in [0, 1) --- it can be zero, but it can never be 1. 
179 | u_max = eps + (1 - eps) / S 180 | max_jitter = (1 - u_max) / (S - 1) - eps 181 | # Only perform one jittering per ray (`single_jitter` in the original 182 | # implementation.) 183 | u = ( 184 | torch.linspace(0, 1 - u_max, S, dtype=dtype, device=device) 185 | + torch.rand( 186 | *bins.shape[:-1], 187 | 1, 188 | dtype=dtype, 189 | device=device, 190 | ) 191 | * max_jitter 192 | ) 193 | 194 | # (..., S). 195 | ceil = torch.searchsorted(cdf.contiguous(), u.contiguous(), side="right") 196 | floor = ceil - 1 197 | # (..., S * 2). 198 | inds = torch.cat([floor, ceil], dim=-1) 199 | 200 | # (..., S). 201 | cdf0, cdf1 = cdf.gather(-1, inds).split(S, dim=-1) 202 | b0, b1 = bins.gather(-1, inds).split(S, dim=-1) 203 | 204 | # (..., S). Linear interpolation in 1D. 205 | t = (u - cdf0) / torch.clamp(cdf1 - cdf0, min=eps) 206 | # Sample centers. 207 | centers = b0 + t * (b1 - b0) 208 | 209 | samples = (centers[..., 1:] + centers[..., :-1]) / 2 210 | samples = torch.cat( 211 | [ 212 | (2 * centers[..., :1] - samples[..., :1]).clamp_min(vmin), 213 | samples, 214 | (2 * centers[..., -1:] - samples[..., -1:]).clamp_max(vmax), 215 | ], 216 | dim=-1, 217 | ) 218 | 219 | return samples, centers 220 | -------------------------------------------------------------------------------- /nerfacc/version.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2022 Ruilong Li, UC Berkeley. 
__version__ = "0.5.3"


# --- scripts/run_aws_listing.py ---
# Regenerates the pip "find-links" HTML index for the wheels stored in S3:
# one top-level index listing per-torch-version pages, plus one page per
# torch version listing its wheel files.
import argparse
from collections import defaultdict

from boto3 import resource

parser = argparse.ArgumentParser()
parser.add_argument("--bucket", type=str, required=True)
parser.add_argument("--region", type=str, required=True)
args = parser.parse_args()

ROOT_URL = f"https://{args.bucket}.s3.{args.region}.amazonaws.com/whl"
# NOTE(review): these two templates were garbled in this dump (the HTML tags
# were stripped); reconstructed as a minimal link index -- confirm against
# the original file.
html = "<!DOCTYPE html>\n<html>\n<body>\n{}\n</body>\n</html>"
href = '  <a href="{}">{}</a><br/>'
html_args = {
    "ContentType": "text/html",
    "CacheControl": "max-age=300",
    "ACL": "public-read",
}

# BUG FIX: the bucket name was hard-coded to "nerfacc-bucket" even though
# --bucket is a required CLI argument (and ROOT_URL above already uses it).
bucket = resource("s3").Bucket(name=args.bucket)

wheels_dict = defaultdict(list)
for obj in bucket.objects.filter(Prefix="whl"):
    # Keys look like "whl/<torch_version>/<wheel file>.whl"; skip non-wheels.
    if obj.key[-3:] != "whl":
        continue
    torch_version, wheel = obj.key.split("/")[-2:]
    wheels_dict[torch_version].append(f"{torch_version}/{wheel}")

index_html = html.format(
    "\n".join(
        [
            href.format(
                # "+" must be percent-encoded in URLs.
                f"{torch_version}.html".replace("+", "%2B"),
                torch_version,
            )
            for torch_version in wheels_dict
        ]
    )
)

with open("index.html", "w") as f:
    f.write(index_html)
bucket.Object("whl/index.html").upload_file("index.html", html_args)

for torch_version, wheel_names in wheels_dict.items():
    torch_version_html = html.format(
        "\n".join(
            [
                href.format(
                    f"{ROOT_URL}/{wheel_name}".replace("+", "%2B"),
                    wheel_name.split("/")[-1],
                )
                for wheel_name in wheel_names
            ]
        )
    )

    with open(f"{torch_version}.html", "w") as f:
        f.write(torch_version_html)
    bucket.Object(f"whl/{torch_version}.html").upload_file(
        f"{torch_version}.html", html_args
    )


# --- scripts/run_dev_checks.py ---
#!/usr/bin/env python
"""Simple yaml debugger"""
import subprocess

import yaml
from rich.console import Console
from rich.style import Style

console = Console(width=120)

LOCAL_TESTS = [
    "Run license checks",
    "Run isort",
    "Run Black",
    "Python Pylint",
    "Test with pytest",
]


def run_command(command: str) -> bool:
    """Run a shell command, printing an error banner when it fails.

    (DOC FIX: the old docstring documented a `continue_on_fail` parameter
    that does not exist.)

    Args:
        command: command to run.

    Returns:
        True iff the command exited with status 0.
    """
    ret_code = subprocess.call(command, shell=True)
    if ret_code != 0:
        console.print(f"[bold red]Error: `{command}` failed.")
    return ret_code == 0


def run_github_actions_file(filename: str):
    """Run a github actions file locally.

    Args:
        filename: Which yml github actions file to run.
    """
    with open(filename, "rb") as f:
        my_dict = yaml.safe_load(f)
    steps = my_dict["jobs"]["build"]["steps"]

    success = True

    for step in steps:
        if "name" in step and step["name"] in LOCAL_TESTS:
            # Flatten the multi-line "run:" script into one shell line and
            # strip "--check" so formatters fix files instead of verifying.
            compressed = step["run"].replace("\n", ";").replace("\\", "")
            compressed = compressed.replace("--check", "")
            curr_command = f"{compressed}"

            console.line()
            console.rule(f"[bold green]Running: {curr_command}")
            success = success and run_command(curr_command)
        else:
            skip_name = step["name"] if "name" in step else step["uses"]
            console.print(f"Skipping {skip_name}")

    # Code Testing
    console.line()
    console.rule("[bold green]Running pytest")
    success = success and run_command("pytest")

    # Add checks for building documentation
    console.line()
    console.rule("[bold green]Building Documentation")
    success = success and run_command(
        "cd docs/; make clean; make html SPHINXOPTS='-W;'"
    )

    if success:
        console.line()
        console.rule(characters="=")
        console.print(
            "[bold green]:TADA: :TADA: :TADA: ALL CHECKS PASSED :TADA: :TADA: :TADA:",
            justify="center",
        )
        console.rule(characters="=")
    else:
        console.line()
        console.rule(characters="=", style=Style(color="red"))
        console.print(
            "[bold red]:skull: :skull: :skull: ERRORS FOUND :skull: :skull: :skull:",
            justify="center",
        )
        console.rule(characters="=", style=Style(color="red"))


if __name__ == "__main__":
    run_github_actions_file(filename=".github/workflows/code_checks.yml")
"__main__": 89 | run_github_actions_file(filename=".github/workflows/code_checks.yml") 90 | -------------------------------------------------------------------------------- /scripts/run_profiler.py: -------------------------------------------------------------------------------- 1 | from typing import Callable 2 | 3 | import torch 4 | import tqdm 5 | 6 | import nerfacc 7 | 8 | # timing 9 | # https://github.com/pytorch/pytorch/commit/d2784c233bfc57a1d836d961694bcc8ec4ed45e4 10 | 11 | 12 | class Profiler: 13 | def __init__(self, warmup=10, repeat=1000): 14 | self.warmup = warmup 15 | self.repeat = repeat 16 | 17 | def __call__(self, func: Callable): 18 | # warmup 19 | for _ in range(self.warmup): 20 | func() 21 | torch.cuda.synchronize() 22 | 23 | # profile 24 | with torch.profiler.profile( 25 | activities=[ 26 | torch.profiler.ProfilerActivity.CPU, 27 | torch.profiler.ProfilerActivity.CUDA, 28 | ], 29 | profile_memory=True, 30 | ) as prof: 31 | for _ in range(self.repeat): 32 | func() 33 | torch.cuda.synchronize() 34 | 35 | # return 36 | events = prof.key_averages() 37 | # print(events.table(sort_by="self_cpu_time_total", row_limit=10)) 38 | self_cpu_time_total = ( 39 | sum([event.self_cpu_time_total for event in events]) / self.repeat 40 | ) 41 | self_cuda_time_total = ( 42 | sum([event.self_cuda_time_total for event in events]) / self.repeat 43 | ) 44 | self_cuda_memory_usage = max( 45 | [event.self_cuda_memory_usage for event in events] 46 | ) 47 | return ( 48 | self_cpu_time_total, # in us 49 | self_cuda_time_total, # in us 50 | self_cuda_memory_usage, # in bytes 51 | ) 52 | 53 | 54 | def main(): 55 | device = "cuda:0" 56 | torch.manual_seed(42) 57 | profiler = Profiler(warmup=10, repeat=100) 58 | 59 | # # contract 60 | # print("* contract") 61 | # x = torch.rand([1024, 3], device=device) 62 | # roi = torch.tensor([0, 0, 0, 1, 1, 1], dtype=torch.float32, device=device) 63 | # fn = lambda: nerfacc.contract( 64 | # x, roi=roi, 
type=nerfacc.ContractionType.UN_BOUNDED_TANH 65 | # ) 66 | # cpu_t, cuda_t, cuda_bytes = profiler(fn) 67 | # print(f"{cpu_t:.2f} us, {cuda_t:.2f} us, {cuda_bytes / 1024 / 1024:.2f} MB") 68 | 69 | # rendering 70 | print("* rendering") 71 | batch_size = 81920 72 | rays_o = torch.rand((batch_size, 3), device=device) 73 | rays_d = torch.randn((batch_size, 3), device=device) 74 | rays_d = rays_d / rays_d.norm(dim=-1, keepdim=True) 75 | 76 | ray_indices, t_starts, t_ends = nerfacc._ray_marching( 77 | rays_o, 78 | rays_d, 79 | near_plane=0.1, 80 | far_plane=1.0, 81 | render_step_size=1e-1, 82 | ) 83 | sigmas = torch.randn_like(t_starts, requires_grad=True) 84 | fn = ( 85 | lambda: nerfacc.render_weight_from_density( 86 | ray_indices, t_starts, t_ends, sigmas 87 | ) 88 | .sum() 89 | .backward() 90 | ) 91 | fn() 92 | torch.cuda.synchronize() 93 | for _ in tqdm.tqdm(range(100)): 94 | fn() 95 | torch.cuda.synchronize() 96 | 97 | cpu_t, cuda_t, cuda_bytes = profiler(fn) 98 | print(f"{cpu_t:.2f} us, {cuda_t:.2f} us, {cuda_bytes / 1024 / 1024:.2f} MB") 99 | 100 | packed_info = nerfacc.pack_info(ray_indices, n_rays=batch_size) 101 | fn = ( 102 | lambda: nerfacc._vol_rendering._RenderingDensity.apply( 103 | packed_info, t_starts, t_ends, sigmas, 0 104 | ) 105 | .sum() 106 | .backward() 107 | ) 108 | fn() 109 | torch.cuda.synchronize() 110 | for _ in tqdm.tqdm(range(100)): 111 | fn() 112 | torch.cuda.synchronize() 113 | cpu_t, cuda_t, cuda_bytes = profiler(fn) 114 | print(f"{cpu_t:.2f} us, {cuda_t:.2f} us, {cuda_bytes / 1024 / 1024:.2f} MB") 115 | 116 | 117 | if __name__ == "__main__": 118 | main() 119 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | multi_line_output = 3 3 | line_length = 80 4 | include_trailing_comma = true 5 | skip=./examples/pycolmap,./benchmarks 
# --- setup.py ---
import glob
import os
import os.path as osp
import platform
import sys

from setuptools import find_packages, setup

# Read __version__ from the package source without importing the package
# (importing would pull in torch/CUDA at metadata time).
# FIX: the previous `exec(open(...).read())` never closed the file handle;
# use a context manager.
__version__ = None
with open("nerfacc/version.py", "r") as f:
    exec(f.read())

URL = "https://github.com/nerfstudio-project/nerfacc"

BUILD_NO_CUDA = os.getenv("BUILD_NO_CUDA", "0") == "1"
WITH_SYMBOLS = os.getenv("WITH_SYMBOLS", "0") == "1"


def get_ext():
    """Return a `BuildExtension` subclass configured for this package.

    Imported lazily so plain metadata commands work without torch.
    """
    from torch.utils.cpp_extension import BuildExtension

    return BuildExtension.with_options(
        no_python_abi_suffix=True, use_ninja=False
    )


def get_extensions():
    """Build the list with the single CUDA/HIP extension of this package."""
    import torch
    from torch.__config__ import parallel_info
    from torch.utils.cpp_extension import CUDAExtension

    extensions_dir = osp.join("nerfacc", "cuda", "csrc")
    sources = glob.glob(osp.join(extensions_dir, "*.cu")) + glob.glob(
        osp.join(extensions_dir, "*.cpp")
    )
    # remove generated 'hip' files, in case of rebuilds
    sources = [path for path in sources if "hip" not in path]

    undef_macros = []
    define_macros = []

    if sys.platform == "win32":
        define_macros += [("nerfacc_EXPORTS", None)]

    extra_compile_args = {"cxx": ["-O3"]}
    if not os.name == "nt":  # Not on Windows:
        extra_compile_args["cxx"] += ["-Wno-sign-compare"]
    extra_link_args = [] if WITH_SYMBOLS else ["-s"]

    # Enable OpenMP when torch was built with it (not on macOS).
    info = parallel_info()
    if (
        "backend: OpenMP" in info
        and "OpenMP not found" not in info
        and sys.platform != "darwin"
    ):
        extra_compile_args["cxx"] += ["-DAT_PARALLEL_OPENMP"]
        if sys.platform == "win32":
            extra_compile_args["cxx"] += ["/openmp"]
        else:
            extra_compile_args["cxx"] += ["-fopenmp"]
    else:
        print("Compiling without OpenMP...")

    # Compile for mac arm64
    if sys.platform == "darwin" and platform.machine() == "arm64":
        extra_compile_args["cxx"] += ["-arch", "arm64"]
        extra_link_args += ["-arch", "arm64"]

    nvcc_flags = os.getenv("NVCC_FLAGS", "")
    nvcc_flags = [] if nvcc_flags == "" else nvcc_flags.split(" ")
    nvcc_flags += ["-O3"]
    if torch.version.hip:
        # USE_ROCM was added to later versions of PyTorch.
        # Define here to support older PyTorch versions as well:
        define_macros += [("USE_ROCM", None)]
        undef_macros += ["__HIP_NO_HALF_CONVERSIONS__"]
    else:
        nvcc_flags += ["--expt-relaxed-constexpr"]
    extra_compile_args["nvcc"] = nvcc_flags

    extension = CUDAExtension(
        # FIX: was an f-string with no placeholders.
        "nerfacc.csrc",
        sources,
        include_dirs=[osp.join(extensions_dir, "include")],
        define_macros=define_macros,
        undef_macros=undef_macros,
        extra_compile_args=extra_compile_args,
        extra_link_args=extra_link_args,
    )

    return [extension]


# work-around hipify abs paths
include_package_data = True
# if torch.cuda.is_available() and torch.version.hip:
#     include_package_data = False

setup(
    name="nerfacc",
    version=__version__,
    description="A General NeRF Acceleration Toolbox",
    author="Ruilong",
    author_email="ruilongli94@gmail.com",
    url=URL,
    download_url=f"{URL}/archive/{__version__}.tar.gz",
    keywords=[],
    python_requires=">=3.7",
    install_requires=[
        "rich>=12",
        "torch",
        "typing_extensions; python_version<'3.8'",
    ],
    extras_require={
        # dev dependencies. Install them by `pip install nerfacc[dev]`
        "dev": [
            "black[jupyter]==22.3.0",
            "isort==5.10.1",
            "pylint==2.13.4",
            "pytest==7.1.2",
            "pytest-xdist==2.5.0",
            "typeguard>=2.13.3",
            "pyyaml==6.0",
            "build",
            "twine",
            "ninja",
        ],
    },
    ext_modules=get_extensions() if not BUILD_NO_CUDA else [],
    cmdclass={"build_ext": get_ext()} if not BUILD_NO_CUDA else {},
    packages=find_packages(),
    include_package_data=include_package_data,
)
# --- tests/test_camera.py ---
from typing import Tuple

import pytest
import torch
import tqdm
from torch import Tensor

device = "cuda:0"


# BUG FIX (applies to every test file below too): the condition used to be
# `not torch.cuda.is_available` -- the *function object*, which is always
# truthy, so `not ...` was always False and the skip never triggered on
# CPU-only machines. The function must be called.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
@torch.no_grad()
def test_opencv_lens_undistortion():
    from nerfacc.cameras import (
        _opencv_lens_distortion,
        _opencv_lens_distortion_fisheye,
        _opencv_lens_undistortion,
        opencv_lens_undistortion,
        opencv_lens_undistortion_fisheye,
    )

    torch.manual_seed(42)

    x = torch.rand((3, 1000, 2), device=device)

    # Round-trip check: undistort then distort must recover the input.
    params = torch.rand((8), device=device) * 0.01
    x_undistort = opencv_lens_undistortion(x, params, 1e-5, 10)
    _x_undistort = _opencv_lens_undistortion(x, params, 1e-5, 10)
    assert torch.allclose(x_undistort, _x_undistort, atol=1e-5)
    x_distort = _opencv_lens_distortion(x_undistort, params)
    assert torch.allclose(x, x_distort, atol=1e-5), (x - x_distort).abs().max()
    # print(x[0, 0], x_distort[0, 0], x_undistort[0, 0])

    params = torch.rand((4), device=device) * 0.01
    x_undistort = opencv_lens_undistortion_fisheye(x, params, 1e-5, 10)
    x_distort = _opencv_lens_distortion_fisheye(x_undistort, params)
    assert torch.allclose(x, x_distort, atol=1e-5), (x - x_distort).abs().max()
    # print(x[0, 0], x_distort[0, 0], x_undistort[0, 0])


if __name__ == "__main__":
    test_opencv_lens_undistortion()


# --- tests/test_pack.py ---
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_pack_info():
    from nerfacc.pack import pack_info

    _packed_info = torch.tensor(
        [[0, 1], [1, 0], [1, 4]], dtype=torch.int32, device=device
    )
    ray_indices = torch.tensor(
        [0, 2, 2, 2, 2], dtype=torch.int64, device=device
    )
    packed_info = pack_info(ray_indices, n_rays=_packed_info.shape[0])
    assert (packed_info == _packed_info).all()


if __name__ == "__main__":
    test_pack_info()


# --- tests/test_pdf.py ---
def _create_intervals(n_rays, n_samples, flat=False):
    """Build a random, sorted RayIntervals fixture (batched or flattened)."""
    from nerfacc.data_specs import RayIntervals

    torch.manual_seed(42)
    vals = torch.rand((n_rays, n_samples + 1), device=device)
    vals = torch.sort(vals, -1)[0]

    # Randomly keep ~half the samples; an edge is "left" of a kept sample
    # and "right" of the previous kept sample.
    sample_masks = torch.rand((n_rays, n_samples), device=device) > 0.5
    is_lefts = torch.cat(
        [
            sample_masks,
            torch.zeros((n_rays, 1), device=device, dtype=torch.bool),
        ],
        dim=-1,
    )
    is_rights = torch.cat(
        [
            torch.zeros((n_rays, 1), device=device, dtype=torch.bool),
            sample_masks,
        ],
        dim=-1,
    )
    if not flat:
        return RayIntervals(vals=vals)
    else:
        interval_masks = is_lefts | is_rights
        vals = vals[interval_masks]
        is_lefts = is_lefts[interval_masks]
        is_rights = is_rights[interval_masks]
        chunk_cnts = (interval_masks).long().sum(-1)
        chunk_starts = torch.cumsum(chunk_cnts, 0) - chunk_cnts
        packed_info = torch.stack([chunk_starts, chunk_cnts], -1)

        return RayIntervals(
            vals, packed_info, is_left=is_lefts, is_right=is_rights
        )


@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_searchsorted():
    from nerfacc.data_specs import RayIntervals
    from nerfacc.pdf import searchsorted

    torch.manual_seed(42)
    query: RayIntervals = _create_intervals(10, 100, flat=False)
    key: RayIntervals = _create_intervals(10, 100, flat=False)

    ids_left, ids_right = searchsorted(key, query)
    y = key.vals.gather(-1, ids_right)

    # Reference: plain torch.searchsorted with out-of-range ids clamped.
    _ids_right = torch.searchsorted(key.vals, query.vals, right=True)
    _ids_right = torch.clamp(_ids_right, 0, key.vals.shape[-1] - 1)
    _y = key.vals.gather(-1, _ids_right)

    assert torch.allclose(ids_right, _ids_right)
    assert torch.allclose(y, _y)
torch.allclose(_intervals.vals[i : i + 1], _vals, atol=1e-4) 94 | assert torch.allclose(_samples.vals[i : i + 1], _mids, atol=1e-4) 95 | 96 | 97 | @pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device") 98 | def test_pdf_loss(): 99 | from nerfacc.data_specs import RayIntervals 100 | from nerfacc.estimators.prop_net import _lossfun_outer, _pdf_loss 101 | from nerfacc.pdf import _sample_from_weighted, importance_sampling 102 | 103 | torch.manual_seed(42) 104 | intervals: RayIntervals = _create_intervals(5, 100, flat=False) 105 | cdfs = torch.rand_like(intervals.vals) 106 | cdfs = torch.sort(cdfs, -1)[0] 107 | n_intervels_per_ray = 10 108 | stratified = False 109 | 110 | _intervals, _samples = importance_sampling( 111 | intervals, 112 | cdfs, 113 | n_intervels_per_ray, 114 | stratified, 115 | ) 116 | _cdfs = torch.rand_like(_intervals.vals) 117 | _cdfs = torch.sort(_cdfs, -1)[0] 118 | 119 | loss = _pdf_loss(intervals, cdfs, _intervals, _cdfs) 120 | 121 | loss2 = _lossfun_outer( 122 | intervals.vals, 123 | cdfs[:, 1:] - cdfs[:, :-1], 124 | _intervals.vals, 125 | _cdfs[:, 1:] - _cdfs[:, :-1], 126 | ) 127 | assert torch.allclose(loss, loss2, atol=1e-4) 128 | 129 | 130 | if __name__ == "__main__": 131 | test_importance_sampling() 132 | test_searchsorted() 133 | test_pdf_loss() 134 | -------------------------------------------------------------------------------- /tests/test_rendering.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import torch 3 | 4 | device = "cuda:0" 5 | 6 | 7 | @pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device") 8 | def test_render_visibility(): 9 | from nerfacc.volrend import render_visibility_from_alpha 10 | 11 | ray_indices = torch.tensor( 12 | [0, 2, 2, 2, 2], dtype=torch.int64, device=device 13 | ) # (all_samples,) 14 | alphas = torch.tensor( 15 | [0.4, 0.3, 0.8, 0.8, 0.5], dtype=torch.float32, device=device 16 | ) # (all_samples,) 17 | 18 | # 
transmittance: [1.0, 1.0, 0.7, 0.14, 0.028] 19 | vis = render_visibility_from_alpha( 20 | alphas, ray_indices=ray_indices, early_stop_eps=0.03, alpha_thre=0.0 21 | ) 22 | vis_tgt = torch.tensor( 23 | [True, True, True, True, False], dtype=torch.bool, device=device 24 | ) 25 | assert torch.allclose(vis, vis_tgt) 26 | 27 | # transmittance: [1.0, 1.0, 1.0, 0.2, 0.04] 28 | vis = render_visibility_from_alpha( 29 | alphas, ray_indices=ray_indices, early_stop_eps=0.05, alpha_thre=0.35 30 | ) 31 | vis_tgt = torch.tensor( 32 | [True, False, True, True, False], dtype=torch.bool, device=device 33 | ) 34 | assert torch.allclose(vis, vis_tgt) 35 | 36 | 37 | @pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device") 38 | def test_render_weight_from_alpha(): 39 | from nerfacc.volrend import render_weight_from_alpha 40 | 41 | ray_indices = torch.tensor( 42 | [0, 2, 2, 2, 2], dtype=torch.int64, device=device 43 | ) # (all_samples,) 44 | alphas = torch.tensor( 45 | [0.4, 0.3, 0.8, 0.8, 0.5], dtype=torch.float32, device=device 46 | ) # (all_samples,) 47 | 48 | # transmittance: [1.0, 1.0, 0.7, 0.14, 0.028] 49 | weights, _ = render_weight_from_alpha( 50 | alphas, ray_indices=ray_indices, n_rays=3 51 | ) 52 | weights_tgt = torch.tensor( 53 | [1.0 * 0.4, 1.0 * 0.3, 0.7 * 0.8, 0.14 * 0.8, 0.028 * 0.5], 54 | dtype=torch.float32, 55 | device=device, 56 | ) 57 | assert torch.allclose(weights, weights_tgt) 58 | 59 | 60 | @pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device") 61 | def test_render_weight_from_density(): 62 | from nerfacc.volrend import ( 63 | render_weight_from_alpha, 64 | render_weight_from_density, 65 | ) 66 | 67 | ray_indices = torch.tensor( 68 | [0, 2, 2, 2, 2], dtype=torch.int64, device=device 69 | ) # (all_samples,) 70 | sigmas = torch.rand( 71 | (ray_indices.shape[0],), device=device 72 | ) # (all_samples,) 73 | t_starts = torch.rand_like(sigmas) 74 | t_ends = torch.rand_like(sigmas) + 1.0 75 | alphas = 1.0 - torch.exp(-sigmas * (t_ends 
- t_starts)) 76 | 77 | weights, _, _ = render_weight_from_density( 78 | t_starts, t_ends, sigmas, ray_indices=ray_indices, n_rays=3 79 | ) 80 | weights_tgt, _ = render_weight_from_alpha( 81 | alphas, ray_indices=ray_indices, n_rays=3 82 | ) 83 | assert torch.allclose(weights, weights_tgt) 84 | 85 | 86 | @pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device") 87 | def test_accumulate_along_rays(): 88 | from nerfacc.volrend import accumulate_along_rays 89 | 90 | ray_indices = torch.tensor( 91 | [0, 2, 2, 2, 2], dtype=torch.int64, device=device 92 | ) # (all_samples,) 93 | weights = torch.tensor( 94 | [0.4, 0.3, 0.8, 0.8, 0.5], dtype=torch.float32, device=device 95 | ) # (all_samples,) 96 | values = torch.rand((5, 2), device=device) # (all_samples, 2) 97 | 98 | ray_values = accumulate_along_rays( 99 | weights, values=values, ray_indices=ray_indices, n_rays=3 100 | ) 101 | assert ray_values.shape == (3, 2) 102 | assert torch.allclose(ray_values[0, :], weights[0, None] * values[0, :]) 103 | assert (ray_values[1, :] == 0).all() 104 | assert torch.allclose( 105 | ray_values[2, :], (weights[1:, None] * values[1:]).sum(dim=0) 106 | ) 107 | 108 | 109 | @pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device") 110 | def test_grads(): 111 | from nerfacc.volrend import ( 112 | render_transmittance_from_density, 113 | render_weight_from_alpha, 114 | render_weight_from_density, 115 | ) 116 | 117 | ray_indices = torch.tensor( 118 | [0, 2, 2, 2, 2], dtype=torch.int64, device=device 119 | ) # (all_samples,) 120 | packed_info = torch.tensor( 121 | [[0, 1], [1, 0], [1, 4]], dtype=torch.long, device=device 122 | ) 123 | sigmas = torch.tensor([0.4, 0.8, 0.1, 0.8, 0.1], device=device) 124 | sigmas.requires_grad = True 125 | t_starts = torch.rand_like(sigmas) 126 | t_ends = t_starts + 1.0 127 | 128 | weights_ref = torch.tensor( 129 | [0.3297, 0.5507, 0.0428, 0.2239, 0.0174], device=device 130 | ) 131 | sigmas_grad_ref = torch.tensor( 132 | [0.6703, 
0.1653, 0.1653, 0.1653, 0.1653], device=device 133 | ) 134 | 135 | # naive impl. trans from sigma 136 | trans, _ = render_transmittance_from_density( 137 | t_starts, t_ends, sigmas, ray_indices=ray_indices, n_rays=3 138 | ) 139 | weights = trans * (1.0 - torch.exp(-sigmas * (t_ends - t_starts))) 140 | weights.sum().backward() 141 | sigmas_grad = sigmas.grad.clone() 142 | sigmas.grad.zero_() 143 | assert torch.allclose(weights_ref, weights, atol=1e-4) 144 | assert torch.allclose(sigmas_grad_ref, sigmas_grad, atol=1e-4) 145 | 146 | # naive impl. trans from alpha 147 | trans, _ = render_transmittance_from_density( 148 | t_starts, t_ends, sigmas, packed_info=packed_info, n_rays=3 149 | ) 150 | weights = trans * (1.0 - torch.exp(-sigmas * (t_ends - t_starts))) 151 | weights.sum().backward() 152 | sigmas_grad = sigmas.grad.clone() 153 | sigmas.grad.zero_() 154 | assert torch.allclose(weights_ref, weights, atol=1e-4) 155 | assert torch.allclose(sigmas_grad_ref, sigmas_grad, atol=1e-4) 156 | 157 | weights, _, _ = render_weight_from_density( 158 | t_starts, t_ends, sigmas, ray_indices=ray_indices, n_rays=3 159 | ) 160 | weights.sum().backward() 161 | sigmas_grad = sigmas.grad.clone() 162 | sigmas.grad.zero_() 163 | assert torch.allclose(weights_ref, weights, atol=1e-4) 164 | assert torch.allclose(sigmas_grad_ref, sigmas_grad, atol=1e-4) 165 | 166 | weights, _, _ = render_weight_from_density( 167 | t_starts, t_ends, sigmas, packed_info=packed_info, n_rays=3 168 | ) 169 | weights.sum().backward() 170 | sigmas_grad = sigmas.grad.clone() 171 | sigmas.grad.zero_() 172 | assert torch.allclose(weights_ref, weights, atol=1e-4) 173 | assert torch.allclose(sigmas_grad_ref, sigmas_grad, atol=1e-4) 174 | 175 | alphas = 1.0 - torch.exp(-sigmas * (t_ends - t_starts)) 176 | weights, _ = render_weight_from_alpha( 177 | alphas, ray_indices=ray_indices, n_rays=3 178 | ) 179 | weights.sum().backward() 180 | sigmas_grad = sigmas.grad.clone() 181 | sigmas.grad.zero_() 182 | assert 
torch.allclose(weights_ref, weights, atol=1e-4) 183 | assert torch.allclose(sigmas_grad_ref, sigmas_grad, atol=1e-4) 184 | 185 | alphas = 1.0 - torch.exp(-sigmas * (t_ends - t_starts)) 186 | weights, _ = render_weight_from_alpha( 187 | alphas, packed_info=packed_info, n_rays=3 188 | ) 189 | weights.sum().backward() 190 | sigmas_grad = sigmas.grad.clone() 191 | sigmas.grad.zero_() 192 | assert torch.allclose(weights_ref, weights, atol=1e-4) 193 | assert torch.allclose(sigmas_grad_ref, sigmas_grad, atol=1e-4) 194 | 195 | 196 | @pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device") 197 | def test_rendering(): 198 | from nerfacc.volrend import rendering 199 | 200 | def rgb_sigma_fn(t_starts, t_ends, ray_indices): 201 | return torch.stack([t_starts] * 3, dim=-1), t_starts 202 | 203 | ray_indices = torch.tensor( 204 | [0, 2, 2, 2, 2], dtype=torch.int64, device=device 205 | ) # (all_samples,) 206 | sigmas = torch.rand( 207 | (ray_indices.shape[0],), device=device 208 | ) # (all_samples,) 209 | t_starts = torch.rand_like(sigmas) 210 | t_ends = torch.rand_like(sigmas) + 1.0 211 | 212 | _, _, _, _ = rendering( 213 | t_starts, 214 | t_ends, 215 | ray_indices=ray_indices, 216 | n_rays=3, 217 | rgb_sigma_fn=rgb_sigma_fn, 218 | ) 219 | 220 | 221 | if __name__ == "__main__": 222 | test_render_visibility() 223 | test_render_weight_from_alpha() 224 | test_render_weight_from_density() 225 | test_accumulate_along_rays() 226 | test_grads() 227 | test_rendering() 228 | -------------------------------------------------------------------------------- /tests/test_scan.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import torch 3 | 4 | device = "cuda:0" 5 | 6 | 7 | @pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device") 8 | def test_inclusive_sum(): 9 | from nerfacc.scan import inclusive_sum 10 | 11 | torch.manual_seed(42) 12 | 13 | data = torch.rand((5, 1000), device=device, 
requires_grad=True) 14 | outputs1 = inclusive_sum(data) 15 | outputs1 = outputs1.flatten() 16 | outputs1.sum().backward() 17 | grad1 = data.grad.clone() 18 | data.grad.zero_() 19 | 20 | chunk_starts = torch.arange( 21 | 0, data.numel(), data.shape[1], device=device, dtype=torch.long 22 | ) 23 | chunk_cnts = torch.full( 24 | (data.shape[0],), data.shape[1], dtype=torch.long, device=device 25 | ) 26 | packed_info = torch.stack([chunk_starts, chunk_cnts], dim=-1) 27 | flatten_data = data.flatten() 28 | outputs2 = inclusive_sum(flatten_data, packed_info=packed_info) 29 | outputs2.sum().backward() 30 | grad2 = data.grad.clone() 31 | data.grad.zero_() 32 | 33 | indices = torch.arange(data.shape[0], device=device, dtype=torch.long) 34 | indices = indices.repeat_interleave(data.shape[1]) 35 | indices = indices.flatten() 36 | outputs3 = inclusive_sum(flatten_data, indices=indices) 37 | outputs3.sum().backward() 38 | grad3 = data.grad.clone() 39 | data.grad.zero_() 40 | 41 | assert torch.allclose(outputs1, outputs2) 42 | assert torch.allclose(grad1, grad2) 43 | 44 | assert torch.allclose(outputs1, outputs3) 45 | assert torch.allclose(grad1, grad3) 46 | 47 | 48 | @pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device") 49 | def test_exclusive_sum(): 50 | from nerfacc.scan import exclusive_sum 51 | 52 | torch.manual_seed(42) 53 | 54 | data = torch.rand((5, 1000), device=device, requires_grad=True) 55 | outputs1 = exclusive_sum(data) 56 | outputs1 = outputs1.flatten() 57 | outputs1.sum().backward() 58 | grad1 = data.grad.clone() 59 | data.grad.zero_() 60 | 61 | chunk_starts = torch.arange( 62 | 0, data.numel(), data.shape[1], device=device, dtype=torch.long 63 | ) 64 | chunk_cnts = torch.full( 65 | (data.shape[0],), data.shape[1], dtype=torch.long, device=device 66 | ) 67 | packed_info = torch.stack([chunk_starts, chunk_cnts], dim=-1) 68 | flatten_data = data.flatten() 69 | outputs2 = exclusive_sum(flatten_data, packed_info=packed_info) 70 | 
outputs2.sum().backward() 71 | grad2 = data.grad.clone() 72 | data.grad.zero_() 73 | 74 | indices = torch.arange(data.shape[0], device=device, dtype=torch.long) 75 | indices = indices.repeat_interleave(data.shape[1]) 76 | indices = indices.flatten() 77 | outputs3 = exclusive_sum(flatten_data, indices=indices) 78 | outputs3.sum().backward() 79 | grad3 = data.grad.clone() 80 | data.grad.zero_() 81 | 82 | # TODO: check exclusive sum. numeric error? 83 | # print((outputs1 - outputs2).abs().max()) # 0.0002 84 | assert torch.allclose(outputs1, outputs2, atol=3e-4) 85 | assert torch.allclose(grad1, grad2) 86 | 87 | assert torch.allclose(outputs1, outputs3, atol=3e-4) 88 | assert torch.allclose(grad1, grad3) 89 | 90 | 91 | @pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device") 92 | def test_inclusive_prod(): 93 | from nerfacc.scan import inclusive_prod 94 | 95 | torch.manual_seed(42) 96 | 97 | data = torch.rand((5, 1000), device=device, requires_grad=True) 98 | outputs1 = inclusive_prod(data) 99 | outputs1 = outputs1.flatten() 100 | outputs1.sum().backward() 101 | grad1 = data.grad.clone() 102 | data.grad.zero_() 103 | 104 | chunk_starts = torch.arange( 105 | 0, data.numel(), data.shape[1], device=device, dtype=torch.long 106 | ) 107 | chunk_cnts = torch.full( 108 | (data.shape[0],), data.shape[1], dtype=torch.long, device=device 109 | ) 110 | packed_info = torch.stack([chunk_starts, chunk_cnts], dim=-1) 111 | flatten_data = data.flatten() 112 | outputs2 = inclusive_prod(flatten_data, packed_info=packed_info) 113 | outputs2.sum().backward() 114 | grad2 = data.grad.clone() 115 | data.grad.zero_() 116 | 117 | indices = torch.arange(data.shape[0], device=device, dtype=torch.long) 118 | indices = indices.repeat_interleave(data.shape[1]) 119 | indices = indices.flatten() 120 | outputs3 = inclusive_prod(flatten_data, indices=indices) 121 | outputs3.sum().backward() 122 | grad3 = data.grad.clone() 123 | data.grad.zero_() 124 | 125 | assert 
torch.allclose(outputs1, outputs2) 126 | assert torch.allclose(grad1, grad2) 127 | 128 | assert torch.allclose(outputs1, outputs3) 129 | assert torch.allclose(grad1, grad3) 130 | 131 | 132 | @pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device") 133 | def test_exclusive_prod(): 134 | from nerfacc.scan import exclusive_prod 135 | 136 | torch.manual_seed(42) 137 | 138 | data = torch.rand((5, 1000), device=device, requires_grad=True) 139 | outputs1 = exclusive_prod(data) 140 | outputs1 = outputs1.flatten() 141 | outputs1.sum().backward() 142 | grad1 = data.grad.clone() 143 | data.grad.zero_() 144 | 145 | chunk_starts = torch.arange( 146 | 0, data.numel(), data.shape[1], device=device, dtype=torch.long 147 | ) 148 | chunk_cnts = torch.full( 149 | (data.shape[0],), data.shape[1], dtype=torch.long, device=device 150 | ) 151 | packed_info = torch.stack([chunk_starts, chunk_cnts], dim=-1) 152 | flatten_data = data.flatten() 153 | outputs2 = exclusive_prod(flatten_data, packed_info=packed_info) 154 | outputs2.sum().backward() 155 | grad2 = data.grad.clone() 156 | data.grad.zero_() 157 | 158 | indices = torch.arange(data.shape[0], device=device, dtype=torch.long) 159 | indices = indices.repeat_interleave(data.shape[1]) 160 | indices = indices.flatten() 161 | outputs3 = exclusive_prod(flatten_data, indices=indices) 162 | outputs3.sum().backward() 163 | grad3 = data.grad.clone() 164 | data.grad.zero_() 165 | 166 | # TODO: check exclusive sum. numeric error? 
167 | # print((outputs1 - outputs2).abs().max()) 168 | assert torch.allclose(outputs1, outputs2) 169 | assert torch.allclose(grad1, grad2) 170 | 171 | assert torch.allclose(outputs1, outputs3) 172 | assert torch.allclose(grad1, grad3) 173 | 174 | 175 | def profile(): 176 | import tqdm 177 | 178 | from nerfacc.scan import inclusive_sum 179 | 180 | torch.manual_seed(42) 181 | 182 | data = torch.rand((8192, 8192), device=device, requires_grad=True) 183 | 184 | chunk_starts = torch.arange( 185 | 0, data.numel(), data.shape[1], device=device, dtype=torch.long 186 | ) 187 | chunk_cnts = torch.full( 188 | (data.shape[0],), data.shape[1], dtype=torch.long, device=device 189 | ) 190 | packed_info = torch.stack([chunk_starts, chunk_cnts], dim=-1) 191 | flatten_data = data.flatten() 192 | torch.cuda.synchronize() 193 | for _ in tqdm.trange(2000): 194 | outputs2 = inclusive_sum(flatten_data, packed_info=packed_info) 195 | outputs2.sum().backward() 196 | 197 | indices = torch.arange(data.shape[0], device=device, dtype=torch.long) 198 | indices = indices.repeat_interleave(data.shape[1]) 199 | indices = indices.flatten() 200 | torch.cuda.synchronize() 201 | for _ in tqdm.trange(2000): 202 | outputs3 = inclusive_sum(flatten_data, indices=indices) 203 | outputs3.sum().backward() 204 | 205 | 206 | if __name__ == "__main__": 207 | test_inclusive_sum() 208 | test_exclusive_sum() 209 | test_inclusive_prod() 210 | test_exclusive_prod() 211 | profile() 212 | -------------------------------------------------------------------------------- /tests/test_vdb.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | import pytest 4 | import torch 5 | 6 | device = "cuda:0" 7 | 8 | 9 | @pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device") 10 | def test_traverse_vdbs(): 11 | try: 12 | import fvdb 13 | except ImportError: 14 | warnings.warn("fVDB is not installed. 
Skip the test.") 15 | return 16 | 17 | from nerfacc.estimators.vdb import traverse_vdbs 18 | from nerfacc.grid import _enlarge_aabb, traverse_grids 19 | 20 | torch.manual_seed(42) 21 | n_rays = 100 22 | n_aabbs = 1 23 | reso = 32 24 | cone_angle = 1e-3 25 | 26 | rays_o = torch.randn((n_rays, 3), device=device) 27 | rays_d = torch.randn((n_rays, 3), device=device) 28 | rays_d = rays_d / rays_d.norm(dim=-1, keepdim=True) 29 | 30 | base_aabb = torch.tensor([-1.0, -1.0, -1.0, 1.0, 1.0, 1.0], device=device) 31 | aabbs = torch.stack( 32 | [_enlarge_aabb(base_aabb, 2**i) for i in range(n_aabbs)] 33 | ) 34 | 35 | # Traverse 1-level cascaded grid 36 | binaries = torch.rand((n_aabbs, reso, reso, reso), device=device) > 0.5 37 | 38 | intervals, samples, _ = traverse_grids( 39 | rays_o, rays_d, binaries, aabbs, cone_angle=cone_angle 40 | ) 41 | ray_indices = samples.ray_indices 42 | t_starts = intervals.vals[intervals.is_left] 43 | t_ends = intervals.vals[intervals.is_right] 44 | 45 | # Traverse a single fvdb grid 46 | grid = fvdb.GridBatch(device=device) 47 | voxel_sizes = (aabbs[:, 3:] - aabbs[:, :3]) / reso 48 | origins = aabbs[:, :3] + voxel_sizes / 2 49 | ijks = torch.stack(torch.where(binaries[0]), dim=-1) 50 | grid.set_from_ijk(ijks, voxel_sizes=voxel_sizes, origins=origins) 51 | 52 | t_starts, t_ends, ray_indices = traverse_vdbs( 53 | rays_o, rays_d, grid, cone_angle=cone_angle 54 | ) 55 | 56 | assert torch.allclose(t_starts, t_starts) 57 | assert torch.allclose(t_ends, t_ends) 58 | assert torch.all(ray_indices == ray_indices) 59 | 60 | 61 | @pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device") 62 | def test_base_vdb_estimator(): 63 | try: 64 | import fvdb 65 | except ImportError: 66 | warnings.warn("fVDB is not installed. 
Skip the test.") 67 | return 68 | 69 | import tqdm 70 | 71 | from nerfacc.estimators.occ_grid import OccGridEstimator 72 | from nerfacc.estimators.vdb import VDBEstimator 73 | from nerfacc.grid import _query 74 | 75 | torch.manual_seed(42) 76 | 77 | profile = True 78 | n_aabbs = 1 79 | reso = 32 80 | base_aabb = torch.tensor([-1.0, -1.0, -1.0, 1.0, 1.0, 1.0], device=device) 81 | occ_thre = 0.1 82 | 83 | # Create the target occ grid 84 | occs = torch.rand((n_aabbs, reso, reso, reso), device=device) 85 | 86 | def occ_eval_fn(x): 87 | return _query(x, occs, base_aabb)[0] 88 | 89 | # Create the OccGridEstimator 90 | estimator1 = OccGridEstimator(base_aabb, reso, n_aabbs).to(device) 91 | for _ in tqdm.trange(1000) if profile else range(1): 92 | estimator1._update(step=0, occ_eval_fn=occ_eval_fn, occ_thre=occ_thre) 93 | occs1 = estimator1.occs.reshape_as(occs) 94 | err = (occs - occs1).abs().max() 95 | if not profile: 96 | assert err == 0 97 | 98 | # Create the OccGridEstimator 99 | voxel_sizes = (base_aabb[3:] - base_aabb[:3]) / reso 100 | origins = base_aabb[:3] + voxel_sizes / 2 101 | grid = fvdb.sparse_grid_from_dense( 102 | 1, (reso, reso, reso), voxel_sizes=voxel_sizes, origins=origins 103 | ) 104 | estimator2 = VDBEstimator(grid).to(device) 105 | for _ in tqdm.trange(1000) if profile else range(1): 106 | estimator2._update(step=0, occ_eval_fn=occ_eval_fn, occ_thre=occ_thre) 107 | 108 | ijks = estimator1.grid_coords 109 | index = estimator2.grid.ijk_to_index(ijks).jdata 110 | occs2 = estimator2.occs[index].reshape_as(occs) 111 | err = (occs - occs2).abs().max() 112 | if not profile: 113 | assert err == 0 114 | 115 | # ray marching in sparse grid 116 | n_rays = 100 117 | n_aabbs = 1 118 | reso = 32 119 | cone_angle = 1e-3 120 | 121 | rays_o = torch.randn((n_rays, 3), device=device) 122 | rays_d = torch.randn((n_rays, 3), device=device) 123 | rays_d = rays_d / rays_d.norm(dim=-1, keepdim=True) 124 | 125 | for _ in tqdm.trange(1000) if profile else range(1): 126 | 
ray_indices1, t_starts1, t_ends1 = estimator1.sampling( 127 | rays_o, rays_d, render_step_size=0.1, cone_angle=cone_angle 128 | ) 129 | for _ in tqdm.trange(1000) if profile else range(1): 130 | ray_indices2, t_starts2, t_ends2 = estimator2.sampling( 131 | rays_o, rays_d, render_step_size=0.1, cone_angle=cone_angle 132 | ) 133 | assert torch.all(ray_indices1 == ray_indices2) 134 | assert torch.allclose(t_starts1, t_starts2) 135 | assert torch.allclose(t_ends1, t_ends2) 136 | 137 | 138 | if __name__ == "__main__": 139 | test_traverse_vdbs() 140 | test_base_vdb_estimator() 141 | --------------------------------------------------------------------------------