├── .github └── workflows │ ├── aws │ └── update_index.py │ ├── building.yml │ ├── code_checks.yml │ ├── cuda │ ├── Linux-env.sh │ ├── Linux.sh │ ├── Windows-env.sh │ └── Windows.sh │ ├── doc.yml │ └── publish.yml ├── .gitignore ├── .gitmodules ├── .pre-commit-config.yaml ├── .readthedocs.yaml ├── CHANGELOG.md ├── LICENSE ├── MANIFEST.in ├── README.md ├── docs ├── Makefile ├── requirements.txt └── source │ ├── _static │ ├── css │ │ └── readthedocs.css │ └── images │ │ ├── coding │ │ └── gpu_util.png │ │ ├── illustration_occgrid.png │ │ ├── illustration_propnet.png │ │ ├── logo.png │ │ ├── logo4x.png │ │ ├── plot_occgrid.png │ │ ├── plot_propnet.png │ │ └── teaser.jpg │ ├── apis │ ├── estimators.rst │ ├── generated │ │ ├── nerfacc.accumulate_along_rays.rst │ │ ├── nerfacc.exclusive_prod.rst │ │ ├── nerfacc.exclusive_sum.rst │ │ ├── nerfacc.importance_sampling.rst │ │ ├── nerfacc.inclusive_prod.rst │ │ ├── nerfacc.inclusive_sum.rst │ │ ├── nerfacc.pack_info.rst │ │ ├── nerfacc.ray_aabb_intersect.rst │ │ ├── nerfacc.render_transmittance_from_alpha.rst │ │ ├── nerfacc.render_transmittance_from_density.rst │ │ ├── nerfacc.render_visibility_from_alpha.rst │ │ ├── nerfacc.render_visibility_from_density.rst │ │ ├── nerfacc.render_weight_from_alpha.rst │ │ ├── nerfacc.render_weight_from_density.rst │ │ ├── nerfacc.searchsorted.rst │ │ └── nerfacc.traverse_grids.rst │ ├── rendering.rst │ └── utils.rst │ ├── conf.py │ ├── examples │ ├── camera.rst │ ├── camera │ │ └── barf.rst │ ├── dynamic.rst │ ├── dynamic │ │ ├── kplanes.rst │ │ ├── tineuvox.rst │ │ └── tnerf.rst │ ├── static.rst │ └── static │ │ ├── nerf.rst │ │ ├── ngp.rst │ │ └── tensorf.rst │ ├── index.rst │ └── methodology │ ├── coding.rst │ └── sampling.rst ├── examples ├── datasets │ ├── __init__.py │ ├── dnerf_synthetic.py │ ├── nerf_360_v2.py │ ├── nerf_synthetic.py │ └── utils.py ├── radiance_fields │ ├── __init__.py │ ├── mlp.py │ └── ngp.py ├── requirements.txt ├── train_mlp_nerf.py ├── train_mlp_tnerf.py ├── 
train_ngp_nerf_occ.py ├── train_ngp_nerf_prop.py └── utils.py ├── nerfacc ├── __init__.py ├── cameras.py ├── cuda │ ├── __init__.py │ ├── _backend.py │ └── csrc │ │ ├── camera.cu │ │ ├── grid.cu │ │ ├── include │ │ ├── data_spec.hpp │ │ ├── data_spec_packed.cuh │ │ ├── utils.cub.cuh │ │ ├── utils_camera.cuh │ │ ├── utils_contraction.cuh │ │ ├── utils_cuda.cuh │ │ ├── utils_grid.cuh │ │ ├── utils_math.cuh │ │ └── utils_scan.cuh │ │ ├── nerfacc.cpp │ │ ├── pdf.cu │ │ ├── scan.cu │ │ └── scan_cub.cu ├── data_specs.py ├── estimators │ ├── __init__.py │ ├── base.py │ ├── n3tree.py │ ├── occ_grid.py │ ├── prop_net.py │ └── vdb.py ├── grid.py ├── losses.py ├── pack.py ├── pdf.py ├── scan.py ├── version.py └── volrend.py ├── scripts ├── run_aws_listing.py ├── run_dev_checks.py └── run_profiler.py ├── setup.cfg ├── setup.py └── tests ├── test_camera.py ├── test_grid.py ├── test_pack.py ├── test_pdf.py ├── test_rendering.py ├── test_scan.py └── test_vdb.py /.github/workflows/aws/update_index.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | 3 | import boto3 4 | 5 | ROOT_URL = 'https://nerfacc-bucket.s3.us-west-2.amazonaws.com/whl' 6 | html = '\n\n\n{}\n\n' 7 | href = ' {}
' 8 | args = { 9 | 'ContentType': 'text/html', 10 | 'CacheControl': 'max-age=300', 11 | 'ACL': 'public-read', 12 | } 13 | 14 | bucket = boto3.resource('s3').Bucket(name='nerfacc-bucket') 15 | 16 | wheels_dict = defaultdict(list) 17 | for obj in bucket.objects.filter(Prefix='whl'): 18 | if obj.key[-3:] != 'whl': 19 | continue 20 | torch_version, wheel = obj.key.split('/')[-2:] 21 | wheel = f'{torch_version}/{wheel}' 22 | wheels_dict[torch_version].append(wheel) 23 | 24 | index_html = html.format('\n'.join([ 25 | href.format(f'{torch_version}.html'.replace('+', '%2B'), version) 26 | for version in wheels_dict 27 | ])) 28 | 29 | with open('index.html', 'w') as f: 30 | f.write(index_html) 31 | bucket.Object('whl/index.html').upload_file('index.html', args) 32 | 33 | for torch_version, wheel_names in wheels_dict.items(): 34 | torch_version_html = html.format('\n'.join([ 35 | href.format(f'{ROOT_URL}/{wheel_name}'.replace('+', '%2B'), wheel_name) 36 | for wheel_name in wheel_names 37 | ])) 38 | 39 | with open(f'{torch_version}.html', 'w') as f: 40 | f.write(torch_version_html) 41 | bucket.Object(f'whl/{torch_version}.html').upload_file( 42 | f'{torch_version}.html', args) -------------------------------------------------------------------------------- /.github/workflows/building.yml: -------------------------------------------------------------------------------- 1 | name: Building Wheels 2 | 3 | on: [workflow_dispatch] 4 | 5 | jobs: 6 | 7 | wheel: 8 | runs-on: ${{ matrix.os }} 9 | environment: production 10 | 11 | strategy: 12 | fail-fast: false 13 | matrix: 14 | os: [ubuntu-20.04, windows-2019] 15 | # support version based on: https://download.pytorch.org/whl/torch/ 16 | python-version: ['3.7', '3.8', '3.9', '3.10', '3.11'] 17 | torch-version: [1.11.0, 1.12.0, 1.13.0, 2.0.0] 18 | cuda-version: ['cu113', 'cu115', 'cu116', 'cu117', 'cu118'] 19 | exclude: 20 | - torch-version: 1.11.0 21 | python-version: '3.11' 22 | - torch-version: 1.11.0 23 | cuda-version: 'cu116' 24 | 
- torch-version: 1.11.0 25 | cuda-version: 'cu117' 26 | - torch-version: 1.11.0 27 | cuda-version: 'cu118' 28 | 29 | - torch-version: 1.12.0 30 | python-version: '3.11' 31 | - torch-version: 1.12.0 32 | cuda-version: 'cu115' 33 | - torch-version: 1.12.0 34 | cuda-version: 'cu117' 35 | - torch-version: 1.12.0 36 | cuda-version: 'cu118' 37 | 38 | - torch-version: 1.13.0 39 | cuda-version: 'cu102' 40 | - torch-version: 1.13.0 41 | cuda-version: 'cu113' 42 | - torch-version: 1.13.0 43 | cuda-version: 'cu115' 44 | - torch-version: 1.13.0 45 | cuda-version: 'cu118' 46 | 47 | - torch-version: 2.0.0 48 | python-version: '3.7' 49 | - torch-version: 2.0.0 50 | cuda-version: 'cu102' 51 | - torch-version: 2.0.0 52 | cuda-version: 'cu113' 53 | - torch-version: 2.0.0 54 | cuda-version: 'cu115' 55 | - torch-version: 2.0.0 56 | cuda-version: 'cu116' 57 | 58 | - os: windows-2019 59 | cuda-version: 'cu102' 60 | - os: windows-2019 61 | torch-version: 1.13.0 62 | python-version: '3.11' 63 | 64 | # - os: windows-2019 65 | # torch-version: 1.13.0 66 | # cuda-version: 'cu117' 67 | # python-version: '3.9' 68 | 69 | 70 | 71 | steps: 72 | - uses: actions/checkout@v3 73 | 74 | - name: Set up Python ${{ matrix.python-version }} 75 | uses: actions/setup-python@v4 76 | with: 77 | python-version: ${{ matrix.python-version }} 78 | 79 | - name: Upgrade pip 80 | run: | 81 | pip install --upgrade setuptools 82 | pip install ninja 83 | 84 | - name: Free up disk space 85 | if: ${{ runner.os == 'Linux' }} 86 | run: | 87 | sudo rm -rf /usr/share/dotnet 88 | 89 | - name: Install CUDA ${{ matrix.cuda-version }} 90 | if: ${{ matrix.cuda-version != 'cpu' }} 91 | run: | 92 | bash .github/workflows/cuda/${{ runner.os }}.sh ${{ matrix.cuda-version }} 93 | 94 | - name: Install PyTorch ${{ matrix.torch-version }}+${{ matrix.cuda-version }} 95 | run: | 96 | pip install torch==${{ matrix.torch-version }} --extra-index-url https://download.pytorch.org/whl/${{ matrix.cuda-version }} 97 | python -c "import torch; 
print('PyTorch:', torch.__version__)" 98 | python -c "import torch; print('CUDA:', torch.version.cuda)" 99 | python -c "import torch; print('CUDA Available:', torch.cuda.is_available())" 100 | 101 | - name: Patch PyTorch static constexpr on Windows 102 | if: ${{ runner.os == 'Windows' }} 103 | run: | 104 | Torch_DIR=`python -c 'import os; import torch; print(os.path.dirname(torch.__file__))'` 105 | sed -i '31,38c\ 106 | TORCH_API void lazy_init_num_threads();' ${Torch_DIR}/include/ATen/Parallel.h 107 | shell: bash 108 | 109 | - name: Set version 110 | if: ${{ runner.os != 'macOS' }} 111 | run: | 112 | VERSION=`sed -n 's/^__version__ = "\(.*\)"/\1/p' nerfacc/version.py` 113 | TORCH_VERSION=`echo "pt${{ matrix.torch-version }}" | sed "s/..$//" | sed "s/\.//g"` 114 | CUDA_VERSION=`echo ${{ matrix.cuda-version }}` 115 | echo "New version name: $VERSION+$TORCH_VERSION$CUDA_VERSION" 116 | sed -i "s/$VERSION/$VERSION+$TORCH_VERSION$CUDA_VERSION/" nerfacc/version.py 117 | shell: 118 | bash 119 | 120 | - name: Install main package for CPU 121 | if: ${{ matrix.cuda-version == 'cpu' }} 122 | run: | 123 | BUILD_NO_CUDA=1 pip install . 124 | 125 | - name: Build wheel 126 | run: | 127 | pip install wheel 128 | source .github/workflows/cuda/${{ runner.os }}-env.sh ${{ matrix.cuda-version }} 129 | python setup.py bdist_wheel --dist-dir=dist 130 | shell: bash # `source` does not exist in windows powershell 131 | 132 | - name: Test wheel 133 | run: | 134 | cd dist 135 | ls -lah 136 | pip install *.whl 137 | python -c "import nerfacc; print('nerfacc:', nerfacc.__version__)" 138 | cd .. 
139 | shell: bash # `ls -lah` does not exist in windows powershell 140 | 141 | - name: Configure AWS 142 | uses: aws-actions/configure-aws-credentials@v2 143 | with: 144 | aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} 145 | aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 146 | aws-region: us-west-2 147 | 148 | - name: Upload wheel 149 | run: | 150 | aws s3 sync dist s3://nerfacc-bucket/whl/torch-${{ matrix.torch-version }}_${{ matrix.cuda-version }} --grants read=uri=http://acs.amazonaws.com/groups/global/AllUsers 151 | 152 | update_aws_listing: 153 | needs: [wheel] 154 | runs-on: ubuntu-latest 155 | environment: production 156 | 157 | steps: 158 | - uses: actions/checkout@v3 159 | 160 | - name: Set up Python 161 | uses: actions/setup-python@v4 162 | with: 163 | python-version: 3.9 164 | 165 | - name: Upgrade pip 166 | run: | 167 | pip install boto3 168 | 169 | - name: Update AWS listing 170 | run: | 171 | python scripts/run_aws_listing.py \ 172 | --bucket="nerfacc-bucket" \ 173 | --region="us-west-2" 174 | env: 175 | AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} 176 | AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 177 | -------------------------------------------------------------------------------- /.github/workflows/code_checks.yml: -------------------------------------------------------------------------------- 1 | name: Core Tests. 
2 | 3 | on: 4 | push: 5 | branches: [master] 6 | pull_request: 7 | branches: [master] 8 | 9 | permissions: 10 | contents: read 11 | 12 | jobs: 13 | build: 14 | runs-on: ubuntu-latest 15 | 16 | steps: 17 | - uses: actions/checkout@v3 18 | - name: Set up Python 3.9 19 | uses: actions/setup-python@v4 20 | with: 21 | python-version: "3.9" 22 | - name: Install dependencies 23 | run: | 24 | pip install isort==5.10.1 black[jupyter]==22.3.0 25 | - name: Run isort 26 | run: isort docs/ nerfacc/ scripts/ examples/ tests/ --profile black --skip examples/pycolmap --line-length 80 --check 27 | - name: Run Black 28 | run: black docs/ nerfacc/ scripts/ examples/ tests/ --exclude examples/pycolmap --line-length 80 --check 29 | # - name: Python Pylint 30 | # run: | 31 | # pylint nerfacc/ tests/ scripts/ examples/ 32 | -------------------------------------------------------------------------------- /.github/workflows/cuda/Linux-env.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Took from https://github.com/pyg-team/pyg-lib/ 4 | 5 | case ${1} in 6 | cu118) 7 | export CUDA_HOME=/usr/local/cuda-11.8 8 | export LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH} 9 | export PATH=${CUDA_HOME}/bin:${PATH} 10 | export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6" 11 | ;; 12 | cu117) 13 | export CUDA_HOME=/usr/local/cuda-11.7 14 | export LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH} 15 | export PATH=${CUDA_HOME}/bin:${PATH} 16 | export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6" 17 | ;; 18 | cu116) 19 | export CUDA_HOME=/usr/local/cuda-11.6 20 | export LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH} 21 | export PATH=${CUDA_HOME}/bin:${PATH} 22 | export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6" 23 | ;; 24 | cu115) 25 | export CUDA_HOME=/usr/local/cuda-11.5 26 | export LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH} 27 | export PATH=${CUDA_HOME}/bin:${PATH} 28 | export 
TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6" 29 | ;; 30 | cu113) 31 | export CUDA_HOME=/usr/local/cuda-11.3 32 | export LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH} 33 | export PATH=${CUDA_HOME}/bin:${PATH} 34 | export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6" 35 | ;; 36 | cu102) 37 | export CUDA_HOME=/usr/local/cuda-10.2 38 | export LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH} 39 | export PATH=${CUDA_HOME}/bin:${PATH} 40 | export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5" 41 | ;; 42 | *) 43 | ;; 44 | esac -------------------------------------------------------------------------------- /.github/workflows/cuda/Linux.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Took from https://github.com/pyg-team/pyg-lib/ 4 | 5 | OS=ubuntu2004 6 | 7 | case ${1} in 8 | cu118) 9 | CUDA=11.8 10 | APT_KEY=${OS}-${CUDA/./-}-local 11 | FILENAME=cuda-repo-${APT_KEY}_${CUDA}.0-520.61.05-1_amd64.deb 12 | URL=https://developer.download.nvidia.com/compute/cuda/${CUDA}.0/local_installers 13 | ;; 14 | cu117) 15 | CUDA=11.7 16 | APT_KEY=${OS}-${CUDA/./-}-local 17 | FILENAME=cuda-repo-${APT_KEY}_${CUDA}.1-515.65.01-1_amd64.deb 18 | URL=https://developer.download.nvidia.com/compute/cuda/${CUDA}.1/local_installers 19 | ;; 20 | cu116) 21 | CUDA=11.6 22 | APT_KEY=${OS}-${CUDA/./-}-local 23 | FILENAME=cuda-repo-${APT_KEY}_${CUDA}.2-510.47.03-1_amd64.deb 24 | URL=https://developer.download.nvidia.com/compute/cuda/${CUDA}.2/local_installers 25 | ;; 26 | cu115) 27 | CUDA=11.5 28 | APT_KEY=${OS}-${CUDA/./-}-local 29 | FILENAME=cuda-repo-${APT_KEY}_${CUDA}.2-495.29.05-1_amd64.deb 30 | URL=https://developer.download.nvidia.com/compute/cuda/${CUDA}.2/local_installers 31 | ;; 32 | cu113) 33 | CUDA=11.3 34 | APT_KEY=${OS}-${CUDA/./-}-local 35 | FILENAME=cuda-repo-${APT_KEY}_${CUDA}.0-465.19.01-1_amd64.deb 36 | URL=https://developer.download.nvidia.com/compute/cuda/${CUDA}.0/local_installers 37 | ;; 38 
| cu102) 39 | CUDA=10.2 40 | APT_KEY=${CUDA/./-}-local-${CUDA}.89-440.33.01 41 | FILENAME=cuda-repo-${OS}-${APT_KEY}_1.0-1_amd64.deb 42 | URL=https://developer.download.nvidia.com/compute/cuda/${CUDA}/Prod/local_installers 43 | ;; 44 | *) 45 | echo "Unrecognized CUDA_VERSION=${1}" 46 | exit 1 47 | ;; 48 | esac 49 | 50 | wget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin 51 | sudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600 52 | wget -nv ${URL}/${FILENAME} 53 | sudo dpkg -i ${FILENAME} 54 | 55 | if [ "${1}" = "cu117" ] || [ "${1}" = "cu118" ]; then 56 | sudo cp /var/cuda-repo-${APT_KEY}/cuda-*-keyring.gpg /usr/share/keyrings/ 57 | else 58 | sudo apt-key add /var/cuda-repo-${APT_KEY}/7fa2af80.pub 59 | fi 60 | 61 | sudo apt-get update 62 | sudo apt-get -y install cuda 63 | 64 | rm -f ${FILENAME} -------------------------------------------------------------------------------- /.github/workflows/cuda/Windows-env.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Took from https://github.com/pyg-team/pyg-lib/ 4 | 5 | case ${1} in 6 | cu118) 7 | CUDA_HOME=/c/Program\ Files/NVIDIA\ GPU\ Computing\ Toolkit/CUDA/v11.8 8 | PATH=${CUDA_HOME}/bin:$PATH 9 | PATH=/c/Program\ Files\ \(x86\)/Microsoft\ Visual\ Studio/2017/BuildTools/MSBuild/15.0/Bin:$PATH 10 | export TORCH_CUDA_ARCH_LIST="6.0+PTX" 11 | ;; 12 | cu117) 13 | CUDA_HOME=/c/Program\ Files/NVIDIA\ GPU\ Computing\ Toolkit/CUDA/v11.7 14 | PATH=${CUDA_HOME}/bin:$PATH 15 | PATH=/c/Program\ Files\ \(x86\)/Microsoft\ Visual\ Studio/2017/BuildTools/MSBuild/15.0/Bin:$PATH 16 | export TORCH_CUDA_ARCH_LIST="6.0+PTX" 17 | ;; 18 | cu116) 19 | CUDA_HOME=/c/Program\ Files/NVIDIA\ GPU\ Computing\ Toolkit/CUDA/v11.6 20 | PATH=${CUDA_HOME}/bin:$PATH 21 | PATH=/c/Program\ Files\ \(x86\)/Microsoft\ Visual\ Studio/2017/BuildTools/MSBuild/15.0/Bin:$PATH 22 | export TORCH_CUDA_ARCH_LIST="6.0+PTX" 23 | ;; 24 | cu115) 25 | 
CUDA_HOME=/c/Program\ Files/NVIDIA\ GPU\ Computing\ Toolkit/CUDA/v11.5 26 | PATH=${CUDA_HOME}/bin:$PATH 27 | PATH=/c/Program\ Files\ \(x86\)/Microsoft\ Visual\ Studio/2017/BuildTools/MSBuild/15.0/Bin:$PATH 28 | export TORCH_CUDA_ARCH_LIST="6.0+PTX" 29 | ;; 30 | cu113) 31 | CUDA_HOME=/c/Program\ Files/NVIDIA\ GPU\ Computing\ Toolkit/CUDA/v11.3 32 | PATH=${CUDA_HOME}/bin:$PATH 33 | PATH=/c/Program\ Files\ \(x86\)/Microsoft\ Visual\ Studio/2017/BuildTools/MSBuild/15.0/Bin:$PATH 34 | export TORCH_CUDA_ARCH_LIST="6.0+PTX" 35 | ;; 36 | *) 37 | ;; 38 | esac -------------------------------------------------------------------------------- /.github/workflows/cuda/Windows.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Took from https://github.com/pyg-team/pyg-lib/ 4 | 5 | # Install NVIDIA drivers, see: 6 | # https://github.com/pytorch/vision/blob/master/packaging/windows/internal/cuda_install.bat#L99-L102 7 | curl -k -L "https://drive.google.com/u/0/uc?id=1injUyo3lnarMgWyRcXqKg4UGnN0ysmuq&export=download" --output "/tmp/gpu_driver_dlls.zip" 8 | 7z x "/tmp/gpu_driver_dlls.zip" -o"/c/Windows/System32" 9 | 10 | case ${1} in 11 | cu118) 12 | CUDA_SHORT=11.8 13 | CUDA_URL=https://developer.download.nvidia.com/compute/cuda/${CUDA_SHORT}.0/local_installers 14 | CUDA_FILE=cuda_${CUDA_SHORT}.0_522.06_windows.exe 15 | ;; 16 | cu117) 17 | CUDA_SHORT=11.7 18 | CUDA_URL=https://developer.download.nvidia.com/compute/cuda/${CUDA_SHORT}.1/local_installers 19 | CUDA_FILE=cuda_${CUDA_SHORT}.1_516.94_windows.exe 20 | ;; 21 | cu116) 22 | CUDA_SHORT=11.3 23 | CUDA_URL=https://developer.download.nvidia.com/compute/cuda/${CUDA_SHORT}.0/local_installers 24 | CUDA_FILE=cuda_${CUDA_SHORT}.0_465.89_win10.exe 25 | ;; 26 | cu115) 27 | CUDA_SHORT=11.3 28 | CUDA_URL=https://developer.download.nvidia.com/compute/cuda/${CUDA_SHORT}.0/local_installers 29 | CUDA_FILE=cuda_${CUDA_SHORT}.0_465.89_win10.exe 30 | ;; 31 | cu113) 32 | CUDA_SHORT=11.3 33 
| CUDA_URL=https://developer.download.nvidia.com/compute/cuda/${CUDA_SHORT}.0/local_installers 34 | CUDA_FILE=cuda_${CUDA_SHORT}.0_465.89_win10.exe 35 | ;; 36 | *) 37 | echo "Unrecognized CUDA_VERSION=${1}" 38 | exit 1 39 | ;; 40 | esac 41 | 42 | curl -k -L "${CUDA_URL}/${CUDA_FILE}" --output "${CUDA_FILE}" 43 | echo "" 44 | echo "Installing from ${CUDA_FILE}..." 45 | PowerShell -Command "Start-Process -FilePath \"${CUDA_FILE}\" -ArgumentList \"-s nvcc_${CUDA_SHORT} cuobjdump_${CUDA_SHORT} nvprune_${CUDA_SHORT} cupti_${CUDA_SHORT} cublas_dev_${CUDA_SHORT} cudart_${CUDA_SHORT} cufft_dev_${CUDA_SHORT} curand_dev_${CUDA_SHORT} cusolver_dev_${CUDA_SHORT} cusparse_dev_${CUDA_SHORT} thrust_${CUDA_SHORT} npp_dev_${CUDA_SHORT} nvrtc_dev_${CUDA_SHORT} nvml_dev_${CUDA_SHORT}\" -Wait -NoNewWindow" 46 | echo "Done!" 47 | rm -f "${CUDA_FILE}" -------------------------------------------------------------------------------- /.github/workflows/doc.yml: -------------------------------------------------------------------------------- 1 | name: Docs 2 | on: 3 | push: 4 | branches: [main] 5 | pull_request: 6 | branches: [main] 7 | workflow_dispatch: 8 | 9 | permissions: 10 | contents: write 11 | jobs: 12 | docs: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - uses: actions/checkout@v3 16 | with: 17 | submodules: 'recursive' 18 | - uses: actions/setup-python@v3 19 | with: 20 | python-version: '3.9' 21 | - name: Install dependencies 22 | run: | 23 | pip install -r docs/requirements.txt 24 | pip install torch --index-url https://download.pytorch.org/whl/cpu 25 | BUILD_NO_CUDA=1 pip install . 
26 | - name: Sphinx build 27 | run: | 28 | sphinx-build docs/source _build 29 | - name: Deploy 30 | uses: peaceiris/actions-gh-pages@v3 31 | with: 32 | publish_branch: gh-pages 33 | github_token: ${{ secrets.GITHUB_TOKEN }} 34 | publish_dir: _build/ 35 | force_orphan: true 36 | cname: www.nerfacc.com 37 | if: github.event_name != 'pull_request' -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | # This workflows will upload a Python Package using twine when a release is created 2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries 3 | 4 | name: Upload Python Package 5 | 6 | on: 7 | release: 8 | types: [created] 9 | branches: [master] 10 | 11 | jobs: 12 | deploy: 13 | runs-on: ubuntu-latest 14 | environment: production 15 | 16 | steps: 17 | - uses: actions/checkout@v3 18 | - name: Set up Python 19 | uses: actions/setup-python@v4 20 | with: 21 | python-version: '3.7' 22 | - name: Install dependencies 23 | run: | 24 | python -m pip install build twine 25 | - name: Strip unsupported tags in README 26 | run: | 27 | sed -i '//,//d' README.md 28 | - name: Build and publish 29 | env: 30 | PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }} 31 | run: | 32 | BUILD_NO_CUDA=1 python -m build 33 | twine upload --username __token__ --password $PYPI_TOKEN dist/* -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Visual Studio Code configs. 
2 | .vscode/ 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | # lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | MANIFEST 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | db.sqlite3 61 | 62 | # Flask stuff: 63 | instance/ 64 | .webassets-cache 65 | 66 | # Scrapy stuff: 67 | .scrapy 68 | 69 | # Sphinx documentation 70 | docs/_build/ 71 | 72 | # PyBuilder 73 | target/ 74 | 75 | # Jupyter Notebook 76 | .ipynb_checkpoints 77 | 78 | # pyenv 79 | .python-version 80 | 81 | # celery beat schedule file 82 | celerybeat-schedule 83 | 84 | # SageMath parsed files 85 | *.sage.py 86 | 87 | # Environments 88 | .env 89 | .venv 90 | env/ 91 | venv/ 92 | ENV/ 93 | env.bak/ 94 | venv.bak/ 95 | 96 | # Spyder project settings 97 | .spyderproject 98 | .spyproject 99 | 100 | # Rope project settings 101 | .ropeproject 102 | 103 | # mkdocs documentation 104 | /site 105 | 106 | # mypy 107 | .mypy_cache/ 108 | 109 | .DS_Store 110 | 111 | # Direnv config. 
112 | .envrc 113 | 114 | # line_profiler 115 | *.lprof 116 | 117 | # vscode 118 | .vsocde 119 | 120 | outputs/ 121 | data 122 | 123 | benchmarks/* 124 | !benchmarks/barf 125 | !benchmarks/kplanes 126 | !benchmarks/tensorf 127 | !benchmarks/tineuvox -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "examples/pycolmap"] 2 | path = examples/pycolmap 3 | url = https://github.com/rmbrualla/pycolmap.git 4 | 5 | [submodule "benchmarks/tensorf"] 6 | path = benchmarks/tensorf 7 | url = https://github.com/liruilong940607/tensorf.git 8 | branch = nerfacc 9 | 10 | [submodule "benchmarks/barf"] 11 | path = benchmarks/barf 12 | url = https://github.com/liruilong940607/barf.git 13 | branch = nerfacc 14 | 15 | [submodule "benchmarks/tineuvox"] 16 | path = benchmarks/tineuvox 17 | url = https://github.com/liruilong940607/tineuvox.git 18 | branch = nerfacc 19 | 20 | [submodule "benchmarks/kplanes"] 21 | path = benchmarks/kplanes 22 | url = https://github.com/liruilong940607/kplanes.git 23 | branch = nerfacc 24 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v2.3.0 4 | hooks: 5 | - id: end-of-file-fixer 6 | - id: trailing-whitespace 7 | - id: check-yaml 8 | - id: check-merge-conflict 9 | - id: requirements-txt-fixer 10 | - repo: https://github.com/psf/black 11 | rev: 22.10.0 12 | hooks: 13 | - id: black 14 | language_version: python3.8.12 15 | args: # arguments to configure black 16 | - --line-length=80 17 | 18 | - repo: https://github.com/pycqa/isort 19 | rev: 5.10.1 20 | hooks: 21 | - id: isort 22 | -------------------------------------------------------------------------------- /.readthedocs.yaml: 
-------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | build: 4 | os: ubuntu-20.04 5 | tools: 6 | python: "3.9" 7 | 8 | sphinx: 9 | fail_on_warning: true 10 | configuration: docs/source/conf.py 11 | 12 | python: 13 | install: 14 | # Equivalent to 'pip install .' 15 | - method: pip 16 | path: . 17 | # Equivalent to 'pip install -r docs/requirements.txt' 18 | - requirements: docs/requirements.txt -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | 2 | # Change Log 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](http://keepachangelog.com/) 6 | and this project adheres to [Semantic Versioning](http://semver.org/). 7 | 8 | ## [0.5.0] - 2023-04-04 9 | 10 | This is a major upgrade in which 90% of the code has been rewritten. In this version 11 | we achieves: 12 | 13 | ![Teaser](/docs/source/_static/images/teaser.jpg?raw=true) 14 | 15 | Links: 16 | - Documentation: https://www.nerfacc.com/en/v0.5.0/ 17 | - ArXiv Report: Coming Soon. 18 | 19 | Methodologies: 20 | - Upgrade Occupancy Grid to support multiple levels. 21 | - Support Proposal Network from Mip-NeRF 360. 22 | - Update examples on unbounded scenes to use Multi-level Occupancy Grid or Proposal Network. 23 | - Contraction for Occupancy Grid is no longer supported due to it's inefficiency for ray traversal. 24 | 25 | API Changes: 26 | - [Changed] `OccupancyGrid()` -> `OccGridEstimator()`. 27 | - [Added] Argument `levels=1` for multi-level support. 28 | - [Added] Function `self.sampling()` that does basically the same thing with the old `nerfacc.ray_marching`. 29 | - [Renamed] Function `self.every_n_step()` -> `self.update_every_n_steps()` 30 | - [Added] `PropNetEstimator()`. With functions `self.sampling()`, `self.update_every_n_steps()` 31 | and `self.compute_loss()`. 
32 | - [Removed] `ray_marching()`. Ray marching is now implemented through calling `sampling()` of 33 | the `OccGridEstimator()` / `PropNetEstimator()`. 34 | - [Changed] `ray_aabb_intersect()` now supports multiple aabb, and supports new argument `near_plane`, `far_plane`, `miss_value`. 35 | - [Changed] `render_*_from_*()`. The input shape changes from `(all_samples, 1)` to `(all_samples)`. And the function will returns all intermediate results so it might be a tuple. 36 | - [Changed] `rendering()`. The input shape changes from `(all_samples, 1)` to `(all_samples)`, including the shape assumption for the `rgb_sigma_fn` and `rgb_alpha_fn`. Be aware of this shape change. 37 | - [Changed] `accumulate_along_rays()`. The shape of the `weights` in the inputs should be `(all_samples)` now. 38 | - [Removed] `unpack_info()`, `pack_data()`, `unpack_data()` are temporally removed due to in-compatibility 39 | with the new backend implementation. Will add them back later. 40 | - [Added] Some basic functions that support both batched tensor and flattened tensor: `inclusive_prod()`, `inclusive_sum()`, `exclusive_prod()`, `exclusive_sum()`, `importance_sampling()`, `searchsorted()`. 41 | 42 | Examples & Benchmarks: 43 | - More benchmarks and examples. See folder `examples/` and `benchmarks/`. 44 | 45 | ## [0.3.5] - 2023-02-23 46 | 47 | A stable version that achieves: 48 | - The vanilla Nerf model with 8-layer MLPs can be trained to better quality (+0.5 PNSR) in 1 hour rather than days as in the paper. 49 | - The Instant-NGP Nerf model can be trained to equal quality in 4.5 minutes, comparing to the official pure-CUDA implementation. 50 | - The D-Nerf model for dynamic objects can also be trained in 1 hour rather than 2 days as in the paper, and with better quality (+~2.5 PSNR). 51 | - Both bounded and unbounded scenes are supported. 
52 | 53 | Links: 54 | - Documentation: https://www.nerfacc.com/en/v0.3.5/ 55 | - ArXiv Report: https://arxiv.org/abs/2210.04847v2/ 56 | 57 | Methodologies: 58 | - Single resolution `nerfacc.OccupancyGrid` for synthetic scenes. 59 | - Contraction methods `nerfacc.ContractionType` for unbounded scenes. 60 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Ruilong Li 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | THIS SOFTWARE AND/OR DATA WAS DEPOSITED IN THE BAIR OPEN RESEARCH COMMONS 24 | REPOSITORY ON 05/08/2023. 
25 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include nerfacc/cuda/csrc/include/* 2 | include nerfacc/cuda/csrc/* 3 | 4 | include nerfacc/_cuda/csrc/include/* 5 | include nerfacc/_cuda/csrc/* 6 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | pytorch_sphinx_theme @ git+https://github.com/liruilong940607/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme 2 | sphinx==5.2.1 3 | sphinx-copybutton==0.5.0 4 | sphinx-design==0.2.0 -------------------------------------------------------------------------------- /docs/source/_static/css/readthedocs.css: -------------------------------------------------------------------------------- 1 | .header-logo { 2 | background-image: url("../images/logo4x.png"); 3 | background-size: 156px 35px; 4 | height: 35px; 5 | width: 156px; 6 | } 7 | code { 8 | word-break: normal; 9 | } -------------------------------------------------------------------------------- /docs/source/_static/images/coding/gpu_util.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nerfstudio-project/nerfacc/57ccfa14feb94975836ea6913149a86737220f2b/docs/source/_static/images/coding/gpu_util.png -------------------------------------------------------------------------------- /docs/source/_static/images/illustration_occgrid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nerfstudio-project/nerfacc/57ccfa14feb94975836ea6913149a86737220f2b/docs/source/_static/images/illustration_occgrid.png -------------------------------------------------------------------------------- /docs/source/_static/images/illustration_propnet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nerfstudio-project/nerfacc/57ccfa14feb94975836ea6913149a86737220f2b/docs/source/_static/images/illustration_propnet.png 
-------------------------------------------------------------------------------- /docs/source/_static/images/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nerfstudio-project/nerfacc/57ccfa14feb94975836ea6913149a86737220f2b/docs/source/_static/images/logo.png -------------------------------------------------------------------------------- /docs/source/_static/images/logo4x.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nerfstudio-project/nerfacc/57ccfa14feb94975836ea6913149a86737220f2b/docs/source/_static/images/logo4x.png -------------------------------------------------------------------------------- /docs/source/_static/images/plot_occgrid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nerfstudio-project/nerfacc/57ccfa14feb94975836ea6913149a86737220f2b/docs/source/_static/images/plot_occgrid.png -------------------------------------------------------------------------------- /docs/source/_static/images/plot_propnet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nerfstudio-project/nerfacc/57ccfa14feb94975836ea6913149a86737220f2b/docs/source/_static/images/plot_propnet.png -------------------------------------------------------------------------------- /docs/source/_static/images/teaser.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nerfstudio-project/nerfacc/57ccfa14feb94975836ea6913149a86737220f2b/docs/source/_static/images/teaser.jpg -------------------------------------------------------------------------------- /docs/source/apis/estimators.rst: -------------------------------------------------------------------------------- 1 | .. 
_`Estimators`: 2 | 3 | Estimators 4 | =================================== 5 | 6 | .. currentmodule:: nerfacc 7 | 8 | .. autoclass:: OccGridEstimator 9 | :members: 10 | 11 | .. autoclass:: PropNetEstimator 12 | :members: 13 | -------------------------------------------------------------------------------- /docs/source/apis/generated/nerfacc.accumulate_along_rays.rst: -------------------------------------------------------------------------------- 1 | nerfacc.accumulate\_along\_rays 2 | =============================== 3 | 4 | .. currentmodule:: nerfacc 5 | 6 | .. autofunction:: accumulate_along_rays -------------------------------------------------------------------------------- /docs/source/apis/generated/nerfacc.exclusive_prod.rst: -------------------------------------------------------------------------------- 1 | nerfacc.exclusive\_prod 2 | ======================= 3 | 4 | .. currentmodule:: nerfacc 5 | 6 | .. autofunction:: exclusive_prod -------------------------------------------------------------------------------- /docs/source/apis/generated/nerfacc.exclusive_sum.rst: -------------------------------------------------------------------------------- 1 | nerfacc.exclusive\_sum 2 | ====================== 3 | 4 | .. currentmodule:: nerfacc 5 | 6 | .. autofunction:: exclusive_sum -------------------------------------------------------------------------------- /docs/source/apis/generated/nerfacc.importance_sampling.rst: -------------------------------------------------------------------------------- 1 | nerfacc.importance\_sampling 2 | ============================ 3 | 4 | .. currentmodule:: nerfacc 5 | 6 | .. autofunction:: importance_sampling -------------------------------------------------------------------------------- /docs/source/apis/generated/nerfacc.inclusive_prod.rst: -------------------------------------------------------------------------------- 1 | nerfacc.inclusive\_prod 2 | ======================= 3 | 4 | .. currentmodule:: nerfacc 5 | 6 | .. 
autofunction:: inclusive_prod -------------------------------------------------------------------------------- /docs/source/apis/generated/nerfacc.inclusive_sum.rst: -------------------------------------------------------------------------------- 1 | nerfacc.inclusive\_sum 2 | ====================== 3 | 4 | .. currentmodule:: nerfacc 5 | 6 | .. autofunction:: inclusive_sum -------------------------------------------------------------------------------- /docs/source/apis/generated/nerfacc.pack_info.rst: -------------------------------------------------------------------------------- 1 | nerfacc.pack\_info 2 | ================== 3 | 4 | .. currentmodule:: nerfacc 5 | 6 | .. autofunction:: pack_info -------------------------------------------------------------------------------- /docs/source/apis/generated/nerfacc.ray_aabb_intersect.rst: -------------------------------------------------------------------------------- 1 | nerfacc.ray\_aabb\_intersect 2 | ============================ 3 | 4 | .. currentmodule:: nerfacc 5 | 6 | .. autofunction:: ray_aabb_intersect -------------------------------------------------------------------------------- /docs/source/apis/generated/nerfacc.render_transmittance_from_alpha.rst: -------------------------------------------------------------------------------- 1 | nerfacc.render\_transmittance\_from\_alpha 2 | ========================================== 3 | 4 | .. currentmodule:: nerfacc 5 | 6 | .. autofunction:: render_transmittance_from_alpha -------------------------------------------------------------------------------- /docs/source/apis/generated/nerfacc.render_transmittance_from_density.rst: -------------------------------------------------------------------------------- 1 | nerfacc.render\_transmittance\_from\_density 2 | ============================================ 3 | 4 | .. currentmodule:: nerfacc 5 | 6 | .. 
autofunction:: render_transmittance_from_density -------------------------------------------------------------------------------- /docs/source/apis/generated/nerfacc.render_visibility_from_alpha.rst: -------------------------------------------------------------------------------- 1 | nerfacc.render\_visibility\_from\_alpha 2 | ======================================= 3 | 4 | .. currentmodule:: nerfacc 5 | 6 | .. autofunction:: render_visibility_from_alpha -------------------------------------------------------------------------------- /docs/source/apis/generated/nerfacc.render_visibility_from_density.rst: -------------------------------------------------------------------------------- 1 | nerfacc.render\_visibility\_from\_density 2 | ========================================= 3 | 4 | .. currentmodule:: nerfacc 5 | 6 | .. autofunction:: render_visibility_from_density -------------------------------------------------------------------------------- /docs/source/apis/generated/nerfacc.render_weight_from_alpha.rst: -------------------------------------------------------------------------------- 1 | nerfacc.render\_weight\_from\_alpha 2 | =================================== 3 | 4 | .. currentmodule:: nerfacc 5 | 6 | .. autofunction:: render_weight_from_alpha -------------------------------------------------------------------------------- /docs/source/apis/generated/nerfacc.render_weight_from_density.rst: -------------------------------------------------------------------------------- 1 | nerfacc.render\_weight\_from\_density 2 | ===================================== 3 | 4 | .. currentmodule:: nerfacc 5 | 6 | .. autofunction:: render_weight_from_density -------------------------------------------------------------------------------- /docs/source/apis/generated/nerfacc.searchsorted.rst: -------------------------------------------------------------------------------- 1 | nerfacc.searchsorted 2 | ==================== 3 | 4 | .. currentmodule:: nerfacc 5 | 6 | .. 
autofunction:: searchsorted -------------------------------------------------------------------------------- /docs/source/apis/generated/nerfacc.traverse_grids.rst: -------------------------------------------------------------------------------- 1 | nerfacc.traverse\_grids 2 | ======================= 3 | 4 | .. currentmodule:: nerfacc 5 | 6 | .. autofunction:: traverse_grids -------------------------------------------------------------------------------- /docs/source/apis/rendering.rst: -------------------------------------------------------------------------------- 1 | Rendering 2 | =================================== 3 | 4 | .. currentmodule:: nerfacc 5 | 6 | .. autofunction:: rendering 7 | -------------------------------------------------------------------------------- /docs/source/apis/utils.rst: -------------------------------------------------------------------------------- 1 | Utils 2 | =================================== 3 | 4 | Below are the basic functions that supports sampling and rendering. 5 | 6 | .. currentmodule:: nerfacc 7 | 8 | .. 
autosummary:: 9 | :nosignatures: 10 | :toctree: generated/ 11 | 12 | inclusive_prod 13 | exclusive_prod 14 | inclusive_sum 15 | exclusive_sum 16 | 17 | pack_info 18 | 19 | render_visibility_from_alpha 20 | render_visibility_from_density 21 | render_weight_from_alpha 22 | render_weight_from_density 23 | render_transmittance_from_alpha 24 | render_transmittance_from_density 25 | accumulate_along_rays 26 | 27 | importance_sampling 28 | searchsorted 29 | 30 | ray_aabb_intersect 31 | traverse_grids 32 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | import pytorch_sphinx_theme 2 | 3 | __version__ = None 4 | exec(open("../../nerfacc/version.py", "r").read()) 5 | 6 | # -- Project information 7 | 8 | project = "nerfacc" 9 | copyright = "2022, Ruilong" 10 | author = "Ruilong" 11 | 12 | release = __version__ 13 | 14 | # -- General configuration 15 | 16 | extensions = [ 17 | "sphinx.ext.napoleon", 18 | "sphinx.ext.duration", 19 | "sphinx.ext.doctest", 20 | "sphinx.ext.autodoc", 21 | "sphinx.ext.autosummary", 22 | "sphinx.ext.intersphinx", 23 | ] 24 | 25 | intersphinx_mapping = { 26 | "python": ("https://docs.python.org/3/", None), 27 | "sphinx": ("https://www.sphinx-doc.org/en/master/", None), 28 | } 29 | intersphinx_disabled_domains = ["std"] 30 | 31 | templates_path = ["_templates"] 32 | 33 | # -- Options for HTML output 34 | 35 | # html_theme = "furo" 36 | 37 | html_theme = "pytorch_sphinx_theme" 38 | html_theme_path = [pytorch_sphinx_theme.get_html_theme_path()] 39 | html_static_path = ["_static"] 40 | html_css_files = ["css/readthedocs.css"] 41 | 42 | # Ignore >>> when copying code 43 | copybutton_prompt_text = r">>> |\.\.\. " 44 | copybutton_prompt_is_regexp = True 45 | 46 | # Theme options are theme-specific and customize the look and feel of a theme 47 | # further. 
For a list of options available for each theme, see the 48 | # documentation. 49 | html_theme_options = { 50 | # The target url that the logo directs to. Unset to do nothing 51 | "logo_url": "https://www.nerfacc.com/", 52 | # "menu" is a list of dictionaries where you can specify the content and the 53 | # behavior of each item in the menu. Each item can either be a link or a 54 | # dropdown menu containing a list of links. 55 | "menu": [ 56 | # A link 57 | { 58 | "name": "GitHub", 59 | "url": "https://github.com/nerfstudio-project/nerfacc", 60 | }, 61 | # A dropdown menu 62 | # { 63 | # "name": "Projects", 64 | # "children": [ 65 | # # A vanilla dropdown item 66 | # { 67 | # "name": "nerfstudio", 68 | # "url": "https://docs.nerf.studio/", 69 | # "description": "The all-in-one repo for NeRFs", 70 | # }, 71 | # ], 72 | # # Optional, determining whether this dropdown menu will always be 73 | # # highlighted. 74 | # # "active": True, 75 | # }, 76 | ], 77 | } 78 | # html_theme_options = { 79 | # "canonical_url": "", 80 | # "analytics_id": "", 81 | # "logo_only": False, 82 | # "display_version": True, 83 | # "prev_next_buttons_location": "bottom", 84 | # "style_external_links": False, 85 | # # Toc options 86 | # "collapse_navigation": True, 87 | # "sticky_navigation": True, 88 | # "navigation_depth": 4, 89 | # "includehidden": True, 90 | # "titles_only": False 91 | # } 92 | 93 | # -- Options for EPUB output 94 | epub_show_urls = "footnote" 95 | 96 | # typehints 97 | autodoc_typehints = "description" 98 | -------------------------------------------------------------------------------- /docs/source/examples/camera.rst: -------------------------------------------------------------------------------- 1 | Camera Optimization NeRFs 2 | =================================== 3 | 4 | Performance Overview 5 | -------------------- 6 | 7 | 
+----------------------+----------------+----------------------------------+-----------------------+--------------------------+------------------------+------------------------+ 8 | | Methods | Dataset | Training Time :math:`\downarrow` | PSNR :math:`\uparrow` | LPIPS :math:`\downarrow` | E_R :math:`\downarrow` | E_T :math:`\downarrow` | 9 | +======================+================+==================================+=======================+==========================+========================+========================+ 10 | | BARF `[1]`_ | NeRF-Synthetic | 586min | 28.83 | 0.054 | 0.19 | 0.74 | 11 | +----------------------+----------------+----------------------------------+-----------------------+--------------------------+------------------------+------------------------+ 12 | | *+nerfacc (occgrid)* | | 130min | 30.11 | 0.044 | 0.07 | 0.35 | 13 | +----------------------+----------------+----------------------------------+-----------------------+--------------------------+------------------------+------------------------+ 14 | 15 | Implementation Details 16 | ---------------------- 17 | 18 | .. toctree:: 19 | :glob: 20 | :maxdepth: 1 21 | 22 | camera/* 23 | 24 | .. _`[1]`: https://arxiv.org/abs/2104.06405 25 | -------------------------------------------------------------------------------- /docs/source/examples/camera/barf.rst: -------------------------------------------------------------------------------- 1 | BARF 2 | ==================== 3 | 4 | In this example we showcase how to plug the nerfacc library into the *official* codebase 5 | of `BARF `_. See 6 | `our forked repo `_ 7 | for details. 8 | 9 | 10 | Benchmark: NeRF-Synthetic Dataset 11 | --------------------------------- 12 | *updated on 2023-04-04 with nerfacc==0.5.0* 13 | 14 | Our experiments are conducted on a single NVIDIA GeForce RTX 2080 Ti. 
15 | 16 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+ 17 | | PSNR | Lego | Mic |Materials| Chair |Hotdog | Ficus | Drums | Ship | MEAN | 18 | | | | | | | | | | | | 19 | +=======================+=======+=======+=========+=======+=======+=======+=======+=======+=======+ 20 | | BARF | 31.16 | 23.87 | 26.28 | 34.48 | 28.4 | 27.86 | 31.07 | 27.55 | 28.83 | 21 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+ 22 | | training time | 9.5hrs| 9.5hrs| 9.2hrs | 9.3hrs|12.3hrs| 9.3hrs| 9.3hrs| 9.5hrs| 9.8hrs| 23 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+ 24 | | camera errors (R) | 0.105 | 0.047 | 0.085 | 0.226 | 0.071 | 0.846 | 0.068 | 0.089 | 0.192 | 25 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+ 26 | | camera errors (T) | 0.0043| 0.0021| 0.0040 | 0.0120| 0.0026| 0.0272| 0.0025| 0.0044| 0.0074| 27 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+ 28 | | Ours (occ) | 32.25 | 24.77 | 27.73 | 35.84 | 29.98 | 28.83 | 32.84 | 28.62 | 30.11 | 29 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+ 30 | | training time | 1.5hrs| 2.0hrs| 2.0hrs | 2.3hrs| 2.2hrs| 1.9hrs| 2.2hrs| 2.3hrs| 2.0hrs| 31 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+ 32 | | camera errors (R) | 0.081 | 0.036 | 0.056 | 0.171 | 0.058 | 0.039 | 0.039 | 0.079 | 0.070 | 33 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+ 34 | | camera errors (T) | 0.0038| 0.0019| 0.0031 | 0.0106| 0.0021| 0.0013| 0.0014| 0.0041| 0.0035| 35 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+ 36 | 
-------------------------------------------------------------------------------- /docs/source/examples/dynamic.rst: -------------------------------------------------------------------------------- 1 | Dynamic NeRFs 2 | =================================== 3 | 4 | 5 | The :class:`nerfacc.PropNetEstimator` can naturally work with dynamic NeRFs. To make the 6 | :class:`nerfacc.OccGridEstimator` also work with dynamic NeRFs, we need to make some compromises. 7 | In these examples, we use the :class:`nerfacc.OccGridEstimator` to estimate the 8 | `maximum` opacity at each area `over all the timestamps`. This allows us to share the same estimator 9 | across all the timestamps, including those timestamps that are not in the training set. 10 | In other words, we use it to cache the union of the occupancy at all timestamps. 11 | It is not optimal but still makes the rendering very efficient if the motion is not overly significant. 12 | 13 | 14 | Performance Overview 15 | -------------------- 16 | *updated on 2023-04-04* 17 | 18 | +----------------------+-----------+----------------------------------+-----------------------+--------------------------+ 19 | | Methods              | Dataset   | Training Time :math:`\downarrow` | PSNR :math:`\uparrow` | LPIPS :math:`\downarrow` | 20 | +======================+===========+==================================+=======================+==========================+ 21 | | TiNeuVox `[1]`_      | HyperNeRF | 56.3min                          | 24.19                 | 0.425                    | 22 | +----------------------+-----------+----------------------------------+-----------------------+--------------------------+ 23 | | *+nerfacc (occgrid)* |           | 33.0min                          | 24.19                 | 0.434                    | 24 | +----------------------+-----------+----------------------------------+-----------------------+--------------------------+ 25 | | *+nerfacc (propnet)* |           | 34.3min                          | 24.26                 | 0.398                    | 26 | +----------------------+-----------+----------------------------------+-----------------------+--------------------------+ 27 | | TiNeuVox `[1]`_      | D-NeRF    | 
11.8min | 31.14 | 0.050 | 28 | +----------------------+-----------+----------------------------------+-----------------------+--------------------------+ 29 | | *+nerfacc (occgrid)* | | 4.2min | 31.75 | 0.038 | 30 | +----------------------+-----------+----------------------------------+-----------------------+--------------------------+ 31 | | K-Planes `[2]`_ | D-NeRF | 63.9min | 30.28 | 0.043 | 32 | +----------------------+-----------+----------------------------------+-----------------------+--------------------------+ 33 | | *+nerfacc (occgrid)* | | 38.8min | 30.35 | 0.042 | 34 | +----------------------+-----------+----------------------------------+-----------------------+--------------------------+ 35 | | T-NeRF `[3]`_ | D-NeRF | 20hours | 28.78 | 0.069 | 36 | +----------------------+-----------+----------------------------------+-----------------------+--------------------------+ 37 | | *+nerfacc (occgrid)* | | 58min | 32.22 | 0.040 | 38 | +----------------------+-----------+----------------------------------+-----------------------+--------------------------+ 39 | 40 | Implementation Details 41 | ---------------------- 42 | 43 | .. toctree:: 44 | :glob: 45 | :maxdepth: 1 46 | 47 | dynamic/* 48 | 49 | | 50 | 51 | 3rd-Party Use Cases 52 | ------------------- 53 | 54 | - `Representing Volumetric Videos as Dynamic MLP Maps, CVPR 2023 `_. 55 | 56 | .. _`[1]`: https://arxiv.org/abs/2205.15285 57 | .. _`[2]`: https://arxiv.org/abs/2301.10241 58 | .. _`[3]`: https://arxiv.org/abs/2011.13961 -------------------------------------------------------------------------------- /docs/source/examples/dynamic/kplanes.rst: -------------------------------------------------------------------------------- 1 | K-Planes 2 | ==================== 3 | 4 | In this example we showcase how to plug the nerfacc library into the *official* codebase 5 | of `K-Planes `_. See 6 | `our forked repo `_ 7 | for details. 
8 | 9 | 10 | Benchmark: D-NeRF Dataset 11 | --------------------------------- 12 | *updated on 2023-04-04 with nerfacc==0.5.0* 13 | 14 | Our experiments are conducted on a single NVIDIA GeForce RTX 2080 Ti. 15 | 16 | +----------------------+----------+---------+-------+---------+-------+--------+---------+-------+-------+ 17 | | PSNR | bouncing | hell | hook | jumping | lego | mutant | standup | trex | MEAN | 18 | | | balls | warrior | | jacks | | | | | | 19 | +======================+==========+=========+=======+=========+=======+========+=========+=======+=======+ 20 | | K-Planes | 39.10 | 23.95 | 27.76 | 31.11 | 25.18 | 32.44 | 32.51 | 30.25 | 30.29 | 21 | +----------------------+----------+---------+-------+---------+-------+--------+---------+-------+-------+ 22 | | training time | 68min | 70min | 70min | 70min | 70min | 71min | 72min | 71min | 70min | 23 | +----------------------+----------+---------+-------+---------+-------+--------+---------+-------+-------+ 24 | | Ours (occ) | 38.95 | 24.00 | 27.74 | 30.46 | 25.25 | 32.58 | 32.84 | 30.49 | 30.29 | 25 | +----------------------+----------+---------+-------+---------+-------+--------+---------+-------+-------+ 26 | | training time | 41min | 41min | 40min | 40min | 39min | 39min | 40min | 38min | 40min | 27 | +----------------------+----------+---------+-------+---------+-------+--------+---------+-------+-------+ 28 | -------------------------------------------------------------------------------- /docs/source/examples/dynamic/tineuvox.rst: -------------------------------------------------------------------------------- 1 | .. _`TiNeuVox Example`: 2 | 3 | TiNeuVox 4 | ==================== 5 | 6 | In this example we showcase how to plug the nerfacc library into the *official* codebase 7 | of `TiNeuVox `_. See 8 | `our forked repo `_ 9 | for details. 
10 | 11 | 12 | Benchmark: D-NeRF Dataset 13 | --------------------------------- 14 | *updated on 2023-04-04 with nerfacc==0.5.0* 15 | 16 | Our experiments are conducted on a single NVIDIA GeForce RTX 2080 Ti. 17 | 18 | +----------------------+----------+---------+-------+---------+-------+--------+---------+-------+-------+ 19 | | PSNR | bouncing | hell | hook | jumping | lego | mutant | standup | trex | MEAN | 20 | | | balls | warrior | | jacks | | | | | | 21 | +======================+==========+=========+=======+=========+=======+========+=========+=======+=======+ 22 | | TiNeuVox | 39.37 | 27.05 | 29.61 | 32.92 | 24.32 | 31.47 | 33.59 | 30.01 | 31.04 | 23 | +----------------------+----------+---------+-------+---------+-------+--------+---------+-------+-------+ 24 | | Training Time | 832s | 829s | 833s | 841s | 824s | 833s | 827s | 840s | 833s | 25 | +----------------------+----------+---------+-------+---------+-------+--------+---------+-------+-------+ 26 | | Ours (occ) | 40.56 | 27.17 | 31.35 | 33.44 | 25.17 | 34.05 | 35.35 | 32.29 | 32.42 | 27 | +----------------------+----------+---------+-------+---------+-------+--------+---------+-------+-------+ 28 | | Training Time | 378s | 302s | 342s | 325s | 355s | 360s | 346s | 362s | 346s | 29 | +----------------------+----------+---------+-------+---------+-------+--------+---------+-------+-------+ 30 | 31 | 32 | Benchmark: HyperNeRF Dataset 33 | --------------------------------- 34 | *updated on 2023-04-04 with nerfacc==0.5.0* 35 | 36 | Our experiments are conducted on a single NVIDIA GeForce RTX 2080 Ti. 
37 | 38 | +----------------------+----------+---------+-------+-------------+-------+ 39 | | PSNR | 3dprinter| broom |chicken| peel-banana | MEAN | 40 | | | | | | | | 41 | +======================+==========+=========+=======+=============+=======+ 42 | | TiNeuVox | 22.77 | 21.30 | 28.29 | 24.50 | 24.22 | 43 | +----------------------+----------+---------+-------+-------------+-------+ 44 | | Training Time | 3253s | 2811s | 3933s | 2705s | 3175s | 45 | +----------------------+----------+---------+-------+-------------+-------+ 46 | | Ours (occ) | 22.72 | 21.27 | 28.27 | 24.54 | 24.20 | 47 | +----------------------+----------+---------+-------+-------------+-------+ 48 | | Training Time | 2265s | 2221s | 2157s | 2101s | 2186s | 49 | +----------------------+----------+---------+-------+-------------+-------+ 50 | | Ours (prop) | 22.75 | 21.17 | 28.27 | 24.97 | 24.29 | 51 | +----------------------+----------+---------+-------+-------------+-------+ 52 | | Training Time | 2307s | 2281s | 2267s | 2510s | 2341s | 53 | +----------------------+----------+---------+-------+-------------+-------+ 54 | -------------------------------------------------------------------------------- /docs/source/examples/dynamic/tnerf.rst: -------------------------------------------------------------------------------- 1 | .. _`T-NeRF Example`: 2 | 3 | T-NeRF 4 | ==================== 5 | See code `examples/train_mlp_dnerf.py` at our `github repository`_ for details. 6 | 7 | Radiance Field 8 | -------------- 9 | Here we implement a very basic time-conditioned NeRF (T-NeRF) model (`examples/radiance_fields/mlp.py`) 10 | for dynamic scene reconstruction. 11 | The implementation is mostly follow the T-NeRF described in the `D-NeRF`_ paper, with a 8-layer-MLP 12 | for the radiance field and a 4-layer-MLP for the warping field. 
The only major difference is that 13 | we reduce the max frequency of the positional encoding from 10 to 4, to respect the fact that the 14 | motion of the object is relatively smooth. 15 | 16 | 17 | Benchmarks: D-NeRF Dataset 18 | --------------------------- 19 | *updated on 2022-10-08* 20 | 21 | Our experiments are conducted on a single NVIDIA TITAN RTX GPU. 22 | The training memory footprint is about 11GB. 23 | 24 | +----------------------+----------+---------+-------+---------+-------+--------+---------+-------+-------+ 25 | | PSNR | bouncing | hell | hook | jumping | lego | mutant | standup | trex | MEAN | 26 | | | balls | warrior | | jacks | | | | | | 27 | +======================+==========+=========+=======+=========+=======+========+=========+=======+=======+ 28 | | D-NeRF (~ days) | 32.80 | 25.02 | 29.25 | 32.80 | 21.64 | 31.29 | 32.79 | 31.75 | 29.67 | 29 | +----------------------+----------+---------+-------+---------+-------+--------+---------+-------+-------+ 30 | | Ours (~ 1 hr) | 39.49 | 25.58 | 31.86 | 32.73 | 24.32 | 35.55 | 35.90 | 32.33 | 32.22 | 31 | +----------------------+----------+---------+-------+---------+-------+--------+---------+-------+-------+ 32 | 33 | .. _`D-NeRF`: https://arxiv.org/abs/2011.13961 34 | .. 
_`github repository`: https://github.com/nerfstudio-project/nerfacc/ 35 | 36 | -------------------------------------------------------------------------------- /docs/source/examples/static.rst: -------------------------------------------------------------------------------- 1 | Static NeRFs 2 | =================================== 3 | 4 | Performance Overview 5 | -------------------- 6 | 7 | +----------------------+----------------+----------------------------------+-----------------------+--------------------------+ 8 | | Methods | Dataset | Training Time :math:`\downarrow` | PSNR :math:`\uparrow` | LPIPS :math:`\downarrow` | 9 | +======================+================+==================================+=======================+==========================+ 10 | | TensoRF `[1]`_ | Tanks&Temple | 19min | 28.11 | 0.167 | 11 | +----------------------+----------------+----------------------------------+-----------------------+--------------------------+ 12 | | *+nerfacc (occgrid)* | | 14min | 28.06 | 0.174 | 13 | +----------------------+----------------+----------------------------------+-----------------------+--------------------------+ 14 | | TensoRF `[1]`_ | NeRF-Synthetic | 10.3min | 32.73 | 0.049 | 15 | +----------------------+----------------+----------------------------------+-----------------------+--------------------------+ 16 | | *+nerfacc (occgrid)* | | 7.1min | 32.52 | 0.054 | 17 | +----------------------+----------------+----------------------------------+-----------------------+--------------------------+ 18 | | NeRF `[2]`_ | NeRF-Synthetic | days | 31.00 | 0.047 | 19 | +----------------------+----------------+----------------------------------+-----------------------+--------------------------+ 20 | | *+nerfacc (occgrid)* | | 1hr | 31.55 | 0.072 | 21 | +----------------------+----------------+----------------------------------+-----------------------+--------------------------+ 22 | | Instant-NGP `[3]`_ | NeRF-Synthetic | 4.4min | 32.35 | | 23 | 
+----------------------+----------------+----------------------------------+-----------------------+--------------------------+ 24 | | *+nerfacc (occgrid)* | | 4.5min | 33.11 | 0.053 | 25 | +----------------------+----------------+----------------------------------+-----------------------+--------------------------+ 26 | | *+nerfacc (propnet)* | | 4.0min | 31.76 | 0.062 | 27 | +----------------------+----------------+----------------------------------+-----------------------+--------------------------+ 28 | | Instant-NGP `[3]`_ | Mip-NeRF 360 | 5.3min | 25.93 | | 29 | +----------------------+----------------+----------------------------------+-----------------------+--------------------------+ 30 | | *+nerfacc (occgrid)* | | 5.0min | 26.41 | 0.353 | 31 | +----------------------+----------------+----------------------------------+-----------------------+--------------------------+ 32 | | *+nerfacc (propnet)* | | 4.9min | 27.58 | 0.292 | 33 | +----------------------+----------------+----------------------------------+-----------------------+--------------------------+ 34 | 35 | Implementation Details 36 | ---------------------- 37 | 38 | .. toctree:: 39 | :glob: 40 | :maxdepth: 1 41 | 42 | static/* 43 | 44 | | 45 | 46 | 3rd-Party Use Cases 47 | ------------------- 48 | 49 | - `nerfstudio `_: A collaboration friendly studio for NeRFs. 50 | - `modelscope `_: A collection of deep-learning algorithms. 51 | - `sdfstudio `_: A Unified Framework for Surface Reconstruction. 52 | - `instant-nsr-pl `_: Train NeuS in 10min. 53 | 54 | .. _`[1]`: https://arxiv.org/abs/2203.09517 55 | .. _`[2]`: https://arxiv.org/abs/2003.08934 56 | .. _`[3]`: https://arxiv.org/abs/2201.05989 -------------------------------------------------------------------------------- /docs/source/examples/static/nerf.rst: -------------------------------------------------------------------------------- 1 | .. 
_`Vanilla NeRF Example`: 2 | 3 | Vanilla NeRF 4 | ==================== 5 | See code `examples/train_mlp_nerf.py` at our `github repository`_ for details. 6 | 7 | 8 | Radiance Field 9 | -------------- 10 | We follow the original `NeRF`_ paper to implement a 8-layer-MLP radiance field (`examples/radiance_fields/mlp.py`) 11 | with positional encoding. 12 | 13 | .. note:: 14 | The vanilla Nerf paper uses two MLPs for course-to-fine sampling. Instead here we only use a 15 | single MLP with more samples (1024). Both ways share the same spirit to do dense sampling 16 | around the surface. Our fast rendering inheritly skip samples away from the surface 17 | so we can simplly increase the number of samples with a single MLP, to achieve the same goal 18 | with the coarse-to-fine sampling, without runtime or memory issue. 19 | 20 | 21 | Benchmark: Nerf-Synthetic Dataset 22 | --------------------------------- 23 | *updated on 2022-10-08* 24 | 25 | Our experiments are conducted on a single NVIDIA TITAN RTX GPU. 26 | The training memory footprint is about 10GB. 27 | 28 | +----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+ 29 | | PSNR | Lego | Mic |Materials| Chair |Hotdog | Ficus | Drums | Ship | MEAN | 30 | | | | | | | | | | | | 31 | +======================+=======+=======+=========+=======+=======+=======+=======+=======+=======+ 32 | | NeRF (~ days) | 32.54 | 32.91 | 29.62 | 33.00 | 36.18 | 30.13 | 25.01 | 28.65 | 31.00 | 33 | +----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+ 34 | | Ours (~ 1 hr) | 33.69 | 33.76 | 29.73 | 33.32 | 35.80 | 32.52 | 25.39 | 28.18 | 31.55 | 35 | +----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+ 36 | 37 | .. _`github repository`: https://github.com/nerfstudio-project/nerfacc/ 38 | .. 
_`NeRF`: https://arxiv.org/abs/2003.08934 39 | -------------------------------------------------------------------------------- /docs/source/examples/static/ngp.rst: -------------------------------------------------------------------------------- 1 | .. _`Instant-NGP Example`: 2 | 3 | Instant-NGP 4 | ==================== 5 | 6 | See code `examples/train_ngp_nerf_occ.py` and `examples/train_ngp_nerf_prop.py` at our 7 | `github repository`_ for details. 8 | 9 | 10 | Radiance Field 11 | -------------- 12 | We follow the `Instant-NGP`_ paper to implement the radiance field (`examples/radiance_fields/ngp.py`), 13 | and aligns the hyperparameters (e.g., hashencoder, mlp) with the paper. It is build on top of the 14 | `tiny-cuda-nn`_ library. 15 | 16 | 17 | Benchmark: Nerf-Synthetic Dataset 18 | --------------------------------- 19 | *updated on 2023-04-04 with nerfacc==0.5.0* 20 | 21 | Our experiments are conducted on a single NVIDIA TITAN RTX GPU. 22 | The training memory footprint is about 3GB. 23 | 24 | .. note:: 25 | 26 | The Instant-NGP paper makes use of the alpha channel in the images to apply random background 27 | augmentation during training. For fair comparision, we rerun their code with a constant white 28 | background during both training and testing. Also it is worth to mention that we didn't strictly 29 | follow the training receipe in the Instant-NGP paper, such as the learning rate schedule etc, as 30 | the purpose of this benchmark is to showcase instead of reproducing the paper. 
31 | 32 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+ 33 | | PSNR | Lego | Mic |Materials| Chair |Hotdog | Ficus | Drums | Ship | MEAN | 34 | | | | | | | | | | | | 35 | +=======================+=======+=======+=========+=======+=======+=======+=======+=======+=======+ 36 | |Instant-NGP 35k steps | 35.87 | 36.22 | 29.08 | 35.10 | 37.48 | 30.61 | 23.85 | 30.62 | 32.35 | 37 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+ 38 | | training time | 309s | 258s | 256s | 316s | 292s | 207s | 218s | 250s | 263s | 39 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+ 40 | |Ours (occ) 20k steps | 35.67 | 36.85 | 29.60 | 35.71 | 37.37 | 33.95 | 25.44 | 30.29 | 33.11 | 41 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+ 42 | | training time | 288s | 260s | 253s | 326s | 272s | 249s | 252s | 251s | 269s | 43 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+ 44 | |Ours (prop) 20k steps | 34.04 | 34.56 | 28.76 | 34.21 | 36.44 | 31.41 | 24.81 | 29.85 | 31.76 | 45 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+ 46 | | training time | 225s | 235s | 235s | 240s | 239s | 242s | 258s | 247s | 240s | 47 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+ 48 | 49 | 50 | Benchmark: Mip-NeRF 360 Dataset 51 | --------------------------------- 52 | *updated on 2023-04-04 with nerfacc==0.5.0* 53 | 54 | Our experiments are conducted on a single NVIDIA TITAN RTX GPU. 55 | 56 | .. note:: 57 | `Ours (prop)` combines the proposal network (:class:`nerfacc.PropNetEstimator`) with the 58 | Instant-NGP radiance field. This is exactly what the `Nerfacto`_ model is doing in the 59 | `nerfstudio`_ project. 
In fact, the hyperparameters for `Ours (prop)` in this experiment 60 | are aligned with the `Nerfacto`_ model. 61 | 62 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+ 63 | | PSNR |Bicycle| Garden| Stump | Bonsai|Counter|Kitchen| Room | MEAN | 64 | | | | | | | | | | | 65 | +=======================+=======+=======+=========+=======+=======+=======+=======+=======+ 66 | |NeRF++ (~days) | 22.64 | 24.32 | 23.34 | 29.15 | 26.38 | 27.80 | 28.87 | 26.21 | 67 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+ 68 | |Mip-NeRF 360 (~days) | 24.37 | 26.98 | 26.40 | 33.46 | 29.55 | 32.23 | 31.63 | 29.23 | 69 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+ 70 | |Instant-NGP 35k steps | 22.40 | 24.86 | 23.17 | 24.41 | 27.38 | 29.07 | 30.24 | 25.93 | 71 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+ 72 | | training time | 301s | 339s | 295s | 279s | 339s | 366s | 317s | 319s | 73 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+ 74 | |Ours (occ) 20k steps | 22.40 | 23.94 | 22.98 | 30.09 | 26.84 | 28.03 | 30.60 | 26.41 | 75 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+ 76 | | training time | 277s | 302s | 299s | 278s | 315s | 331s | 301s | 300s | 77 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+ 78 | |Ours (prop) 20k steps | 23.23 | 25.42 | 25.24 | 30.71 | 26.74 | 30.70 | 30.99 | 27.58 | 79 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+ 80 | | training time | 276s | 293s | 291s | 291s | 291s | 295s | 287s | 289s | 81 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+ 82 | 83 | 84 | .. _`github repository`: https://github.com/nerfstudio-project/nerfacc/ 85 | .. 
_`Instant-NGP`: https://arxiv.org/abs/2201.05989 86 | .. _`tiny-cuda-nn`: https://github.com/NVlabs/tiny-cuda-nn 87 | .. _`Nerfacto`: https://docs.nerf.studio/nerfology/methods/nerfacto.html 88 | .. _`nerfstudio`: https://docs.nerf.studio/ -------------------------------------------------------------------------------- /docs/source/examples/static/tensorf.rst: -------------------------------------------------------------------------------- 1 | .. _`TensoRF Example`: 2 | 3 | TensoRF 4 | ==================== 5 | 6 | In this example we showcase how to plug the nerfacc library into the *official* codebase 7 | of `TensoRF `_. See 8 | `our forked repo `_ 9 | for details. 10 | 11 | 12 | Benchmark: NeRF-Synthetic Dataset 13 | --------------------------------- 14 | *updated on 2023-04-04 with nerfacc==0.5.0* 15 | 16 | Our experiments are conducted on a single NVIDIA GeForce RTX 2080 Ti. 17 | 18 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+ 19 | | PSNR | Lego | Mic |Materials| Chair |Hotdog | Ficus | Drums | Ship | MEAN | 20 | | | | | | | | | | | | 21 | +=======================+=======+=======+=========+=======+=======+=======+=======+=======+=======+ 22 | | TensoRF | 35.14 | 25.70 | 33.69 | 37.03 | 36.04 | 29.77 | 34.35 | 30.12 | 32.73 | 23 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+ 24 | | training time | 504s | 522s | 633s | 648s | 584s | 824s | 464s | 759s | 617s | 25 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+ 26 | | Ours (occ) | 35.05 | 25.70 | 33.54 | 36.99 | 35.62 | 29.76 | 34.08 | 29.39 | 32.52 | 27 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+ 28 | | training time | 310s | 312s | 463s | 433s | 363s | 750s | 303s | 468s | 425s | 29 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+ 30 | 31 | 
32 | Benchmark: Tanks&Temples Dataset 33 | --------------------------------- 34 | *updated on 2023-04-04 with nerfacc==0.5.0* 35 | 36 | Our experiments are conducted on a single NVIDIA GeForce RTX 2080 Ti. 37 | 38 | +-----------------------+-------+-------------+--------+-------+-------+ 39 | | PSNR | Barn | Caterpillar | Family | Truck | MEAN | 40 | | | | | | | | 41 | +=======================+=======+=============+========+=======+=======+ 42 | | TensoRF | 26.88 | 25.48 | 33.48 | 26.59 | 28.11 | 43 | +-----------------------+-------+-------------+--------+-------+-------+ 44 | | training time | 24min | 19min | 15min | 18min | 19min | 45 | +-----------------------+-------+-------------+--------+-------+-------+ 46 | | Ours (occ) | 26.74 | 25.64 | 33.16 | 26.70 | 28.06 | 47 | +-----------------------+-------+-------------+--------+-------+-------+ 48 | | training time | 19min | 15min | 11min | 13min | 14min | 49 | +-----------------------+-------+-------------+--------+-------+-------+ 50 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | NerfAcc Documentation 2 | =================================== 3 | 4 | NerfAcc is a PyTorch Nerf acceleration toolbox for both training and inference. It focus on 5 | efficient sampling in the volumetric rendering pipeline of radiance fields, which is 6 | universal and plug-and-play for most of the NeRFs. 7 | With minimal modifications to the existing codebases, Nerfacc provides significant speedups 8 | in training various recent NeRF papers. 9 | **And it is pure Python interface with flexible APIs!** 10 | 11 | | 12 | 13 | .. 
image:: _static/images/teaser.jpg 14 | :align: center 15 | 16 | | 17 | 18 | | Github: https://github.com/nerfstudio-project/nerfacc 19 | | Paper: https://arxiv.org/pdf/2305.04966.pdf 20 | | Authors: `Ruilong Li`_, `Hang Gao`_, `Matthew Tancik`_, `Angjoo Kanazawa`_ 21 | 22 | .. note:: 23 | 24 | Though this repo only contains examples for single scene optimization, 25 | we believe generalizable NeRFs across multiple scenes can also be accelerate with our 26 | :class:`nerfacc.PropNetEstimator`. Examples will be added soon. 27 | 28 | 29 | Installation: 30 | ------------- 31 | 32 | **Dependence**: Please install `Pytorch`_ first. 33 | 34 | The easist way is to install from PyPI. In this way it will build the CUDA code **on the first run** (JIT). 35 | 36 | .. code-block:: console 37 | 38 | $ pip install nerfacc 39 | 40 | Or install from source. In this way it will build the CUDA code during installation. 41 | 42 | .. code-block:: console 43 | 44 | $ pip install git+https://github.com/nerfstudio-project/nerfacc.git 45 | 46 | We also provide pre-built wheels covering major combinations of Pytorch + CUDA versions. 47 | See our `Github README`_ for what we support. 48 | 49 | .. code-block:: console 50 | 51 | // e.g. torch 1.13.0 + CUDA 11.7 52 | $ pip install nerfacc -f https://nerfacc-bucket.s3.us-west-2.amazonaws.com/whl/torch-1.13.0_cu117.html 53 | 54 | 55 | Usage: 56 | ------------- 57 | 58 | The idea of NerfAcc is to perform efficient volumetric sampling with a computationally cheap estimator to discover surfaces. 59 | So NerfAcc can work with any user-defined radiance field. To plug the NerfAcc rendering pipeline into your code and enjoy 60 | the acceleration, you only need to define two functions with your radience field. 61 | 62 | - `sigma_fn`: Compute density at each sample. It will be used by the estimator 63 | (e.g., :class:`nerfacc.OccGridEstimator`, :class:`nerfacc.PropNetEstimator`) to discover surfaces. 
64 | - `rgb_sigma_fn`: Compute color and density at each sample. It will be used by 65 | :func:`nerfacc.rendering` to conduct differentiable volumetric rendering. This function 66 | will receive gradients to update your radiance field. 67 | 68 | An simple example is like this: 69 | 70 | .. code-block:: python 71 | 72 | import torch 73 | from torch import Tensor 74 | import nerfacc 75 | 76 | radiance_field = ... # network: a NeRF model 77 | rays_o: Tensor = ... # ray origins. (n_rays, 3) 78 | rays_d: Tensor = ... # ray normalized directions. (n_rays, 3) 79 | optimizer = ... # optimizer 80 | 81 | estimator = nerfacc.OccGridEstimator(...) 82 | 83 | def sigma_fn( 84 | t_starts: Tensor, t_ends:Tensor, ray_indices: Tensor 85 | ) -> Tensor: 86 | """ Define how to query density for the estimator.""" 87 | t_origins = rays_o[ray_indices] # (n_samples, 3) 88 | t_dirs = rays_d[ray_indices] # (n_samples, 3) 89 | positions = t_origins + t_dirs * (t_starts + t_ends)[:, None] / 2.0 90 | sigmas = radiance_field.query_density(positions) 91 | return sigmas # (n_samples,) 92 | 93 | def rgb_sigma_fn( 94 | t_starts: Tensor, t_ends: Tensor, ray_indices: Tensor 95 | ) -> Tuple[Tensor, Tensor]: 96 | """ Query rgb and density values from a user-defined radiance field. """ 97 | t_origins = rays_o[ray_indices] # (n_samples, 3) 98 | t_dirs = rays_d[ray_indices] # (n_samples, 3) 99 | positions = t_origins + t_dirs * (t_starts + t_ends)[:, None] / 2.0 100 | rgbs, sigmas = radiance_field(positions, condition=t_dirs) 101 | return rgbs, sigmas # (n_samples, 3), (n_samples,) 102 | 103 | # Efficient Raymarching: 104 | # ray_indices: (n_samples,). t_starts: (n_samples,). t_ends: (n_samples,). 105 | ray_indices, t_starts, t_ends = estimator.sampling( 106 | rays_o, rays_d, sigma_fn=sigma_fn, near_plane=0.2, far_plane=1.0, 107 | early_stop_eps=1e-4, alpha_thre=1e-2, 108 | ) 109 | 110 | # Differentiable Volumetric Rendering. 111 | # colors: (n_rays, 3). opaicity: (n_rays, 1). depth: (n_rays, 1). 
112 | color, opacity, depth, extras = nerfacc.rendering( 113 | t_starts, t_ends, ray_indices, n_rays=rays_o.shape[0], rgb_sigma_fn=rgb_sigma_fn 114 | ) 115 | 116 | # Optimize: Both the network and rays will receive gradients 117 | optimizer.zero_grad() 118 | loss = F.mse_loss(color, color_gt) 119 | loss.backward() 120 | optimizer.step() 121 | 122 | 123 | Links: 124 | ------------- 125 | 126 | .. toctree:: 127 | :glob: 128 | :maxdepth: 1 129 | :caption: Methodology 130 | 131 | methodology/* 132 | 133 | .. toctree:: 134 | :glob: 135 | :maxdepth: 1 136 | :caption: Python API 137 | 138 | apis/* 139 | 140 | .. toctree:: 141 | :glob: 142 | :maxdepth: 1 143 | :caption: Example Usages and Benchmarks 144 | 145 | examples/* 146 | 147 | .. toctree:: 148 | :maxdepth: 1 149 | :caption: Projects 150 | 151 | nerfstudio 152 | sdfstudio 153 | instant-nsr-pl 154 | 155 | .. _`vanilla Nerf`: https://arxiv.org/abs/2003.08934 156 | .. _`Instant-NGP Nerf`: https://arxiv.org/abs/2201.05989 157 | .. _`D-Nerf`: https://arxiv.org/abs/2011.13961 158 | .. _`MipNerf360`: https://arxiv.org/abs/2111.12077 159 | .. _`pixel-Nerf`: https://arxiv.org/abs/2012.02190 160 | .. _`Nerf++`: https://arxiv.org/abs/2010.07492 161 | 162 | .. _`Ruilong Li`: https://www.liruilong.cn/ 163 | .. _`Hang Gao`: https://hangg7.com/ 164 | .. _`Matthew Tancik`: https://www.matthewtancik.com/ 165 | .. _`Angjoo Kanazawa`: https://people.eecs.berkeley.edu/~kanazawa/ 166 | 167 | .. _`Github README`: https://github.com/nerfstudio-project/nerfacc#readme 168 | 169 | .. _`PyTorch`: https://pytorch.org/get-started/locally/ 170 | -------------------------------------------------------------------------------- /docs/source/methodology/coding.rst: -------------------------------------------------------------------------------- 1 | .. 
_`Efficient Coding`: 2 | 3 | Efficient Coding 4 | ================ 5 | 6 | Monitor the GPU Utilization 7 | ---------------------------- 8 | 9 | The rule of thumb is to maximize the computation power you have. When working with a GPU, 10 | that means maximizing the percentage of GPU kernels running at the same time. This 11 | can be monitored by the GPU utilization (last column) reported by the `nvidia-smi` command: 12 | 13 | .. image:: ../_static/images/coding/gpu_util.png 14 | :align: center 15 | 16 | | 17 | 18 | If the GPU utilization is less than 100%, it means some GPU kernels are idling 19 | from time to time (if not all the time). This is of course not good for performance. 20 | Under the hood, Pytorch will try to use all the GPU kernels to parallelize the computation, 21 | but in most cases you don't see 100%. Why is that? 22 | 23 | 24 | The first reason is simply that there might not be much to parallelize, in which case 25 | we cannot do much other than increase the batch size. For example 26 | 27 | .. code-block:: python 28 | 29 | # Only 1000 threads are running at the same time to create this tensor. 30 | # So we see 28% GPU utilization. 31 | while True: torch.zeros((1000), device="cuda") 32 | # Now we see 100% GPU utilization. 33 | while True: torch.zeros((10000000), device="cuda") 34 | 35 | The second reason, which is more common, is that there is *CPU-GPU synchronization* 36 | happening in the code. For example: 37 | 38 | .. code-block:: python 39 | 40 | data = torch.rand((10000000), device="cuda") 41 | mask = data > 0.5 42 | ids = torch.where(mask)[0] 43 | assert torch.all(data[mask] == data[ids]) 44 | 45 | # 100% GPU utilization. 46 | while True: data[ids] 47 | # 95% GPU utilization. 48 | while True: data[mask] 49 | 50 | Besides, if there are many CPU operations in the pipeline, such as data loading and 51 | preprocessing, they might also cause the GPU utilization to be low.
In this case, you 52 | can try to use `torch.utils.data.DataLoader` to overlap the data-processing time 53 | with the GPU computation time. 54 | 55 | Avoid CPU-GPU Synchronization 56 | ----------------------------- 57 | 58 | In the above example, if you time your code, you will see a significant difference: 59 | 60 | .. code-block:: python 61 | 62 | # 177 µs ± 15.4 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each) 63 | %timeit data[ids] 64 | # 355 µs ± 466 ns per loop (mean ± std. dev. of 7 runs, 1,000 loops each) 65 | %timeit data[mask] 66 | 67 | Explanation: In this case, the mask operation needs to decide the size of the output tensor, which 68 | lives on the CPU, based on the number of `True` values in the mask that lives on the GPU. So 69 | a synchronization is required. The index-selection operation, on the other hand, 70 | already knows the size of the output tensor from the size of `ids`, so no 71 | synchronization is required. 72 | 73 | You may argue that in this case we can't really improve things because the `ids` are computed 74 | from the `mask`, which requires the synchronization anyway (`torch.where`). However, 75 | in many cases we can avoid the synchronization by writing the code carefully. For example: 76 | 77 | .. code-block:: python 78 | 79 | # no sync. 67.3 µs ± 5.01 ns per loop 80 | while True: torch.zeros((10000000), device="cuda") 81 | # sync. 13.7 ms ± 320 µs per loop 82 | while True: torch.zeros((10000000)).to("cuda") 83 | 84 | Operations that require synchronization include `torch.where`, `tensor.item()`, 85 | `print(tensor)`, `tensor.to(device)`, `torch.nonzero`, etc. Just imagine that those functions 86 | have an inherent `torch.cuda.synchronize()` call under the hood. See the 87 | `official guide `_ 88 | if you are interested in more details. 89 | 90 | In our library, :func:`nerfacc.traverse_grids` is a function that requires synchronization, 91 | because it needs to know the size of the output tensor when traversing the grids.
As a result, 92 | sampling with :class:`nerfacc.OccGridEstimator` also requires synchronization. But there is 93 | no workaround in this case, so just be aware of it. 94 | 95 | 96 | Profiling 97 | ----------------------------- 98 | 99 | There are plenty of tools for profiling. My personal favorite is 100 | `line_profiler `_ which will give you *per-line* runtime 101 | of a function with a simple decorator `@profile`. It is very useful for finding where the bottleneck 102 | is in your code. It is worth noting that due to the asynchronous nature of Pytorch code, you would 103 | need to set `CUDA_LAUNCH_BLOCKING=1` when profiling your code (no matter which profiling tool you are using). 104 | This variable forces CPU-GPU synchronization for every torch function (equivalent to adding 105 | `torch.cuda.synchronize()` everywhere), which can reveal the true runtime of each line of code. 106 | And of course, with `CUDA_LAUNCH_BLOCKING=1` you will get a slower total runtime, so don't forget to 107 | remove it when you are done profiling. -------------------------------------------------------------------------------- /docs/source/methodology/sampling.rst: -------------------------------------------------------------------------------- 1 | .. _`Efficient Sampling`: 2 | 3 | Efficient Sampling 4 | =================================== 5 | 6 | Importance Sampling via Transmittance Estimator. 7 | ------------------------------------------------- 8 | 9 | Efficient sampling is a well-explored problem in Graphics, wherein the 10 | emphasis is on identifying regions that make the most significant 11 | contribution to the final rendering. This objective is generally accomplished 12 | through importance sampling, which aims to distribute samples based on the 13 | probability density function (PDF), denoted as :math:`p(t)`, between the range 14 | of :math:`[t_n, t_f]`.
By computing the cumulative distribution function (CDF) 15 | through integration, *i.e.*, :math:`F(t) = \int_{t_n}^{t} p(v)\,dv`, 16 | samples are generated using the inverse transform sampling method: 17 | 18 | .. math:: 19 | 20 | t = F^{-1}(u) \quad \text{where} \quad u \sim \mathcal{U}[0,1]. 21 | 22 | In volumetric rendering, the contribution of each sample to the final 23 | rendering is expressed by the accumulation weights :math:`T(t)\sigma(t)`: 24 | 25 | .. math:: 26 | 27 | C(\mathbf{r}) = \int_{t_n}^{t_f} T(t)\,\sigma(t)\,c(t)\,dt 28 | 29 | Hence, the PDF for volumetric rendering is :math:`p(t) = T(t)\sigma(t)` 30 | and the CDF is: 31 | 32 | .. math:: 33 | 34 | F(t) = \int_{t_n}^{t} T(v)\sigma(v)\,dv = 1 - T(t) 35 | 36 | Therefore, inverse sampling the CDF :math:`F(t)` is equivalent to inverse 37 | sampling the transmittance :math:`T(t)`. A transmittance estimator is sufficient 38 | to determine the optimal samples. We refer readers to the 39 | `SIGGRAPH 2017 Course: Production Volume Rendering`_ for more details about this 40 | concept if within interests. 41 | 42 | Occupancy Grid Estimator. 43 | ---------------------------- 44 | 45 | .. image:: ../_static/images/illustration_occgrid.png 46 | :class: float-right 47 | :width: 200px 48 | 49 | The idea of Occupancy Grid is to cache the density in the scene with a binaraized voxel grid. When 50 | sampling, the ray marches through the grid with a preset step sizes, and skip the empty regions by querying 51 | the voxel grid. Intuitively, the binaraized voxel grid is an *estimator* of the radiance field, with much 52 | faster readout. This technique is proposed in `Instant-NGP`_ with highly optimized CUDA implementations. 53 | More formally, The estimator describes a binaraized density distribution :math:`\hat{\sigma}` along 54 | the ray with a conservative threshold :math:`\tau`: 55 | 56 | .. 
math:: 57 | 58 | \hat{\sigma}(t_i) = \mathbb{1}\big[\sigma(t_i) > \tau\big] 59 | 60 | Consequently, the piece-wise constant PDF can be expressed as 61 | 62 | .. math:: 63 | 64 | p(t_i) = \hat{\sigma}(t_i) / \sum_{j=1}^{n} \hat{\sigma}(t_j) 65 | 66 | and the piece-wise linear transmittance estimator is 67 | 68 | .. math:: 69 | 70 | T(t_i) = 1 - \sum_{j=1}^{i-1}\hat{\sigma}(t_j) / \sum_{j=1}^{n} \hat{\sigma}(t_j) 71 | 72 | See the figure below for an illustration. 73 | 74 | .. rst-class:: clear-both 75 | 76 | .. image:: ../_static/images/plot_occgrid.png 77 | :align: center 78 | 79 | | 80 | 81 | In `nerfacc`, this is implemented via the :class:`nerfacc.OccGridEstimator` class. 82 | 83 | Proposal Network Estimator. 84 | ----------------------------- 85 | 86 | .. image:: ../_static/images/illustration_propnet.png 87 | :class: float-right 88 | :width: 200px 89 | 90 | Another type of approach is to directly estimate the PDF along the ray with discrete samples. 91 | In `vanilla NeRF`_, the coarse MLP is trained using volumetric rendering loss to output a set of 92 | densities :math:`{\sigma(t_i)}`. This allows for the creation of a piece-wise constant PDF: 93 | 94 | .. math:: 95 | 96 | p(t_i) = \sigma(t_i)\exp(-\sigma(t_i)\,dt) 97 | 98 | and a piece-wise linear transmittance estimator: 99 | 100 | .. math:: 101 | 102 | T(t_i) = \exp(-\sum_{j=1}^{i-1}\sigma(t_i)\,dt) 103 | 104 | This approach was further improved in `Mip-NeRF 360`_ with a PDF matching loss, which allows for 105 | the use of a much smaller MLP in the coarse level, namely Proposal Network, to speedup the 106 | PDF construction. 107 | 108 | See the figure below for an illustration. 109 | 110 | .. image:: ../_static/images/plot_propnet.png 111 | :align: center 112 | 113 | | 114 | 115 | In `nerfacc`, this is implemented via the :class:`nerfacc.PropNetEstimator` class. 116 | 117 | Which Estimator to use? 
118 | ----------------------- 119 | - :class:`nerfacc.OccGridEstimator` is a generally more efficient when most of the space in the scene is empty, such as in the case of `NeRF-Synthetic`_ dataset. But it still places samples within occluded areas that contribute little to the final rendering (e.g., the last sample in the above illustration). 120 | 121 | - :class:`nerfacc.PropNetEstimator` generally provide more accurate transmittance estimation, enabling samples to concentrate more on high-contribution areas (e.g., surfaces) and to be more spread out in both empty and occluded regions. Also this method works nicely on unbouned scenes as it does not require a preset bounding box of the scene. Thus datasets like `Mip-NeRF 360`_ are better suited with this estimator. 122 | 123 | .. _`SIGGRAPH 2017 Course: Production Volume Rendering`: https://graphics.pixar.com/library/ProductionVolumeRendering/paper.pdf 124 | .. _`Instant-NGP`: https://arxiv.org/abs/2201.05989 125 | .. _`Mip-NeRF 360`: https://arxiv.org/abs/2111.12077 126 | .. _`vanilla NeRF`: https://arxiv.org/abs/2003.08934 127 | .. _`NeRF-Synthetic`: https://arxiv.org/abs/2003.08934 -------------------------------------------------------------------------------- /examples/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nerfstudio-project/nerfacc/57ccfa14feb94975836ea6913149a86737220f2b/examples/datasets/__init__.py -------------------------------------------------------------------------------- /examples/datasets/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2022 Ruilong Li, UC Berkeley. 
"""
Copyright (c) 2022 Ruilong Li, UC Berkeley.
"""

import collections

# Lightweight per-ray container: ray origins and viewing directions.
Rays = collections.namedtuple("Rays", ("origins", "viewdirs"))


def namedtuple_map(fn, tup):
    """Apply `fn` to each non-None field of `tup` and rebuild `tup`'s namedtuple type."""
    mapped_fields = [None if field is None else fn(field) for field in tup]
    return type(tup)(*mapped_fields)
"""Train a vanilla-NeRF MLP radiance field with an occupancy-grid estimator.

Copyright (c) 2022 Ruilong Li, UC Berkeley.
"""

import argparse
import pathlib
import time

import imageio
import numpy as np
import torch
import torch.nn.functional as F
import tqdm
from datasets.nerf_synthetic import SubjectLoader
from lpips import LPIPS
from radiance_fields.mlp import VanillaNeRFRadianceField

from examples.utils import (
    NERF_SYNTHETIC_SCENES,
    render_image_with_occgrid,
    set_random_seed,
)
from nerfacc.estimators.occ_grid import OccGridEstimator

device = "cuda:0"
set_random_seed(42)

parser = argparse.ArgumentParser()
parser.add_argument(
    "--data_root",
    type=str,
    default=str(pathlib.Path.cwd() / "data/nerf_synthetic"),
    help="the root dir of the dataset",
)
parser.add_argument(
    "--train_split",
    type=str,
    default="train",
    choices=["train", "trainval"],
    help="which train split to use",
)
parser.add_argument(
    "--model_path",
    type=str,
    default=None,
    help="the path of the pretrained model",
)
parser.add_argument(
    "--scene",
    type=str,
    default="lego",
    choices=NERF_SYNTHETIC_SCENES,
    help="which scene to use",
)
parser.add_argument(
    "--test_chunk_size",
    type=int,
    default=4096,
)
args = parser.parse_args()

# training parameters
max_steps = 50000
init_batch_size = 1024
target_sample_batch_size = 1 << 16  # keep ~64k samples per optimization step
# scene parameters
aabb = torch.tensor([-1.5, -1.5, -1.5, 1.5, 1.5, 1.5], device=device)
near_plane = 0.0
far_plane = 1.0e10  # NOTE(review): not passed to the renderer below — confirm intended
# model parameters
grid_resolution = 128
grid_nlvl = 1
# render parameters
render_step_size = 5e-3

# setup the dataset
train_dataset = SubjectLoader(
    subject_id=args.scene,
    root_fp=args.data_root,
    split=args.train_split,
    num_rays=init_batch_size,
    device=device,
)
test_dataset = SubjectLoader(
    subject_id=args.scene,
    root_fp=args.data_root,
    split="test",
    num_rays=None,
    device=device,
)

# Binary occupancy grid used to skip empty space during ray marching.
estimator = OccGridEstimator(
    roi_aabb=aabb, resolution=grid_resolution, levels=grid_nlvl
).to(device)

# setup the radiance field we want to train.
radiance_field = VanillaNeRFRadianceField().to(device)
optimizer = torch.optim.Adam(radiance_field.parameters(), lr=5e-4)
scheduler = torch.optim.lr_scheduler.MultiStepLR(
    optimizer,
    milestones=[
        max_steps // 2,
        max_steps * 3 // 4,
        max_steps * 5 // 6,
        max_steps * 9 // 10,
    ],
    gamma=0.33,
)

# LPIPS expects NCHW images in [-1, 1]; test images come as HWC in [0, 1].
lpips_net = LPIPS(net="vgg").to(device)
lpips_norm_fn = lambda x: x[None, ...].permute(0, 3, 1, 2) * 2 - 1
lpips_fn = lambda x, y: lpips_net(lpips_norm_fn(x), lpips_norm_fn(y)).mean()

if args.model_path is not None:
    # Restore model/optimizer/scheduler/grid state to resume training.
    checkpoint = torch.load(args.model_path)
    radiance_field.load_state_dict(checkpoint["radiance_field_state_dict"])
    optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
    scheduler.load_state_dict(checkpoint["scheduler_state_dict"])
    estimator.load_state_dict(checkpoint["estimator_state_dict"])
    init_step = checkpoint["step"]
else:
    init_step = 0

# training
tic = time.time()
# BUGFIX: start from the checkpoint step instead of 0, so --model_path actually
# resumes training (previously the loaded step was discarded and training
# restarted from scratch). Re-running the saved final step also triggers the
# checkpoint/evaluation branch for a fully-trained model.
for step in range(init_step, max_steps + 1):
    radiance_field.train()
    estimator.train()

    # Sample a random training view.
    i = torch.randint(0, len(train_dataset), (1,)).item()
    data = train_dataset[i]

    render_bkgd = data["color_bkgd"]
    rays = data["rays"]
    pixels = data["pixels"]

    def occ_eval_fn(x):
        # Opacity proxy: density integrated over one marching step.
        density = radiance_field.query_density(x)
        return density * render_step_size

    # update occupancy grid
    estimator.update_every_n_steps(
        step=step,
        occ_eval_fn=occ_eval_fn,
        occ_thre=1e-2,
    )

    # render
    rgb, acc, depth, n_rendering_samples = render_image_with_occgrid(
        radiance_field,
        estimator,
        rays,
        # rendering options
        near_plane=near_plane,
        render_step_size=render_step_size,
        render_bkgd=render_bkgd,
    )
    if n_rendering_samples == 0:
        # All rays missed occupied space (common early on); skip this step.
        continue

    if target_sample_batch_size > 0:
        # dynamic batch size for rays to keep sample batch size constant.
        num_rays = len(pixels)
        num_rays = int(
            num_rays * (target_sample_batch_size / float(n_rendering_samples))
        )
        train_dataset.update_num_rays(num_rays)

    # compute loss
    loss = F.smooth_l1_loss(rgb, pixels)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    scheduler.step()

    if step % 5000 == 0:
        # Periodic progress report; PSNR is computed from MSE, not the
        # smooth-L1 training loss.
        elapsed_time = time.time() - tic
        mse = F.mse_loss(rgb, pixels)
        psnr = -10.0 * torch.log(mse) / np.log(10.0)
        print(
            f"elapsed_time={elapsed_time:.2f}s | step={step} | "
            f"loss={mse:.5f} | psnr={psnr:.2f} | "
            f"n_rendering_samples={n_rendering_samples:d} | num_rays={len(pixels):d} | "
            f"max_depth={depth.max():.3f} | "
        )

    if step > 0 and step % max_steps == 0:
        # Final step: checkpoint everything needed to resume, then evaluate.
        model_save_path = str(pathlib.Path.cwd() / f"mlp_nerf_{step}")
        torch.save(
            {
                "step": step,
                "radiance_field_state_dict": radiance_field.state_dict(),
                "optimizer_state_dict": optimizer.state_dict(),
                "scheduler_state_dict": scheduler.state_dict(),
                "estimator_state_dict": estimator.state_dict(),
            },
            model_save_path,
        )

        # evaluation
        radiance_field.eval()
        estimator.eval()

        psnrs = []
        lpips = []
        with torch.no_grad():
            for i in tqdm.tqdm(range(len(test_dataset))):
                data = test_dataset[i]
                render_bkgd = data["color_bkgd"]
                rays = data["rays"]
                pixels = data["pixels"]

                # rendering
                rgb, acc, depth, _ = render_image_with_occgrid(
                    radiance_field,
                    estimator,
                    rays,
                    # rendering options
                    near_plane=near_plane,
                    render_step_size=render_step_size,
                    render_bkgd=render_bkgd,
                    # test options
                    test_chunk_size=args.test_chunk_size,
                )
                mse = F.mse_loss(rgb, pixels)
                psnr = -10.0 * torch.log(mse) / np.log(10.0)
                psnrs.append(psnr.item())
                lpips.append(lpips_fn(rgb, pixels).item())
                # if i == 0:
                #     imageio.imwrite(
                #         "rgb_test.png",
                #         (rgb.cpu().numpy() * 255).astype(np.uint8),
                #     )
                #     imageio.imwrite(
                #         "rgb_error.png",
                #         (
                #             (rgb - pixels).norm(dim=-1).cpu().numpy() * 255
                #         ).astype(np.uint8),
                #     )
        psnr_avg = sum(psnrs) / len(psnrs)
        lpips_avg = sum(lpips) / len(lpips)
        print(f"evaluation: psnr_avg={psnr_avg}, lpips_avg={lpips_avg}")
3 | """ 4 | 5 | import argparse 6 | import math 7 | import pathlib 8 | import time 9 | 10 | import imageio 11 | import numpy as np 12 | import torch 13 | import torch.nn.functional as F 14 | import tqdm 15 | from datasets.dnerf_synthetic import SubjectLoader 16 | from lpips import LPIPS 17 | from radiance_fields.mlp import TNeRFRadianceField 18 | 19 | from examples.utils import render_image_with_occgrid, set_random_seed 20 | from nerfacc.estimators.occ_grid import OccGridEstimator 21 | 22 | device = "cuda:0" 23 | set_random_seed(42) 24 | 25 | parser = argparse.ArgumentParser() 26 | parser.add_argument( 27 | "--data_root", 28 | type=str, 29 | default=str(pathlib.Path.cwd() / "data/dnerf"), 30 | help="the root dir of the dataset", 31 | ) 32 | parser.add_argument( 33 | "--train_split", 34 | type=str, 35 | default="train", 36 | choices=["train"], 37 | help="which train split to use", 38 | ) 39 | parser.add_argument( 40 | "--scene", 41 | type=str, 42 | default="lego", 43 | choices=[ 44 | # dnerf 45 | "bouncingballs", 46 | "hellwarrior", 47 | "hook", 48 | "jumpingjacks", 49 | "lego", 50 | "mutant", 51 | "standup", 52 | "trex", 53 | ], 54 | help="which scene to use", 55 | ) 56 | parser.add_argument( 57 | "--test_chunk_size", 58 | type=int, 59 | default=4096, 60 | ) 61 | args = parser.parse_args() 62 | 63 | # training parameters 64 | max_steps = 30000 65 | init_batch_size = 1024 66 | target_sample_batch_size = 1 << 16 67 | # scene parameters 68 | aabb = torch.tensor([-1.5, -1.5, -1.5, 1.5, 1.5, 1.5], device=device) 69 | near_plane = 0.0 70 | far_plane = 1.0e10 71 | # model parameters 72 | grid_resolution = 128 73 | grid_nlvl = 1 74 | # render parameters 75 | render_step_size = 5e-3 76 | 77 | # setup the dataset 78 | train_dataset = SubjectLoader( 79 | subject_id=args.scene, 80 | root_fp=args.data_root, 81 | split=args.train_split, 82 | num_rays=init_batch_size, 83 | device=device, 84 | ) 85 | test_dataset = SubjectLoader( 86 | subject_id=args.scene, 87 | 
root_fp=args.data_root, 88 | split="test", 89 | num_rays=None, 90 | device=device, 91 | ) 92 | 93 | estimator = OccGridEstimator( 94 | roi_aabb=aabb, resolution=grid_resolution, levels=grid_nlvl 95 | ).to(device) 96 | 97 | # setup the radiance field we want to train. 98 | radiance_field = TNeRFRadianceField().to(device) 99 | optimizer = torch.optim.Adam(radiance_field.parameters(), lr=5e-4) 100 | scheduler = torch.optim.lr_scheduler.MultiStepLR( 101 | optimizer, 102 | milestones=[ 103 | max_steps // 2, 104 | max_steps * 3 // 4, 105 | max_steps * 5 // 6, 106 | max_steps * 9 // 10, 107 | ], 108 | gamma=0.33, 109 | ) 110 | 111 | lpips_net = LPIPS(net="vgg").to(device) 112 | lpips_norm_fn = lambda x: x[None, ...].permute(0, 3, 1, 2) * 2 - 1 113 | lpips_fn = lambda x, y: lpips_net(lpips_norm_fn(x), lpips_norm_fn(y)).mean() 114 | 115 | # training 116 | tic = time.time() 117 | for step in range(max_steps + 1): 118 | radiance_field.train() 119 | estimator.train() 120 | 121 | i = torch.randint(0, len(train_dataset), (1,)).item() 122 | data = train_dataset[i] 123 | 124 | render_bkgd = data["color_bkgd"] 125 | rays = data["rays"] 126 | pixels = data["pixels"] 127 | timestamps = data["timestamps"] 128 | 129 | # update occupancy grid 130 | estimator.update_every_n_steps( 131 | step=step, 132 | occ_eval_fn=lambda x: radiance_field.query_opacity( 133 | x, timestamps, render_step_size 134 | ), 135 | occ_thre=1e-2, 136 | ) 137 | 138 | # render 139 | rgb, acc, depth, n_rendering_samples = render_image_with_occgrid( 140 | radiance_field, 141 | estimator, 142 | rays, 143 | # rendering options 144 | near_plane=near_plane, 145 | render_step_size=render_step_size, 146 | render_bkgd=render_bkgd, 147 | alpha_thre=0.01 if step > 1000 else 0.00, 148 | # t-nerf options 149 | timestamps=timestamps, 150 | ) 151 | if n_rendering_samples == 0: 152 | continue 153 | 154 | if target_sample_batch_size > 0: 155 | # dynamic batch size for rays to keep sample batch size constant. 
156 | num_rays = len(pixels) 157 | num_rays = int( 158 | num_rays * (target_sample_batch_size / float(n_rendering_samples)) 159 | ) 160 | train_dataset.update_num_rays(num_rays) 161 | 162 | # compute loss 163 | loss = F.smooth_l1_loss(rgb, pixels) 164 | 165 | optimizer.zero_grad() 166 | loss.backward() 167 | optimizer.step() 168 | scheduler.step() 169 | 170 | if step % 5000 == 0: 171 | elapsed_time = time.time() - tic 172 | loss = F.mse_loss(rgb, pixels) 173 | psnr = -10.0 * torch.log(loss) / np.log(10.0) 174 | print( 175 | f"elapsed_time={elapsed_time:.2f}s | step={step} | " 176 | f"loss={loss:.5f} | psnr={psnr:.2f} | " 177 | f"n_rendering_samples={n_rendering_samples:d} | num_rays={len(pixels):d} | " 178 | f"max_depth={depth.max():.3f} | " 179 | ) 180 | 181 | if step > 0 and step % max_steps == 0: 182 | # evaluation 183 | radiance_field.eval() 184 | estimator.eval() 185 | 186 | psnrs = [] 187 | lpips = [] 188 | with torch.no_grad(): 189 | for i in tqdm.tqdm(range(len(test_dataset))): 190 | data = test_dataset[i] 191 | render_bkgd = data["color_bkgd"] 192 | rays = data["rays"] 193 | pixels = data["pixels"] 194 | timestamps = data["timestamps"] 195 | 196 | # rendering 197 | rgb, acc, depth, _ = render_image_with_occgrid( 198 | radiance_field, 199 | estimator, 200 | rays, 201 | # rendering options 202 | near_plane=near_plane, 203 | render_step_size=render_step_size, 204 | render_bkgd=render_bkgd, 205 | alpha_thre=0.01, 206 | # test options 207 | test_chunk_size=args.test_chunk_size, 208 | # t-nerf options 209 | timestamps=timestamps, 210 | ) 211 | mse = F.mse_loss(rgb, pixels) 212 | psnr = -10.0 * torch.log(mse) / np.log(10.0) 213 | psnrs.append(psnr.item()) 214 | lpips.append(lpips_fn(rgb, pixels).item()) 215 | # if i == 0: 216 | # imageio.imwrite( 217 | # "rgb_test.png", 218 | # (rgb.cpu().numpy() * 255).astype(np.uint8), 219 | # ) 220 | # imageio.imwrite( 221 | # "rgb_error.png", 222 | # ( 223 | # (rgb - pixels).norm(dim=-1).cpu().numpy() * 255 224 | # 
"""
Copyright (c) 2022 Ruilong Li, UC Berkeley.
"""

from .data_specs import RayIntervals, RaySamples
from .estimators.occ_grid import OccGridEstimator
from .estimators.prop_net import PropNetEstimator
from .estimators.vdb import VDBEstimator, traverse_vdbs
from .grid import ray_aabb_intersect, traverse_grids
from .losses import distortion
from .pack import pack_info
from .pdf import importance_sampling, searchsorted
from .scan import exclusive_prod, exclusive_sum, inclusive_prod, inclusive_sum
from .version import __version__
from .volrend import (
    accumulate_along_rays,
    render_transmittance_from_alpha,
    render_transmittance_from_density,
    render_visibility_from_alpha,
    render_visibility_from_density,
    render_weight_from_alpha,
    render_weight_from_density,
    rendering,
)

# Public API of the package.
# NOTE: the ``*_cub`` scan variants live in ``nerfacc.cuda`` and are NOT
# imported into this module. Listing them here made
# ``from nerfacc import *`` raise AttributeError, so they were removed.
__all__ = [
    "__version__",
    "inclusive_prod",
    "exclusive_prod",
    "inclusive_sum",
    "exclusive_sum",
    "pack_info",
    "render_visibility_from_alpha",
    "render_visibility_from_density",
    "render_weight_from_alpha",
    "render_weight_from_density",
    "render_transmittance_from_alpha",
    "render_transmittance_from_density",
    "accumulate_along_rays",
    "rendering",
    "importance_sampling",
    "searchsorted",
    "RayIntervals",
    "RaySamples",
    "ray_aabb_intersect",
    "traverse_grids",
    "traverse_vdbs",
    "OccGridEstimator",
    "PropNetEstimator",
    "VDBEstimator",
    "distortion",
]
59 | params: (..., 4) or (4) OpenCV distortion parameters. 60 | 61 | Returns: 62 | (..., 2) undistorted UV coordinates. 63 | """ 64 | assert uv.shape[-1] == 2 65 | assert params.shape[-1] == 4 66 | batch_shape = uv.shape[:-1] 67 | params = torch.broadcast_to(params, batch_shape + (params.shape[-1],)) 68 | 69 | return _C.opencv_lens_undistortion_fisheye( 70 | uv.contiguous(), params.contiguous(), eps, iters 71 | ) 72 | 73 | 74 | def _opencv_lens_distortion(uv: Tensor, params: Tensor) -> Tensor: 75 | """The opencv camera distortion of {k1, k2, p1, p2, k3, k4, k5, k6}. 76 | 77 | See https://docs.opencv.org/3.4/d9/d0c/group__calib3d.html for more details. 78 | """ 79 | k1, k2, p1, p2, k3, k4, k5, k6 = torch.unbind(params, dim=-1) 80 | s1, s2, s3, s4 = 0, 0, 0, 0 81 | u, v = torch.unbind(uv, dim=-1) 82 | r2 = u * u + v * v 83 | r4 = r2**2 84 | r6 = r4 * r2 85 | ratial = (1 + k1 * r2 + k2 * r4 + k3 * r6) / ( 86 | 1 + k4 * r2 + k5 * r4 + k6 * r6 87 | ) 88 | fx = 2 * p1 * u * v + p2 * (r2 + 2 * u * u) + s1 * r2 + s2 * r4 89 | fy = 2 * p2 * u * v + p1 * (r2 + 2 * v * v) + s3 * r2 + s4 * r4 90 | return torch.stack([u * ratial + fx, v * ratial + fy], dim=-1) 91 | 92 | 93 | def _opencv_lens_distortion_fisheye( 94 | uv: Tensor, params: Tensor, eps: float = 1e-10 95 | ) -> Tensor: 96 | """The opencv camera distortion of {k1, k2, k3, p1, p2}. 97 | 98 | See https://docs.opencv.org/4.x/db/d58/group__calib3d__fisheye.html for more details. 99 | 100 | Args: 101 | uv: (..., 2) UV coordinates. 102 | params: (..., 4) or (4) OpenCV distortion parameters. 103 | 104 | Returns: 105 | (..., 2) distorted UV coordinates. 
106 | """ 107 | assert params.shape[-1] == 4, f"Invalid params shape: {params.shape}" 108 | k1, k2, k3, k4 = torch.unbind(params, dim=-1) 109 | u, v = torch.unbind(uv, dim=-1) 110 | r = torch.sqrt(u * u + v * v) 111 | theta = torch.atan(r) 112 | theta_d = theta * ( 113 | 1 114 | + k1 * theta**2 115 | + k2 * theta**4 116 | + k3 * theta**6 117 | + k4 * theta**8 118 | ) 119 | scale = theta_d / torch.clamp(r, min=eps) 120 | return uv * scale[..., None] 121 | 122 | 123 | @torch.jit.script 124 | def _compute_residual_and_jacobian( 125 | x: Tensor, y: Tensor, xd: Tensor, yd: Tensor, params: Tensor 126 | ) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor]: 127 | assert params.shape[-1] == 8 128 | 129 | k1, k2, p1, p2, k3, k4, k5, k6 = torch.unbind(params, dim=-1) 130 | 131 | # let r(x, y) = x^2 + y^2; 132 | # alpha(x, y) = 1 + k1 * r(x, y) + k2 * r(x, y) ^2 + k3 * r(x, y)^3; 133 | # beta(x, y) = 1 + k4 * r(x, y) + k5 * r(x, y) ^2 + k6 * r(x, y)^3; 134 | # d(x, y) = alpha(x, y) / beta(x, y); 135 | r = x * x + y * y 136 | alpha = 1.0 + r * (k1 + r * (k2 + r * k3)) 137 | beta = 1.0 + r * (k4 + r * (k5 + r * k6)) 138 | d = alpha / beta 139 | 140 | # The perfect projection is: 141 | # xd = x * d(x, y) + 2 * p1 * x * y + p2 * (r(x, y) + 2 * x^2); 142 | # yd = y * d(x, y) + 2 * p2 * x * y + p1 * (r(x, y) + 2 * y^2); 143 | # 144 | # Let's define 145 | # 146 | # fx(x, y) = x * d(x, y) + 2 * p1 * x * y + p2 * (r(x, y) + 2 * x^2) - xd; 147 | # fy(x, y) = y * d(x, y) + 2 * p2 * x * y + p1 * (r(x, y) + 2 * y^2) - yd; 148 | # 149 | # We are looking for a solution that satisfies 150 | # fx(x, y) = fy(x, y) = 0; 151 | fx = d * x + 2 * p1 * x * y + p2 * (r + 2 * x * x) - xd 152 | fy = d * y + 2 * p2 * x * y + p1 * (r + 2 * y * y) - yd 153 | 154 | # Compute derivative of alpha, beta over r. 
155 | alpha_r = k1 + r * (2.0 * k2 + r * (3.0 * k3)) 156 | beta_r = k4 + r * (2.0 * k5 + r * (3.0 * k6)) 157 | 158 | # Compute derivative of d over [x, y] 159 | d_r = (alpha_r * beta - alpha * beta_r) / (beta * beta) 160 | d_x = 2.0 * x * d_r 161 | d_y = 2.0 * y * d_r 162 | 163 | # Compute derivative of fx over x and y. 164 | fx_x = d + d_x * x + 2.0 * p1 * y + 6.0 * p2 * x 165 | fx_y = d_y * x + 2.0 * p1 * x + 2.0 * p2 * y 166 | 167 | # Compute derivative of fy over x and y. 168 | fy_x = d_x * y + 2.0 * p2 * y + 2.0 * p1 * x 169 | fy_y = d + d_y * y + 2.0 * p2 * x + 6.0 * p1 * y 170 | 171 | return fx, fy, fx_x, fx_y, fy_x, fy_y 172 | 173 | 174 | @torch.jit.script 175 | def _opencv_lens_undistortion( 176 | uv: Tensor, params: Tensor, eps: float = 1e-6, iters: int = 10 177 | ) -> Tensor: 178 | """Same as opencv_lens_undistortion(), but native PyTorch. 179 | 180 | Took from with bug fix and modification. 181 | https://github.com/nerfstudio-project/nerfstudio/blob/ec603634edbd61b13bdf2c598fda8c993370b8f7/nerfstudio/cameras/camera_utils.py 182 | """ 183 | assert uv.shape[-1] == 2 184 | assert params.shape[-1] in [0, 1, 2, 4, 8] 185 | 186 | if params.shape[-1] == 0: 187 | return uv 188 | elif params.shape[-1] < 8: 189 | params = F.pad(params, (0, 8 - params.shape[-1]), "constant", 0.0) 190 | assert params.shape[-1] == 8 191 | 192 | # Initialize from the distorted point. 
193 | x, y = x0, y0 = torch.unbind(uv, dim=-1) 194 | 195 | zeros = torch.zeros_like(x) 196 | for _ in range(iters): 197 | fx, fy, fx_x, fx_y, fy_x, fy_y = _compute_residual_and_jacobian( 198 | x=x, y=y, xd=x0, yd=y0, params=params 199 | ) 200 | denominator = fy_x * fx_y - fx_x * fy_y 201 | mask = torch.abs(denominator) > eps 202 | 203 | x_numerator = fx * fy_y - fy * fx_y 204 | y_numerator = fy * fx_x - fx * fy_x 205 | step_x = torch.where(mask, x_numerator / denominator, zeros) 206 | step_y = torch.where(mask, y_numerator / denominator, zeros) 207 | 208 | x = x + step_x 209 | y = y + step_y 210 | 211 | return torch.stack([x, y], dim=-1) 212 | -------------------------------------------------------------------------------- /nerfacc/cuda/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2022 Ruilong Li, UC Berkeley. 3 | """ 4 | 5 | from typing import Any, Callable 6 | 7 | 8 | def _make_lazy_cuda_func(name: str) -> Callable: 9 | def call_cuda(*args, **kwargs): 10 | # pylint: disable=import-outside-toplevel 11 | from ._backend import _C 12 | 13 | return getattr(_C, name)(*args, **kwargs) 14 | 15 | return call_cuda 16 | 17 | 18 | # data specs 19 | RaySegmentsSpec = _make_lazy_cuda_func("RaySegmentsSpec") 20 | 21 | # grid 22 | ray_aabb_intersect = _make_lazy_cuda_func("ray_aabb_intersect") 23 | traverse_grids = _make_lazy_cuda_func("traverse_grids") 24 | 25 | # scan 26 | inclusive_sum = _make_lazy_cuda_func("inclusive_sum") 27 | exclusive_sum = _make_lazy_cuda_func("exclusive_sum") 28 | inclusive_prod_forward = _make_lazy_cuda_func("inclusive_prod_forward") 29 | inclusive_prod_backward = _make_lazy_cuda_func("inclusive_prod_backward") 30 | exclusive_prod_forward = _make_lazy_cuda_func("exclusive_prod_forward") 31 | exclusive_prod_backward = _make_lazy_cuda_func("exclusive_prod_backward") 32 | 33 | is_cub_available = _make_lazy_cuda_func("is_cub_available") 34 | inclusive_sum_cub = 
_make_lazy_cuda_func("inclusive_sum_cub") 35 | exclusive_sum_cub = _make_lazy_cuda_func("exclusive_sum_cub") 36 | inclusive_prod_cub_forward = _make_lazy_cuda_func("inclusive_prod_cub_forward") 37 | inclusive_prod_cub_backward = _make_lazy_cuda_func( 38 | "inclusive_prod_cub_backward" 39 | ) 40 | exclusive_prod_cub_forward = _make_lazy_cuda_func("exclusive_prod_cub_forward") 41 | exclusive_prod_cub_backward = _make_lazy_cuda_func( 42 | "exclusive_prod_cub_backward" 43 | ) 44 | 45 | # pdf 46 | importance_sampling = _make_lazy_cuda_func("importance_sampling") 47 | searchsorted = _make_lazy_cuda_func("searchsorted") 48 | 49 | # camera 50 | opencv_lens_undistortion = _make_lazy_cuda_func("opencv_lens_undistortion") 51 | opencv_lens_undistortion_fisheye = _make_lazy_cuda_func( 52 | "opencv_lens_undistortion_fisheye" 53 | ) 54 | -------------------------------------------------------------------------------- /nerfacc/cuda/_backend.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2022 Ruilong Li, UC Berkeley. 
3 | """ 4 | 5 | import glob 6 | import json 7 | import os 8 | import shutil 9 | from subprocess import DEVNULL, call 10 | 11 | from rich.console import Console 12 | from torch.utils.cpp_extension import _get_build_directory, load 13 | 14 | PATH = os.path.dirname(os.path.abspath(__file__)) 15 | 16 | 17 | def cuda_toolkit_available(): 18 | """Check if the nvcc is avaiable on the machine.""" 19 | try: 20 | call(["nvcc"], stdout=DEVNULL, stderr=DEVNULL) 21 | return True 22 | except FileNotFoundError: 23 | return False 24 | 25 | 26 | def cuda_toolkit_version(): 27 | """Get the cuda toolkit version.""" 28 | cuda_home = os.path.join(os.path.dirname(shutil.which("nvcc")), "..") 29 | if os.path.exists(os.path.join(cuda_home, "version.txt")): 30 | with open(os.path.join(cuda_home, "version.txt")) as f: 31 | cuda_version = f.read().strip().split()[-1] 32 | elif os.path.exists(os.path.join(cuda_home, "version.json")): 33 | with open(os.path.join(cuda_home, "version.json")) as f: 34 | cuda_version = json.load(f)["cuda"]["version"] 35 | else: 36 | raise RuntimeError("Cannot find the cuda version.") 37 | return cuda_version 38 | 39 | 40 | name = "nerfacc_cuda" 41 | build_dir = _get_build_directory(name, verbose=False) 42 | extra_include_paths = [] 43 | extra_cflags = ["-O3"] 44 | extra_cuda_cflags = ["-O3"] 45 | 46 | _C = None 47 | sources = list(glob.glob(os.path.join(PATH, "csrc/*.cu"))) + list( 48 | glob.glob(os.path.join(PATH, "csrc/*.cpp")) 49 | ) 50 | 51 | try: 52 | # try to import the compiled module (via setup.py) 53 | from nerfacc import csrc as _C 54 | except ImportError: 55 | # if failed, try with JIT compilation 56 | if cuda_toolkit_available(): 57 | if os.listdir(build_dir) != []: 58 | # If the build exists, we assume the extension has been built 59 | # and we can load it. 
// Undistort UV coordinates with the 4-parameter OpenCV fisheye model.
//
// uv:      [N, 2] interleaved (u, v) input coordinates.
// params:  [N, 4] per-point coefficients {k1, k2, k3, k4} (indexed tid * 4).
// uv_out:  [N, 2] undistorted output, written by the iterative solver.
// success: [N] per-point convergence flag returned by the solver.
//
// The actual iteration lives in iterative_opencv_lens_undistortion_fisheye
// (utils_camera.cuh); criteria_iters / criteria_eps are forwarded to it —
// exact convergence semantics are defined there.
__global__ void opencv_lens_undistortion_fisheye(
    const int64_t N,
    const float* uv,
    const float* params,
    const int criteria_iters,
    const float criteria_eps,
    float* uv_out,
    bool* success)
{
    // parallelize over outputs (grid-stride loop so any launch shape covers N)
    for (int64_t tid = blockIdx.x * blockDim.x + threadIdx.x; tid < N; tid += blockDim.x * gridDim.x)
    {
        success[tid] = iterative_opencv_lens_undistortion_fisheye(
            uv[tid * 2 + 0],
            uv[tid * 2 + 1],
            params[tid * 4 + 0], // k1
            params[tid * 4 + 1], // k2
            params[tid * 4 + 2], // k3
            params[tid * 4 + 3], // k4
            criteria_iters,
            criteria_eps,
            uv_out[tid * 2 + 0],
            uv_out[tid * 2 + 1]
        );
    }
}
const int64_t n_params, 40 | const float* uv, 41 | const float* params, 42 | const float eps, 43 | const int max_iterations, 44 | float* uv_out) 45 | { 46 | // parallelize over outputs 47 | for (int64_t tid = blockIdx.x * blockDim.x + threadIdx.x; tid < N; tid += blockDim.x * gridDim.x) 48 | { 49 | if (n_params == 5) { 50 | radial_and_tangential_undistort( 51 | uv[tid * 2 + 0], 52 | uv[tid * 2 + 1], 53 | params[tid * n_params + 0], // k1 54 | params[tid * n_params + 1], // k2 55 | params[tid * n_params + 4], // k3 56 | 0.f, // k4 57 | 0.f, // k5 58 | 0.f, // k6 59 | params[tid * n_params + 2], // p1 60 | params[tid * n_params + 3], // p2 61 | eps, 62 | max_iterations, 63 | uv_out[tid * 2 + 0], 64 | uv_out[tid * 2 + 1]); 65 | } else if (n_params == 8) { 66 | radial_and_tangential_undistort( 67 | uv[tid * 2 + 0], 68 | uv[tid * 2 + 1], 69 | params[tid * n_params + 0], // k1 70 | params[tid * n_params + 1], // k2 71 | params[tid * n_params + 4], // k3 72 | params[tid * n_params + 5], // k4 73 | params[tid * n_params + 6], // k5 74 | params[tid * n_params + 7], // k6 75 | params[tid * n_params + 2], // p1 76 | params[tid * n_params + 3], // p2 77 | eps, 78 | max_iterations, 79 | uv_out[tid * 2 + 0], 80 | uv_out[tid * 2 + 1]); 81 | } else if (n_params == 12) { 82 | bool success = iterative_opencv_lens_undistortion( 83 | uv[tid * 2 + 0], 84 | uv[tid * 2 + 1], 85 | params[tid * 12 + 0], // k1 86 | params[tid * 12 + 1], // k2 87 | params[tid * 12 + 2], // k3 88 | params[tid * 12 + 3], // k4 89 | params[tid * 12 + 4], // k5 90 | params[tid * 12 + 5], // k6 91 | params[tid * 12 + 6], // p1 92 | params[tid * 12 + 7], // p2 93 | params[tid * 12 + 8], // s1 94 | params[tid * 12 + 9], // s2 95 | params[tid * 12 + 10], // s3 96 | params[tid * 12 + 11], // s4 97 | max_iterations, 98 | uv_out[tid * 2 + 0], 99 | uv_out[tid * 2 + 1] 100 | ); 101 | if (!success) { 102 | uv_out[tid * 2 + 0] = uv[tid * 2 + 0]; 103 | uv_out[tid * 2 + 1] = uv[tid * 2 + 1]; 104 | } 105 | } 106 | } 107 | } 
// Undistort UV coordinates with the OpenCV pinhole distortion model.
//
// Args:
//   uv:     [..., 2] distorted UV coordinates (float32, CUDA, contiguous).
//   params: [..., 5], [..., 8] or [..., 12] distortion coefficients with the
//           same number of dimensions as `uv`.
//   eps:            convergence threshold forwarded to the kernel.
//   max_iterations: iteration cap forwarded to the kernel.
//
// Returns: [..., 2] undistorted UV coordinates (same shape as `uv`).
torch::Tensor opencv_lens_undistortion(
    const torch::Tensor& uv,     // [..., 2]
    const torch::Tensor& params, // [..., 5], [..., 8] or [..., 12]
    const float eps,
    const int max_iterations)
{
    DEVICE_GUARD(uv);
    CHECK_INPUT(uv);
    CHECK_INPUT(params);
    TORCH_CHECK(uv.ndimension() == params.ndimension());
    TORCH_CHECK(uv.size(-1) == 2, "uv must have shape [..., 2]");
    TORCH_CHECK(params.size(-1) == 5 || params.size(-1) == 8 || params.size(-1) == 12);

    int64_t N = uv.numel() / 2;
    int64_t n_params = params.size(-1);

    auto uv_out = torch::empty_like(uv);
    // Fix: with an empty input, threads.x below would be 0, which divides by
    // zero in ceil_div and yields an invalid kernel launch configuration.
    if (N == 0) {
        return uv_out;
    }

    at::cuda::CUDAStream stream = at::cuda::getCurrentCUDAStream();
    int64_t max_threads = 512;
    int64_t max_blocks = 65535;
    dim3 threads = dim3(min(max_threads, N));
    dim3 blocks = dim3(min(max_blocks, ceil_div(N, threads.x)));

    device::opencv_lens_undistortion<<<blocks, threads, 0, stream>>>(
        N,
        n_params,
        uv.data_ptr<float>(),
        params.data_ptr<float>(),
        eps,
        max_iterations,
        uv_out.data_ptr<float>());

    return uv_out;
}
// Container for per-ray segment samples in one of two layouts:
//   * "batched":   vals is [n_rays, n_edges_per_ray] and the bookkeeping
//                  tensors are unused;
//   * "flattened": vals is [n_edges] and chunk_starts / chunk_cnts map each
//                  ray to its slice of the flat edge arrays.
struct RaySegmentsSpec {
  torch::Tensor vals; // [n_edges] or [n_rays, n_edges_per_ray]
  // for flattened tensor
  torch::Tensor chunk_starts; // [n_rays] offset of each ray's first edge in `vals`
  torch::Tensor chunk_cnts;   // [n_rays] number of edges belonging to each ray
  torch::Tensor ray_indices;  // [n_edges] owning ray of each edge
  torch::Tensor is_left;      // [n_edges] have n_bins true values
  torch::Tensor is_right;     // [n_edges] have n_bins true values
  torch::Tensor is_valid;     // [n_edges] have n_bins true values

  // Validate whichever layout is populated (devices, dims, matching sizes).
  // For the batched layout only `vals` is checked; for the flattened layout
  // the optional per-edge tensors are checked only when defined.
  inline void check() {
    CHECK_INPUT(vals);
    TORCH_CHECK(vals.defined());

    // batched tensor [..., n_edges_per_ray]
    if (vals.ndimension() > 1) return;

    // flattend tensor [n_edges]
    CHECK_INPUT(chunk_starts);
    CHECK_INPUT(chunk_cnts);
    TORCH_CHECK(chunk_starts.defined());
    TORCH_CHECK(chunk_cnts.defined());
    TORCH_CHECK(chunk_starts.ndimension() == 1);
    TORCH_CHECK(chunk_cnts.ndimension() == 1);
    TORCH_CHECK(chunk_starts.numel() == chunk_cnts.numel());
    if (ray_indices.defined()) {
      CHECK_INPUT(ray_indices);
      TORCH_CHECK(ray_indices.ndimension() == 1);
      TORCH_CHECK(vals.numel() == ray_indices.numel());
    }
    if (is_left.defined()) {
      CHECK_INPUT(is_left);
      TORCH_CHECK(is_left.ndimension() == 1);
      TORCH_CHECK(vals.numel() == is_left.numel());
    }
    if (is_right.defined()) {
      CHECK_INPUT(is_right);
      TORCH_CHECK(is_right.ndimension() == 1);
      TORCH_CHECK(vals.numel() == is_right.numel());
    }
    if (is_valid.defined()) {
      CHECK_INPUT(is_valid);
      TORCH_CHECK(is_valid.ndimension() == 1);
      TORCH_CHECK(vals.numel() == is_valid.numel());
    }
  }

  // Allocate chunk_cnts ([n_rays], int64) on the device described by
  // `options`. Must be called before the memalloc_data* helpers.
  inline void memalloc_cnts(int32_t n_rays, at::TensorOptions options, bool zero_init = true) {
    TORCH_CHECK(!chunk_cnts.defined());
    if (zero_init) {
      chunk_cnts = torch::zeros({n_rays}, options.dtype(torch::kLong));
    } else {
      chunk_cnts = torch::empty({n_rays}, options.dtype(torch::kLong));
    }
  }

  // Allocate the flat per-edge tensors (`vals`, `ray_indices`, optionally the
  // left/right masks and `is_valid`) with `size` elements, on the same device
  // as chunk_cnts. NOTE(review): `size` is int32_t while edge counts elsewhere
  // are int64_t — presumably total edge counts never exceed 2^31; confirm.
  inline void memalloc_data(int32_t size, bool alloc_masks = true, bool zero_init = true, bool alloc_valid = false) {
    TORCH_CHECK(chunk_cnts.defined());
    TORCH_CHECK(!vals.defined());

    if (zero_init) {
      vals = torch::zeros({size}, chunk_cnts.options().dtype(torch::kFloat32));
      ray_indices = torch::zeros({size}, chunk_cnts.options().dtype(torch::kLong));
      if (alloc_masks) {
        is_left = torch::zeros({size}, chunk_cnts.options().dtype(torch::kBool));
        is_right = torch::zeros({size}, chunk_cnts.options().dtype(torch::kBool));
      }
    } else {
      vals = torch::empty({size}, chunk_cnts.options().dtype(torch::kFloat32));
      ray_indices = torch::empty({size}, chunk_cnts.options().dtype(torch::kLong));
      if (alloc_masks) {
        is_left = torch::empty({size}, chunk_cnts.options().dtype(torch::kBool));
        is_right = torch::empty({size}, chunk_cnts.options().dtype(torch::kBool));
      }
    }
    if (alloc_valid) {
      // is_valid is always zero-initialized, regardless of zero_init.
      is_valid = torch::zeros({size}, chunk_cnts.options().dtype(torch::kBool));
    }
  }

  // Derive chunk_starts (exclusive prefix sum of chunk_cnts) and allocate the
  // per-edge tensors sized to the total edge count. Always returns 1.
  inline int64_t memalloc_data_from_chunk(bool alloc_masks = true, bool zero_init = true, bool alloc_valid = false) {
    TORCH_CHECK(chunk_cnts.defined());
    TORCH_CHECK(!chunk_starts.defined());

    torch::Tensor cumsum = torch::cumsum(chunk_cnts, 0, chunk_cnts.scalar_type());
    int64_t n_edges = cumsum[-1].item<int64_t>();

    // exclusive prefix sum: start of each ray's chunk.
    chunk_starts = cumsum - chunk_cnts;
    memalloc_data(n_edges, alloc_masks, zero_init, alloc_valid);
    return 1;
  }

  // compute the chunk_start from chunk_cnts
  // (unlike memalloc_data_from_chunk, silently overwrites any existing
  // chunk_starts — the defined() check is intentionally commented out).
  // Always returns 1.
  inline int64_t compute_chunk_start() {
    TORCH_CHECK(chunk_cnts.defined());
    // TORCH_CHECK(!chunk_starts.defined());

    torch::Tensor cumsum = torch::cumsum(chunk_cnts, 0, chunk_cnts.scalar_type());
    chunk_starts = cumsum - chunk_cnts;
    return 1;
  }
};
spec.vals.size(-1) : 0) // for batched tensor 25 | { } 26 | 27 | float* vals; 28 | bool is_batched; 29 | 30 | int64_t* chunk_starts; 31 | int64_t* chunk_cnts; 32 | int64_t* ray_indices; 33 | bool* is_left; 34 | bool* is_right; 35 | bool* is_valid; 36 | 37 | int64_t n_edges; 38 | int32_t n_rays; 39 | int32_t n_edges_per_ray; 40 | }; 41 | 42 | 43 | struct SingleRaySpec { 44 | // TODO: check inv_dir if dir is zero. 45 | __device__ SingleRaySpec( 46 | float *rays_o, float *rays_d, float tmin, float tmax) : 47 | origin{rays_o[0], rays_o[1], rays_o[2]}, 48 | dir{rays_d[0], rays_d[1], rays_d[2]}, 49 | inv_dir{1.0f/rays_d[0], 1.0f/rays_d[1], 1.0f/rays_d[2]}, 50 | tmin{tmin}, 51 | tmax{tmax} 52 | { } 53 | 54 | float3 origin; 55 | float3 dir; 56 | float3 inv_dir; 57 | float tmin; 58 | float tmax; 59 | }; 60 | 61 | struct AABBSpec { 62 | __device__ AABBSpec(float *aabb) : 63 | min{aabb[0], aabb[1], aabb[2]}, 64 | max{aabb[3], aabb[4], aabb[5]} 65 | { } 66 | __device__ AABBSpec(float3 min, float3 max) : 67 | min{min.x, min.y, min.z}, 68 | max{max.x, max.y, max.z} 69 | { } 70 | float3 min; 71 | float3 max; 72 | }; 73 | 74 | 75 | } // namespace device 76 | } // namespace -------------------------------------------------------------------------------- /nerfacc/cuda/csrc/include/utils.cub.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2022 Ruilong Li, UC Berkeley. 3 | * Modified from aten/src/ATen/cuda/cub_definitions.cuh in PyTorch. 
4 | */ 5 | 6 | #pragma once 7 | 8 | #include // for CUDA_VERSION 9 | 10 | #if defined(CUDA_VERSION) && CUDA_VERSION >= 11000 11 | #include 12 | #else 13 | #define CUB_VERSION 0 14 | #endif 15 | 16 | // cub support for scan by key is added to cub 1.15 17 | // in https://github.com/NVIDIA/cub/pull/376 18 | #if CUB_VERSION >= 101500 19 | #define CUB_SUPPORTS_SCAN_BY_KEY() 1 20 | #else 21 | #define CUB_SUPPORTS_SCAN_BY_KEY() 0 22 | #endif 23 | 24 | // https://github.com/pytorch/pytorch/blob/233305a852e1cd7f319b15b5137074c9eac455f6/aten/src/ATen/cuda/cub.cuh#L38-L46 25 | #define CUB_WRAPPER(func, ...) do { \ 26 | size_t temp_storage_bytes = 0; \ 27 | func(nullptr, temp_storage_bytes, __VA_ARGS__); \ 28 | auto& caching_allocator = *::c10::cuda::CUDACachingAllocator::get(); \ 29 | auto temp_storage = caching_allocator.allocate(temp_storage_bytes); \ 30 | func(temp_storage.get(), temp_storage_bytes, __VA_ARGS__); \ 31 | AT_CUDA_CHECK(cudaGetLastError()); \ 32 | } while (false) -------------------------------------------------------------------------------- /nerfacc/cuda/csrc/include/utils_camera.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2022 Ruilong Li, UC Berkeley. 
 */

#include "utils_cuda.cuh"

#define PI 3.14159265358979323846

namespace {
namespace device {

// https://github.com/JamesPerlman/TurboNeRF/blob/75f1228d41b914b0a768a876d2a851f3b3213a58/src/utils/camera-kernels.cuh
// Residual f(x, y) = distort(x, y) - (xd, yd) of the OpenCV rational
// (k1..k6) + tangential (p1, p2) distortion model, together with its 2x2
// Jacobian. Consumed by the Newton solver below.
inline __device__ void _compute_residual_and_jacobian(
    // inputs
    float x, float y,
    float xd, float yd,
    float k1, float k2, float k3, float k4, float k5, float k6,
    float p1, float p2,
    // outputs
    float& fx, float& fy,
    float& fx_x, float& fx_y,
    float& fy_x, float& fy_y
) {
    // let r(x, y) = x^2 + y^2;
    // alpha(x, y) = 1 + k1 * r(x, y) + k2 * r(x, y) ^2 + k3 * r(x, y)^3;
    // beta(x, y) = 1 + k4 * r(x, y) + k5 * r(x, y) ^2 + k6 * r(x, y)^3;
    // d(x, y) = alpha(x, y) / beta(x, y);
    const float r = x * x + y * y;
    const float alpha = 1.0f + r * (k1 + r * (k2 + r * k3));
    const float beta = 1.0f + r * (k4 + r * (k5 + r * k6));
    const float d = alpha / beta;

    // The perfect projection is:
    // xd = x * d(x, y) + 2 * p1 * x * y + p2 * (r(x, y) + 2 * x^2);
    // yd = y * d(x, y) + 2 * p2 * x * y + p1 * (r(x, y) + 2 * y^2);

    // Let's define
    // fx(x, y) = x * d(x, y) + 2 * p1 * x * y + p2 * (r(x, y) + 2 * x^2) - xd;
    // fy(x, y) = y * d(x, y) + 2 * p2 * x * y + p1 * (r(x, y) + 2 * y^2) - yd;

    // We are looking for a solution that satisfies
    // fx(x, y) = fy(x, y) = 0;

    fx = d * x + 2 * p1 * x * y + p2 * (r + 2 * x * x) - xd;
    fy = d * y + 2 * p2 * x * y + p1 * (r + 2 * y * y) - yd;

    // Compute derivative of alpha, beta over r.
    const float alpha_r = k1 + r * (2.0 * k2 + r * (3.0 * k3));
    const float beta_r = k4 + r * (2.0 * k5 + r * (3.0 * k6));

    // Compute derivative of d over [x, y]
    const float d_r = (alpha_r * beta - alpha * beta_r) / (beta * beta);
    const float d_x = 2.0 * x * d_r;
    const float d_y = 2.0 * y * d_r;

    // Compute derivative of fx over x and y.
    fx_x = d + d_x * x + 2.0 * p1 * y + 6.0 * p2 * x;
    fx_y = d_y * x + 2.0 * p1 * x + 2.0 * p2 * y;

    // Compute derivative of fy over x and y.
    fy_x = d_x * y + 2.0 * p2 * y + 2.0 * p1 * x;
    fy_y = d + d_y * y + 2.0 * p2 * x + 6.0 * p1 * y;
}

// https://github.com/JamesPerlman/TurboNeRF/blob/75f1228d41b914b0a768a876d2a851f3b3213a58/src/utils/camera-kernels.cuh
// Invert the distortion model with Newton's method: given a distorted
// point (xd, yd), find (x, y) such that distort(x, y) = (xd, yd).
inline __device__ void radial_and_tangential_undistort(
    float xd, float yd,
    float k1, float k2, float k3, float k4, float k5, float k6,
    float p1, float p2,
    const float& eps,
    const int& max_iterations,
    float& x, float& y
) {
    // Initial guess.
    x = xd;
    y = yd;

    // Newton's method.
    for (int i = 0; i < max_iterations; ++i) {
        float fx, fy, fx_x, fx_y, fy_x, fy_y;

        _compute_residual_and_jacobian(
            x, y,
            xd, yd,
            k1, k2, k3, k4, k5, k6,
            p1, p2,
            fx, fy,
            fx_x, fx_y, fy_x, fy_y
        );

        // Compute the Jacobian.
        // NOTE: this is the *negated* determinant of [[fx_x, fx_y],
        // [fy_x, fy_y]]; the update formulas below are written with a
        // consistent sign, so the overall step is the usual -J^{-1} f.
        const float det = fx_y * fy_x - fx_x * fy_y;
        if (fabs(det) < eps) {
            // Jacobian (near-)singular: bail out rather than divide by ~0.
            break;
        }

        // Compute the update.
        const float dx = (fx * fy_y - fy * fx_y) / det;
        const float dy = (fy * fx_x - fx * fy_x) / det;

        // Update the solution.
        x += dx;
        y += dy;

        // Check for convergence.
        if (fabs(dx) < eps && fabs(dy) < eps) {
            break;
        }
    }
}

// not good
// https://github.com/opencv/opencv/blob/8d0fbc6a1e9f20c822921e8076551a01e58cd632/modules/calib3d/src/undistort.dispatch.cpp#L578
// Fixed-point iteration used by OpenCV's undistortPoints (rational +
// tangential + thin-prism terms). Returns false if the inverse
// distortion factor goes negative (model breakdown).
inline __device__ bool iterative_opencv_lens_undistortion(
    float u, float v,
    float k1, float k2, float k3, float k4, float k5, float k6,
    float p1, float p2, float s1, float s2, float s3, float s4,
    int iters,
    // outputs
    float& x, float& y)
{
    x = u;
    y = v;
    for(int i = 0; i < iters; i++)
    {
        float r2 = x*x + y*y;
        float icdist = (1 + ((k6*r2 + k5)*r2 + k4)*r2) / (1 + ((k3*r2 + k2)*r2 + k1)*r2);
        if (icdist < 0) return false;
        float deltaX = 2*p1*x*y + p2*(r2 + 2*x*x) + s1*r2 + s2*r2*r2;
        float deltaY = p1*(r2 + 2*y*y) + 2*p2*x*y + s3*r2 + s4*r2*r2;
        x = (u - deltaX) * icdist;
        y = (v - deltaY) * icdist;
    }
    return true;
}

// https://github.com/opencv/opencv/blob/master/modules/calib3d/src/fisheye.cpp#L321
// Fisheye (equidistant) undistortion: solves theta * (1 + k1*theta^2 +
// ... + k4*theta^8) = theta_d for theta by Newton iteration, then rescales
// the input point. Outputs are written only on clean convergence.
inline __device__ bool iterative_opencv_lens_undistortion_fisheye(
    float u, float v,
    float k1, float k2, float k3, float k4,
    int criteria_iters,
    float criteria_eps,
    // outputs
    float& u_out, float& v_out)
{
    // image point (u, v) to world point (x, y)
    float theta_d = sqrt(u * u + v * v);

    // the current camera model is only valid up to 180 FOV
    // for larger FOV the loop below does not converge
    // clip values so we still get plausible results for super fisheye images > 180 grad
    theta_d = min(max(-PI/2., theta_d), PI/2.);

    bool converged = false;
    float theta = theta_d;

    float scale = 0.0;

    if (fabs(theta_d) > criteria_eps)
    {
        // compensate distortion iteratively using Newton method
        for (int j = 0; j < criteria_iters; j++)
        {
            double theta2 = theta*theta, theta4 = theta2*theta2, theta6 = theta4*theta2, theta8 = theta6*theta2;
            double k0_theta2 = k1 * theta2, k1_theta4 = k2 * theta4, k2_theta6 = k3 * theta6, k3_theta8 = k4 * theta8;
            /* new_theta = theta - theta_fix, theta_fix = f0(theta) / f0'(theta) */
            double theta_fix = (theta * (1 + k0_theta2 + k1_theta4 + k2_theta6 + k3_theta8) - theta_d) /
                (1 + 3*k0_theta2 + 5*k1_theta4 + 7*k2_theta6 + 9*k3_theta8);
            theta = theta - theta_fix;

            if (fabs(theta_fix) < criteria_eps)
            {
                converged = true;
                break;
            }
        }

        scale = std::tan(theta) / theta_d;
    }
    else
    {
        converged = true;
    }

    // theta is monotonously increasing or decreasing depending on the sign of theta
    // if theta has flipped, it might converge due to symmetry but on the opposite of the camera center
    // so we can check whether theta has changed the sign during the optimization
    bool theta_flipped = ((theta_d < 0 && theta > 0) || (theta_d > 0 && theta < 0));

    if (converged && !theta_flipped)
    {
        u_out = u * scale;
        v_out = v * scale;
    }

    return converged;
}


} // namespace device
} // namespace
-------------------------------------------------------------------------------- /nerfacc/cuda/csrc/include/utils_contraction.cuh: --------------------------------------------------------------------------------
/*
 * Copyright (c) 2022 Ruilong Li, UC Berkeley.
3 | */ 4 | 5 | #pragma once 6 | 7 | #include "utils_math.cuh" 8 | 9 | namespace { 10 | namespace device { 11 | 12 | enum ContractionType 13 | { 14 | AABB = 0, 15 | UN_BOUNDED_TANH = 1, 16 | UN_BOUNDED_SPHERE = 2, 17 | }; 18 | 19 | inline __device__ __host__ float3 roi_to_unit( 20 | const float3 xyz, const float3 roi_min, const float3 roi_max) 21 | { 22 | // roi -> [0, 1]^3 23 | return (xyz - roi_min) / (roi_max - roi_min); 24 | } 25 | 26 | inline __device__ __host__ float3 unit_to_roi( 27 | const float3 xyz, const float3 roi_min, const float3 roi_max) 28 | { 29 | // [0, 1]^3 -> roi 30 | return xyz * (roi_max - roi_min) + roi_min; 31 | } 32 | 33 | inline __device__ __host__ float3 inf_to_unit_tanh( 34 | const float3 xyz, float3 roi_min, const float3 roi_max) 35 | { 36 | /** 37 | [-inf, inf]^3 -> [0, 1]^3 38 | roi -> cube of [0.25, 0.75]^3 39 | **/ 40 | float3 xyz_unit = roi_to_unit(xyz, roi_min, roi_max); // roi -> [0, 1]^3 41 | xyz_unit = xyz_unit - 0.5f; // roi -> [-0.5, 0.5]^3 42 | return make_float3(tanhf(xyz_unit.x), tanhf(xyz_unit.y), tanhf(xyz_unit.z)) * 0.5f + 0.5f; 43 | } 44 | 45 | inline __device__ __host__ float3 unit_to_inf_tanh( 46 | const float3 xyz, float3 roi_min, const float3 roi_max) 47 | { 48 | /** 49 | [0, 1]^3 -> [-inf, inf]^3 50 | cube of [0.25, 0.75]^3 -> roi 51 | **/ 52 | float3 xyz_unit = clamp( 53 | make_float3( 54 | atanhf(xyz.x * 2.0f - 1.0f), 55 | atanhf(xyz.y * 2.0f - 1.0f), 56 | atanhf(xyz.z * 2.0f - 1.0f)), 57 | -1e10f, 58 | 1e10f); 59 | xyz_unit = xyz_unit + 0.5f; 60 | xyz_unit = unit_to_roi(xyz_unit, roi_min, roi_max); 61 | return xyz_unit; 62 | } 63 | 64 | inline __device__ __host__ float3 inf_to_unit_sphere( 65 | const float3 xyz, const float3 roi_min, const float3 roi_max) 66 | { 67 | /** From MipNeRF360 68 | [-inf, inf]^3 -> sphere of [0, 1]^3; 69 | roi -> sphere of [0.25, 0.75]^3 70 | **/ 71 | float3 xyz_unit = roi_to_unit(xyz, roi_min, roi_max); // roi -> [0, 1]^3 72 | xyz_unit = xyz_unit * 2.0f - 1.0f; // roi -> [-1, 1]^3 73 
| 74 | float norm_sq = dot(xyz_unit, xyz_unit); 75 | float norm = sqrt(norm_sq); 76 | if (norm > 1.0f) 77 | { 78 | xyz_unit = (2.0f - 1.0f / norm) * (xyz_unit / norm); 79 | } 80 | xyz_unit = xyz_unit * 0.25f + 0.5f; // [-1, 1]^3 -> [0.25, 0.75]^3 81 | return xyz_unit; 82 | } 83 | 84 | inline __device__ __host__ float3 unit_sphere_to_inf( 85 | const float3 xyz, const float3 roi_min, const float3 roi_max) 86 | { 87 | /** From MipNeRF360 88 | sphere of [0, 1]^3 -> [-inf, inf]^3; 89 | sphere of [0.25, 0.75]^3 -> roi 90 | **/ 91 | float3 xyz_unit = (xyz - 0.5f) * 4.0f; // [0.25, 0.75]^3 -> [-1, 1]^3 92 | 93 | float norm_sq = dot(xyz_unit, xyz_unit); 94 | float norm = sqrt(norm_sq); 95 | if (norm > 1.0f) 96 | { 97 | xyz_unit = xyz_unit / fmaxf((2.0f * norm - 1.0f * norm_sq), 1e-10f); 98 | } 99 | xyz_unit = xyz_unit * 0.5f + 0.5f; // [-1, 1]^3 -> [0, 1]^3 100 | xyz_unit = unit_to_roi(xyz_unit, roi_min, roi_max); // [0, 1]^3 -> roi 101 | return xyz_unit; 102 | } 103 | 104 | inline __device__ __host__ float3 apply_contraction( 105 | const float3 xyz, const float3 roi_min, const float3 roi_max, 106 | const ContractionType type) 107 | { 108 | switch (type) 109 | { 110 | case AABB: 111 | return roi_to_unit(xyz, roi_min, roi_max); 112 | case UN_BOUNDED_TANH: 113 | return inf_to_unit_tanh(xyz, roi_min, roi_max); 114 | case UN_BOUNDED_SPHERE: 115 | return inf_to_unit_sphere(xyz, roi_min, roi_max); 116 | } 117 | } 118 | 119 | inline __device__ __host__ float3 apply_contraction_inv( 120 | const float3 xyz, const float3 roi_min, const float3 roi_max, 121 | const ContractionType type) 122 | { 123 | switch (type) 124 | { 125 | case AABB: 126 | return unit_to_roi(xyz, roi_min, roi_max); 127 | case UN_BOUNDED_TANH: 128 | return unit_to_inf_tanh(xyz, roi_min, roi_max); 129 | case UN_BOUNDED_SPHERE: 130 | return unit_sphere_to_inf(xyz, roi_min, roi_max); 131 | } 132 | } 133 | 134 | } // namespace device 135 | } // namespace 
-------------------------------------------------------------------------------- /nerfacc/cuda/csrc/include/utils_cuda.cuh: --------------------------------------------------------------------------------
/*
 * Copyright (c) 2022 Ruilong Li, UC Berkeley.
 */

#pragma once

// NOTE(review): the three bracketed include targets below were lost in
// extraction (presumably torch/ATen/c10 CUDA headers); restore before
// building.
#include
#include
#include


// Input-validation helpers shared by all CUDA entry points.
#define CHECK_CUDA(x) TORCH_CHECK(x.is_cuda(), #x " must be a CUDA tensor")
#define CHECK_CONTIGUOUS(x) \
    TORCH_CHECK(x.is_contiguous(), #x " must be contiguous")
#define CHECK_INPUT(x) \
    CHECK_CUDA(x);     \
    CHECK_CONTIGUOUS(x)
// Compute the global thread id; threads beyond the workload Q return early.
#define CUDA_GET_THREAD_ID(tid, Q)                         \
    const int tid = blockIdx.x * blockDim.x + threadIdx.x; \
    if (tid >= Q)                                          \
    return
// Number of blocks needed to cover Q items (ceiling division).
#define CUDA_N_BLOCKS_NEEDED(Q, CUDA_N_THREADS) ((Q - 1) / CUDA_N_THREADS + 1)
#define DEVICE_GUARD(_ten) \
    const at::cuda::OptionalCUDAGuard device_guard(device_of(_ten));

// Integer division rounding up.
// NOTE(review): the template parameter list was lost in extraction —
// presumably template <typename scalar_t>.
template
inline __device__ __host__ scalar_t ceil_div(scalar_t a, scalar_t b)
{
    return (a + b - 1) / b;
}
-------------------------------------------------------------------------------- /nerfacc/cuda/csrc/include/utils_grid.cuh: --------------------------------------------------------------------------------
#pragma once

#include "data_spec_packed.cuh"
#include "utils_contraction.cuh"
#include "utils_math.cuh"

namespace {
namespace device {

// Slab-method ray/AABB intersection. On hit, writes the entry/exit
// distances (clamped to the ray's own [tmin, tmax]) and returns true.
inline __device__ bool ray_aabb_intersect(
    SingleRaySpec ray, AABBSpec aabb,
    // outputs
    float& tmin, float& tmax)
{
    float tmin_temp{};
    float tmax_temp{};

    // X slab: pick near/far planes by the sign of the direction.
    if (ray.inv_dir.x >= 0) {
        tmin = (aabb.min.x - ray.origin.x) * ray.inv_dir.x;
        tmax = (aabb.max.x - ray.origin.x) * ray.inv_dir.x;
    } else {
        tmin = (aabb.max.x - ray.origin.x) * ray.inv_dir.x;
        tmax = (aabb.min.x - ray.origin.x) * ray.inv_dir.x;
    }

    // Y slab.
    if (ray.inv_dir.y >= 0) {
        tmin_temp = (aabb.min.y - ray.origin.y) * ray.inv_dir.y;
        tmax_temp = (aabb.max.y - ray.origin.y) * ray.inv_dir.y;
    } else {
        tmin_temp = (aabb.max.y - ray.origin.y) * ray.inv_dir.y;
        tmax_temp = (aabb.min.y - ray.origin.y) * ray.inv_dir.y;
    }

    if (tmin > tmax_temp || tmin_temp > tmax) return false;
    if (tmin_temp > tmin) tmin = tmin_temp;
    if (tmax_temp < tmax) tmax = tmax_temp;

    // Z slab.
    if (ray.inv_dir.z >= 0) {
        tmin_temp = (aabb.min.z - ray.origin.z) * ray.inv_dir.z;
        tmax_temp = (aabb.max.z - ray.origin.z) * ray.inv_dir.z;
    } else {
        tmin_temp = (aabb.max.z - ray.origin.z) * ray.inv_dir.z;
        tmax_temp = (aabb.min.z - ray.origin.z) * ray.inv_dir.z;
    }

    if (tmin > tmax_temp || tmin_temp > tmax) return false;
    if (tmin_temp > tmin) tmin = tmin_temp;
    if (tmax_temp < tmax) tmax = tmax_temp;

    // Box entirely behind the ray origin.
    if (tmax <= 0) return false;

    tmin = fmaxf(tmin, ray.tmin);
    tmax = fminf(tmax, ray.tmax);
    return true;
}


// Initialize per-axis state for voxel marching (3D DDA-style traversal)
// through a uniform grid: for each axis, the distance to the next voxel
// boundary (tdist), the per-voxel t increment (delta), the step direction,
// and the start/end voxel indices. Axes with zero direction are disabled
// by setting their tdist/delta to tmax so they never win the min-compare.
inline __device__ void setup_traversal(
    SingleRaySpec ray, float tmin, float tmax, float eps,
    AABBSpec aabb, int3 resolution,
    // outputs
    float3 &delta, float3 &tdist,
    int3 &step_index, int3 &current_index, int3 &final_index)
{
    const float3 res = make_float3(resolution);
    const float3 voxel_size = (aabb.max - aabb.min) / res;
    // eps nudges both endpoints inside the box to avoid landing exactly
    // on a voxel boundary.
    const float3 ray_start = ray.origin + ray.dir * (tmin + eps);
    const float3 ray_end = ray.origin + ray.dir * (tmax - eps);

    // get voxel index of start and end within grid
    // TODO: check float error here!
    current_index = make_int3(
        apply_contraction(ray_start, aabb.min, aabb.max, ContractionType::AABB)
        * res
    );
    current_index = clamp(current_index, make_int3(0, 0, 0), resolution - 1);

    final_index = make_int3(
        apply_contraction(ray_end, aabb.min, aabb.max, ContractionType::AABB)
        * res
    );
    final_index = clamp(final_index, make_int3(0, 0, 0), resolution - 1);

    // Index of the voxel boundary the ray crosses first on each axis.
    const int3 index_delta = make_int3(
        ray.dir.x > 0 ? 1 : 0, ray.dir.y > 0 ? 1 : 0, ray.dir.z > 0 ? 1 : 0
    );
    const int3 start_index = current_index + index_delta;
    const float3 tmax_xyz = ((aabb.min +
        ((make_float3(start_index) * voxel_size) - ray_start)) * ray.inv_dir) + tmin;

    tdist = make_float3(
        (ray.dir.x == 0.0f) ? tmax : tmax_xyz.x,
        (ray.dir.y == 0.0f) ? tmax : tmax_xyz.y,
        (ray.dir.z == 0.0f) ? tmax : tmax_xyz.z
    );
    // printf("tdist: %f %f %f\n", tdist.x, tdist.y, tdist.z);

    const float3 step_float = make_float3(
        (ray.dir.x == 0.0f) ? 0.0f : (ray.dir.x > 0.0f ? 1.0f : -1.0f),
        (ray.dir.y == 0.0f) ? 0.0f : (ray.dir.y > 0.0f ? 1.0f : -1.0f),
        (ray.dir.z == 0.0f) ? 0.0f : (ray.dir.z > 0.0f ? 1.0f : -1.0f)
    );
    step_index = make_int3(step_float);
    // printf("step_index: %d %d %d\n", step_index.x, step_index.y, step_index.z);

    const float3 delta_temp = voxel_size * ray.inv_dir * step_float;
    delta = make_float3(
        (ray.dir.x == 0.0f) ? tmax : delta_temp.x,
        (ray.dir.y == 0.0f) ? tmax : delta_temp.y,
        (ray.dir.z == 0.0f) ? tmax : delta_temp.z
    );
    // printf("delta: %f %f %f\n", delta.x, delta.y, delta.z);
}

// Advance to the next voxel along the axis with the smallest tdist.
// Returns false when the traversal steps past `overflow_index` (i.e. the
// grid has been exited on that axis).
inline __device__ bool single_traversal(
    float3& tdist, int3& current_index,
    const int3 overflow_index, const int3 step_index, const float3 delta) {
    if ((tdist.x < tdist.y) && (tdist.x < tdist.z)) {
        // X-axis traversal.
        current_index.x += step_index.x;
        tdist.x += delta.x;
        if (current_index.x == overflow_index.x) {
            return false;
        }
    } else if (tdist.y < tdist.z) {
        // Y-axis traversal.
        current_index.y += step_index.y;
        tdist.y += delta.y;
        if (current_index.y == overflow_index.y) {
            return false;
        }
    } else {
        // Z-axis traversal.
        current_index.z += step_index.z;
        tdist.z += delta.z;
        if (current_index.z == overflow_index.z) {
            return false;
        }
    }
    return true;
}


} // namespace device
} // namespace
-------------------------------------------------------------------------------- /nerfacc/cuda/csrc/nerfacc.cpp: --------------------------------------------------------------------------------
// This file contains only Python bindings
#include "include/data_spec.hpp"

// NOTE(review): the bracketed include target was lost in extraction
// (presumably <torch/extension.h>); restore before building.
#include


// scan
// Forward declarations; the implementations live in the .cu translation
// units (scan.cu / scan_cub.cu / grid.cu / pdf.cu / camera.cu).
torch::Tensor inclusive_sum(
    torch::Tensor chunk_starts,
    torch::Tensor chunk_cnts,
    torch::Tensor inputs,
    bool normalize,
    bool backward);
torch::Tensor exclusive_sum(
    torch::Tensor chunk_starts,
    torch::Tensor chunk_cnts,
    torch::Tensor inputs,
    bool normalize,
    bool backward);
torch::Tensor inclusive_prod_forward(
    torch::Tensor chunk_starts,
    torch::Tensor chunk_cnts,
    torch::Tensor inputs);
torch::Tensor inclusive_prod_backward(
    torch::Tensor chunk_starts,
    torch::Tensor chunk_cnts,
    torch::Tensor inputs,
    torch::Tensor outputs,
    torch::Tensor grad_outputs);
torch::Tensor exclusive_prod_forward(
    torch::Tensor chunk_starts,
    torch::Tensor chunk_cnts,
    torch::Tensor inputs);
torch::Tensor exclusive_prod_backward(
    torch::Tensor chunk_starts,
    torch::Tensor chunk_cnts,
    torch::Tensor inputs,
    torch::Tensor outputs,
    torch::Tensor grad_outputs);

bool is_cub_available();
torch::Tensor
inclusive_sum_cub(
    torch::Tensor ray_indices,
    torch::Tensor inputs,
    bool backward);
torch::Tensor exclusive_sum_cub(
    torch::Tensor indices,
    torch::Tensor inputs,
    bool backward);
torch::Tensor inclusive_prod_cub_forward(
    torch::Tensor indices,
    torch::Tensor inputs);
torch::Tensor inclusive_prod_cub_backward(
    torch::Tensor indices,
    torch::Tensor inputs,
    torch::Tensor outputs,
    torch::Tensor grad_outputs);
torch::Tensor exclusive_prod_cub_forward(
    torch::Tensor indices,
    torch::Tensor inputs);
torch::Tensor exclusive_prod_cub_backward(
    torch::Tensor indices,
    torch::Tensor inputs,
    torch::Tensor outputs,
    torch::Tensor grad_outputs);

// grid
// NOTE(review): the template arguments of std::vector / std::tuple below
// were lost in extraction (presumably std::vector<torch::Tensor> etc.);
// restore before building.
std::vector ray_aabb_intersect(
    const torch::Tensor rays_o, // [n_rays, 3]
    const torch::Tensor rays_d, // [n_rays, 3]
    const torch::Tensor aabbs,  // [n_aabbs, 6]
    const float near_plane,
    const float far_plane,
    const float miss_value);
std::tuple traverse_grids(
    // rays
    const torch::Tensor rays_o, // [n_rays, 3]
    const torch::Tensor rays_d, // [n_rays, 3]
    const torch::Tensor rays_mask,  // [n_rays]
    // grids
    const torch::Tensor binaries,   // [n_grids, resx, resy, resz]
    const torch::Tensor aabbs,      // [n_grids, 6]
    // intersections
    const torch::Tensor t_sorted,   // [n_rays, n_grids * 2]
    const torch::Tensor t_indices,  // [n_rays, n_grids * 2]
    const torch::Tensor hits,       // [n_rays, n_grids]
    // options
    const torch::Tensor near_planes,
    const torch::Tensor far_planes,
    const float step_size,
    const float cone_angle,
    const bool compute_intervals,
    const bool compute_samples,
    const bool compute_terminate_planes,
    const int32_t traverse_steps_limit, // <= 0 means no limit
    const bool over_allocate); // over allocate the memory for intervals and samples

// pdf
std::vector importance_sampling(
    RaySegmentsSpec ray_segments,
    torch::Tensor cdfs,
    torch::Tensor n_intervels_per_ray,
    bool stratified);
std::vector importance_sampling(
    RaySegmentsSpec ray_segments,
    torch::Tensor cdfs,
    int64_t n_intervels_per_ray,
    bool stratified);
std::vector searchsorted(
    RaySegmentsSpec query,
    RaySegmentsSpec key);

// cameras
torch::Tensor opencv_lens_undistortion(
    const torch::Tensor& uv,     // [..., 2]
    const torch::Tensor& params, // [..., 6]
    const float eps,
    const int max_iterations);
torch::Tensor opencv_lens_undistortion_fisheye(
    const torch::Tensor& uv,     // [..., 2]
    const torch::Tensor& params, // [..., 4]
    const float criteria_eps,
    const int criteria_iters);


PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
// Register each free function under its own name.
#define _REG_FUNC(funname) m.def(#funname, &funname)
    _REG_FUNC(inclusive_sum);
    _REG_FUNC(exclusive_sum);
    _REG_FUNC(inclusive_prod_forward);
    _REG_FUNC(inclusive_prod_backward);
    _REG_FUNC(exclusive_prod_forward);
    _REG_FUNC(exclusive_prod_backward);

    _REG_FUNC(is_cub_available);
    _REG_FUNC(inclusive_sum_cub);
    _REG_FUNC(exclusive_sum_cub);
    _REG_FUNC(inclusive_prod_cub_forward);
    _REG_FUNC(inclusive_prod_cub_backward);
    _REG_FUNC(exclusive_prod_cub_forward);
    _REG_FUNC(exclusive_prod_cub_backward);

    _REG_FUNC(ray_aabb_intersect);
    _REG_FUNC(traverse_grids);
    _REG_FUNC(searchsorted);

    _REG_FUNC(opencv_lens_undistortion);
    _REG_FUNC(opencv_lens_undistortion_fisheye); // TODO: check this function.
#undef _REG_FUNC

    // NOTE(review): the template arguments of py::overload_cast and
    // py::class_ below were lost in extraction; they disambiguate the two
    // importance_sampling overloads and bind RaySegmentsSpec. Restore
    // before building.
    m.def("importance_sampling", py::overload_cast(&importance_sampling));
    m.def("importance_sampling", py::overload_cast(&importance_sampling));

    py::class_(m, "RaySegmentsSpec")
        .def(py::init<>())
        .def_readwrite("vals", &RaySegmentsSpec::vals)
        .def_readwrite("is_left", &RaySegmentsSpec::is_left)
        .def_readwrite("is_right", &RaySegmentsSpec::is_right)
        .def_readwrite("is_valid", &RaySegmentsSpec::is_valid)
        .def_readwrite("chunk_starts", &RaySegmentsSpec::chunk_starts)
        .def_readwrite("chunk_cnts", &RaySegmentsSpec::chunk_cnts)
        .def_readwrite("ray_indices", &RaySegmentsSpec::ray_indices);
}
-------------------------------------------------------------------------------- /nerfacc/data_specs.py: --------------------------------------------------------------------------------
"""
Copyright (c) 2022 Ruilong Li, UC Berkeley.
"""
from dataclasses import dataclass
from typing import Optional

import torch

from . import cuda as _C


@dataclass
class RaySamples:
    """Ray samples that supports batched and flattened data.

    Note:
        When `vals` is flattened, either `packed_info` or `ray_indices` must
        be provided.

    Args:
        vals: Batched data with shape (n_rays, n_samples) or flattened data
            with shape (all_samples,)
        packed_info: Optional. A tensor of shape (n_rays, 2) that specifies
            the start and count of each chunk in flattened `vals`, with in
            total n_rays chunks. Only needed when `vals` is flattened.
        ray_indices: Optional. A tensor of shape (all_samples,) that specifies
            the ray index of each sample. Only needed when `vals` is flattened.

    Examples:

    ..
code-block:: python 32 | 33 | >>> # Batched data 34 | >>> ray_samples = RaySamples(torch.rand(10, 100)) 35 | >>> # Flattened data 36 | >>> ray_samples = RaySamples( 37 | >>> torch.rand(1000), 38 | >>> packed_info=torch.tensor([[0, 100], [100, 200], [300, 700]]), 39 | >>> ) 40 | 41 | """ 42 | 43 | vals: torch.Tensor 44 | packed_info: Optional[torch.Tensor] = None 45 | ray_indices: Optional[torch.Tensor] = None 46 | is_valid: Optional[torch.Tensor] = None 47 | 48 | def _to_cpp(self): 49 | """ 50 | Generate object to pass to C++ 51 | """ 52 | 53 | spec = _C.RaySegmentsSpec() 54 | spec.vals = self.vals.contiguous() 55 | if self.packed_info is not None: 56 | spec.chunk_starts = self.packed_info[:, 0].contiguous() 57 | if self.chunk_cnts is not None: 58 | spec.chunk_cnts = self.packed_info[:, 1].contiguous() 59 | if self.ray_indices is not None: 60 | spec.ray_indices = self.ray_indices.contiguous() 61 | return spec 62 | 63 | @classmethod 64 | def _from_cpp(cls, spec): 65 | """ 66 | Generate object from C++ 67 | """ 68 | if spec.chunk_starts is not None and spec.chunk_cnts is not None: 69 | packed_info = torch.stack([spec.chunk_starts, spec.chunk_cnts], -1) 70 | else: 71 | packed_info = None 72 | ray_indices = spec.ray_indices 73 | if spec.is_valid is not None: 74 | is_valid = spec.is_valid 75 | else: 76 | is_valid = None 77 | vals = spec.vals 78 | return cls( 79 | vals=vals, 80 | packed_info=packed_info, 81 | ray_indices=ray_indices, 82 | is_valid=is_valid, 83 | ) 84 | 85 | @property 86 | def device(self) -> torch.device: 87 | return self.vals.device 88 | 89 | 90 | @dataclass 91 | class RayIntervals: 92 | """Ray intervals that supports batched and flattened data. 93 | 94 | Each interval is defined by two edges (left and right). The attribute `vals` 95 | stores the edges of all intervals along the rays. The attributes `is_left` 96 | and `is_right` are for indicating whether each edge is a left or right edge. 
    This class unifies the representation of both continuous and non-continuous ray
    intervals.

    Note:
        When `vals` is flattened, either `packed_info` or `ray_indices` must
        be provided. Also both `is_left` and `is_right` must be provided.

    Args:
        vals: Batched data with shape (n_rays, n_edges) or flattened data
            with shape (all_edges,)
        packed_info: Optional. A tensor of shape (n_rays, 2) that specifies
            the start and count of each chunk in flattened `vals`, with in
            total n_rays chunks. Only needed when `vals` is flattened.
        ray_indices: Optional. A tensor of shape (all_edges,) that specifies
            the ray index of each edge. Only needed when `vals` is flattened.
        is_left: Optional. A boolean tensor of shape (all_edges,) that specifies
            whether each edge is a left edge. Only needed when `vals` is flattened.
        is_right: Optional. A boolean tensor of shape (all_edges,) that specifies
            whether each edge is a right edge. Only needed when `vals` is flattened.

    Examples:

    ..
code-block:: python 120 | 121 | >>> # Batched data 122 | >>> ray_intervals = RayIntervals(torch.rand(10, 100)) 123 | >>> # Flattened data 124 | >>> ray_intervals = RayIntervals( 125 | >>> torch.rand(6), 126 | >>> packed_info=torch.tensor([[0, 2], [2, 0], [2, 4]]), 127 | >>> is_left=torch.tensor([True, False, True, True, True, False]), 128 | >>> is_right=torch.tensor([False, True, False, True, True, True]), 129 | >>> ) 130 | 131 | """ 132 | 133 | vals: torch.Tensor 134 | packed_info: Optional[torch.Tensor] = None 135 | ray_indices: Optional[torch.Tensor] = None 136 | is_left: Optional[torch.Tensor] = None 137 | is_right: Optional[torch.Tensor] = None 138 | 139 | def _to_cpp(self): 140 | """ 141 | Generate object to pass to C++ 142 | """ 143 | 144 | spec = _C.RaySegmentsSpec() 145 | spec.vals = self.vals.contiguous() 146 | if self.packed_info is not None: 147 | spec.chunk_starts = self.packed_info[:, 0].contiguous() 148 | if self.packed_info is not None: 149 | spec.chunk_cnts = self.packed_info[:, 1].contiguous() 150 | if self.ray_indices is not None: 151 | spec.ray_indices = self.ray_indices.contiguous() 152 | if self.is_left is not None: 153 | spec.is_left = self.is_left.contiguous() 154 | if self.is_right is not None: 155 | spec.is_right = self.is_right.contiguous() 156 | return spec 157 | 158 | @classmethod 159 | def _from_cpp(cls, spec): 160 | """ 161 | Generate object from C++ 162 | """ 163 | if spec.chunk_starts is not None and spec.chunk_cnts is not None: 164 | packed_info = torch.stack([spec.chunk_starts, spec.chunk_cnts], -1) 165 | else: 166 | packed_info = None 167 | ray_indices = spec.ray_indices 168 | is_left = spec.is_left 169 | is_right = spec.is_right 170 | return cls( 171 | vals=spec.vals, 172 | packed_info=packed_info, 173 | ray_indices=ray_indices, 174 | is_left=is_left, 175 | is_right=is_right, 176 | ) 177 | 178 | @property 179 | def device(self) -> torch.device: 180 | return self.vals.device 181 | 
from typing import Any, Optional

import torch
import torch.nn as nn
from torch import Tensor


class AbstractEstimator(nn.Module):
    """Abstract transmittance estimator used for sampling along rays."""

    def __init__(self) -> None:
        super().__init__()
        # Empty non-persistent buffer; it exists purely so `.device` tracks
        # wherever the module is moved.
        self.register_buffer("_dummy", torch.empty(0), persistent=False)

    @property
    def device(self) -> torch.device:
        """Device this estimator currently lives on."""
        return self._dummy.device

    def sampling(self, *args, **kwargs) -> Any:
        raise NotImplementedError

    def update_every_n_steps(self, *args, **kwargs) -> None:
        raise NotImplementedError


def distortion(
    weights: Tensor,
    t_starts: Tensor,
    t_ends: Tensor,
    ray_indices: Tensor,
    n_rays: int,
) -> Tensor:
    """Distortion Regularization proposed in Mip-NeRF 360.

    Args:
        weights: The flattened weights of the samples. Shape (n_samples,)
        t_starts: The start points of the samples. Shape (n_samples,)
        t_ends: The end points of the samples. Shape (n_samples,)
        ray_indices: The ray indices of the samples. LongTensor with shape (n_samples,)
        n_rays: The total number of rays.

    Returns:
        The per-ray distortion loss with the shape (n_rays, 1).
    """
    from .scan import exclusive_sum
    from .volrend import accumulate_along_rays

    assert (
        weights.shape == t_starts.shape == t_ends.shape == ray_indices.shape
    ), (
        f"the shape of the inputs are not the same: "
        f"weights {weights.shape}, t_starts {t_starts.shape}, "
        f"t_ends {t_ends.shape}, ray_indices {ray_indices.shape}"
    )
    midpoints = 0.5 * (t_starts + t_ends)
    widths = t_ends - t_starts
    # Intra-sample ("uni") term plus the pairwise ("bi") term, the latter
    # computed in O(n) via exclusive prefix sums over each ray's samples.
    loss_uni = (1 / 3) * (widths * weights.pow(2))
    w_prefix = exclusive_sum(weights, indices=ray_indices)
    wm_prefix = exclusive_sum(weights * midpoints, indices=ray_indices)
    loss_bi = 2 * (weights * midpoints * w_prefix - weights * wm_prefix)
    return accumulate_along_rays(loss_uni + loss_bi, None, ray_indices, n_rays)


@torch.no_grad()
def pack_info(ray_indices: Tensor, n_rays: Optional[int] = None) -> Tensor:
    """Pack `ray_indices` to `packed_info`. Useful for converting per sample data to per ray data.

    Note:
        this function is not differentiable to any inputs.

    Args:
        ray_indices: Ray indices of the samples. LongTensor with shape (n_sample).
        n_rays: Number of rays. If None, it is inferred from `ray_indices`. Default is None.

    Returns:
        A LongTensor of shape (n_rays, 2) that specifies the start and count
        of each chunk in the flattened input tensor, with in total n_rays chunks.

    Example:

    .. code-block:: python

        >>> ray_indices = torch.tensor([0, 0, 1, 1, 1, 2, 2, 2, 2], device="cuda")
        >>> packed_info = pack_info(ray_indices, n_rays=3)
        >>> packed_info
        tensor([[0, 2], [2, 3], [5, 4]], device='cuda:0')

    """
    assert (
        ray_indices.dim() == 1
    ), "ray_indices must be a 1D tensor with shape (n_samples)."
    if not ray_indices.is_cuda:
        raise NotImplementedError("Only support cuda inputs.")
    device, dtype = ray_indices.device, ray_indices.dtype
    if n_rays is None:
        n_rays = ray_indices.max().item() + 1
    # Histogram of samples per ray, then an exclusive cumsum gives each
    # ray's chunk start offset.
    counts = torch.zeros((n_rays,), device=device, dtype=dtype)
    counts.index_add_(0, ray_indices, torch.ones_like(ray_indices))
    starts = counts.cumsum(dim=0, dtype=dtype) - counts
    return torch.stack([starts, counts], dim=-1)
27 | 28 | Args: 29 | sorted_sequence: A :class:`RayIntervals` or :class:`RaySamples` object. We assume 30 | the `sorted_sequence.vals` is acendingly sorted for each ray. 31 | values: A :class:`RayIntervals` or :class:`RaySamples` object. 32 | 33 | Returns: 34 | A tuple of LongTensor: 35 | 36 | - **ids_left**: A LongTensor with the same shape as `values.vals`. 37 | - **ids_right**: A LongTensor with the same shape as `values.vals`. 38 | 39 | Example: 40 | >>> sorted_sequence = RayIntervals( 41 | ... vals=torch.tensor([0.0, 1.0, 0.0, 1.0, 2.0], device="cuda"), 42 | ... packed_info=torch.tensor([[0, 2], [2, 3]], device="cuda"), 43 | ... ) 44 | >>> values = RayIntervals( 45 | ... vals=torch.tensor([0.5, 1.5, 2.5], device="cuda"), 46 | ... packed_info=torch.tensor([[0, 1], [1, 2]], device="cuda"), 47 | ... ) 48 | >>> ids_left, ids_right = searchsorted(sorted_sequence, values) 49 | >>> ids_left 50 | tensor([0, 3, 3], device='cuda:0') 51 | >>> ids_right 52 | tensor([1, 4, 4], device='cuda:0') 53 | >>> sorted_sequence.vals.gather(-1, ids_left) 54 | tensor([0., 1., 1.], device='cuda:0') 55 | >>> sorted_sequence.vals.gather(-1, ids_right) 56 | tensor([1., 2., 2.], device='cuda:0') 57 | """ 58 | 59 | ids_left, ids_right = _C.searchsorted( 60 | values._to_cpp(), sorted_sequence._to_cpp() 61 | ) 62 | return ids_left, ids_right 63 | 64 | 65 | def importance_sampling( 66 | intervals: RayIntervals, 67 | cdfs: Tensor, 68 | n_intervals_per_ray: Union[Tensor, int], 69 | stratified: bool = False, 70 | ) -> Tuple[RayIntervals, RaySamples]: 71 | """Importance sampling that supports flattened tensor. 72 | 73 | Given a set of intervals and the corresponding CDFs at the interval edges, 74 | this function performs inverse transform sampling to create a new set of 75 | intervals and samples. Stratified sampling is also supported. 76 | 77 | Args: 78 | intervals: A :class:`RayIntervals` object that specifies the edges of the 79 | intervals along the rays. 
80 | cdfs: The CDFs at the interval edges. It has the same shape as 81 | `intervals.vals`. 82 | n_intervals_per_ray: Resample each ray to have this many intervals. 83 | If it is a tensor, it must be of shape (n_rays,). If it is an int, 84 | it is broadcasted to all rays. 85 | stratified: If True, perform stratified sampling. 86 | 87 | Returns: 88 | A tuple of {:class:`RayIntervals`, :class:`RaySamples`}: 89 | 90 | - **intervals**: A :class:`RayIntervals` object. If `n_intervals_per_ray` is an int, \ 91 | `intervals.vals` will has the shape of (n_rays, n_intervals_per_ray + 1). \ 92 | If `n_intervals_per_ray` is a tensor, we assume each ray results \ 93 | in a different number of intervals. In this case, `intervals.vals` \ 94 | will has the shape of (all_edges,), the attributes `packed_info`, \ 95 | `ray_indices`, `is_left` and `is_right` will be accessable. 96 | 97 | - **samples**: A :class:`RaySamples` object. If `n_intervals_per_ray` is an int, \ 98 | `samples.vals` will has the shape of (n_rays, n_intervals_per_ray). \ 99 | If `n_intervals_per_ray` is a tensor, we assume each ray results \ 100 | in a different number of intervals. In this case, `samples.vals` \ 101 | will has the shape of (all_samples,), the attributes `packed_info` and \ 102 | `ray_indices` will be accessable. 103 | 104 | Example: 105 | 106 | .. code-block:: python 107 | 108 | >>> intervals = RayIntervals( 109 | ... vals=torch.tensor([0.0, 1.0, 0.0, 1.0, 2.0], device="cuda"), 110 | ... packed_info=torch.tensor([[0, 2], [2, 3]], device="cuda"), 111 | ... 
) 112 | >>> cdfs = torch.tensor([0.0, 0.5, 0.0, 0.5, 1.0], device="cuda") 113 | >>> n_intervals_per_ray = 2 114 | >>> intervals, samples = importance_sampling(intervals, cdfs, n_intervals_per_ray) 115 | >>> intervals.vals 116 | tensor([[0.0000, 0.5000, 1.0000], 117 | [0.0000, 1.0000, 2.0000]], device='cuda:0') 118 | >>> samples.vals 119 | tensor([[0.2500, 0.7500], 120 | [0.5000, 1.5000]], device='cuda:0') 121 | 122 | """ 123 | if isinstance(n_intervals_per_ray, Tensor): 124 | n_intervals_per_ray = n_intervals_per_ray.contiguous() 125 | intervals, samples = _C.importance_sampling( 126 | intervals._to_cpp(), 127 | cdfs.contiguous(), 128 | n_intervals_per_ray, 129 | stratified, 130 | ) 131 | return RayIntervals._from_cpp(intervals), RaySamples._from_cpp(samples) 132 | 133 | 134 | def _sample_from_weighted( 135 | bins: Tensor, 136 | weights: Tensor, 137 | num_samples: int, 138 | stratified: bool = False, 139 | vmin: float = -torch.inf, 140 | vmax: float = torch.inf, 141 | ) -> Tuple[Tensor, Tensor]: 142 | import torch.nn.functional as F 143 | 144 | """ 145 | Args: 146 | bins: (..., B + 1). 147 | weights: (..., B). 148 | 149 | Returns: 150 | samples: (..., S + 1). 151 | """ 152 | B = weights.shape[-1] 153 | S = num_samples 154 | assert bins.shape[-1] == B + 1 155 | 156 | dtype, device = bins.dtype, bins.device 157 | eps = torch.finfo(weights.dtype).eps 158 | 159 | # (..., B). 160 | pdf = F.normalize(weights, p=1, dim=-1) 161 | # (..., B + 1). 162 | cdf = torch.cat( 163 | [ 164 | torch.zeros_like(pdf[..., :1]), 165 | torch.cumsum(pdf[..., :-1], dim=-1), 166 | torch.ones_like(pdf[..., :1]), 167 | ], 168 | dim=-1, 169 | ) 170 | 171 | # (..., S). Sample positions between [0, 1). 172 | if not stratified: 173 | pad = 1 / (2 * S) 174 | # Get the center of each pdf bins. 175 | u = torch.linspace(pad, 1 - pad - eps, S, dtype=dtype, device=device) 176 | u = u.broadcast_to(bins.shape[:-1] + (S,)) 177 | else: 178 | # `u` is in [0, 1) --- it can be zero, but it can never be 1. 
179 | u_max = eps + (1 - eps) / S 180 | max_jitter = (1 - u_max) / (S - 1) - eps 181 | # Only perform one jittering per ray (`single_jitter` in the original 182 | # implementation.) 183 | u = ( 184 | torch.linspace(0, 1 - u_max, S, dtype=dtype, device=device) 185 | + torch.rand( 186 | *bins.shape[:-1], 187 | 1, 188 | dtype=dtype, 189 | device=device, 190 | ) 191 | * max_jitter 192 | ) 193 | 194 | # (..., S). 195 | ceil = torch.searchsorted(cdf.contiguous(), u.contiguous(), side="right") 196 | floor = ceil - 1 197 | # (..., S * 2). 198 | inds = torch.cat([floor, ceil], dim=-1) 199 | 200 | # (..., S). 201 | cdf0, cdf1 = cdf.gather(-1, inds).split(S, dim=-1) 202 | b0, b1 = bins.gather(-1, inds).split(S, dim=-1) 203 | 204 | # (..., S). Linear interpolation in 1D. 205 | t = (u - cdf0) / torch.clamp(cdf1 - cdf0, min=eps) 206 | # Sample centers. 207 | centers = b0 + t * (b1 - b0) 208 | 209 | samples = (centers[..., 1:] + centers[..., :-1]) / 2 210 | samples = torch.cat( 211 | [ 212 | (2 * centers[..., :1] - samples[..., :1]).clamp_min(vmin), 213 | samples, 214 | (2 * centers[..., -1:] - samples[..., -1:]).clamp_max(vmax), 215 | ], 216 | dim=-1, 217 | ) 218 | 219 | return samples, centers 220 | -------------------------------------------------------------------------------- /nerfacc/version.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2022 Ruilong Li, UC Berkeley. 
__version__ = "0.5.3"


# --- scripts/run_aws_listing.py ---
# Regenerates the pip "find-links" HTML index for the wheels stored in S3:
# one top-level index listing per-torch-version pages, plus one page per
# torch version listing its wheel files.
import argparse
from collections import defaultdict

from boto3 import resource

parser = argparse.ArgumentParser()
parser.add_argument("--bucket", type=str, required=True)
parser.add_argument("--region", type=str, required=True)
args = parser.parse_args()

ROOT_URL = f"https://{args.bucket}.s3.{args.region}.amazonaws.com/whl"
# NOTE(review): these two templates were garbled in this dump (the HTML tags
# were stripped); reconstructed as a minimal link index -- confirm against
# the original file.
html = "<!DOCTYPE html>\n<html>\n<body>\n{}\n</body>\n</html>"
href = '  <a href="{}">{}</a><br/>'
html_args = {
    "ContentType": "text/html",
    "CacheControl": "max-age=300",
    "ACL": "public-read",
}

# BUG FIX: the bucket name was hard-coded to "nerfacc-bucket" even though
# --bucket is a required CLI argument (and ROOT_URL above already uses it).
bucket = resource("s3").Bucket(name=args.bucket)

wheels_dict = defaultdict(list)
for obj in bucket.objects.filter(Prefix="whl"):
    # Keys look like "whl/<torch_version>/<wheel file>.whl"; skip non-wheels.
    if obj.key[-3:] != "whl":
        continue
    torch_version, wheel = obj.key.split("/")[-2:]
    wheels_dict[torch_version].append(f"{torch_version}/{wheel}")

index_html = html.format(
    "\n".join(
        [
            href.format(
                # "+" must be percent-encoded in URLs.
                f"{torch_version}.html".replace("+", "%2B"),
                torch_version,
            )
            for torch_version in wheels_dict
        ]
    )
)

with open("index.html", "w") as f:
    f.write(index_html)
bucket.Object("whl/index.html").upload_file("index.html", html_args)

for torch_version, wheel_names in wheels_dict.items():
    torch_version_html = html.format(
        "\n".join(
            [
                href.format(
                    f"{ROOT_URL}/{wheel_name}".replace("+", "%2B"),
                    wheel_name.split("/")[-1],
                )
                for wheel_name in wheel_names
            ]
        )
    )

    with open(f"{torch_version}.html", "w") as f:
        f.write(torch_version_html)
    bucket.Object(f"whl/{torch_version}.html").upload_file(
        f"{torch_version}.html", html_args
    )


# --- scripts/run_dev_checks.py ---
#!/usr/bin/env python
"""Simple yaml debugger"""
import subprocess

import yaml
from rich.console import Console
from rich.style import Style

console = Console(width=120)

LOCAL_TESTS = [
    "Run license checks",
    "Run isort",
    "Run Black",
    "Python Pylint",
    "Test with pytest",
]


def run_command(command: str) -> bool:
    """Run a shell command, printing an error banner when it fails.

    (DOC FIX: the old docstring documented a `continue_on_fail` parameter
    that does not exist.)

    Args:
        command: command to run.

    Returns:
        True iff the command exited with status 0.
    """
    ret_code = subprocess.call(command, shell=True)
    if ret_code != 0:
        console.print(f"[bold red]Error: `{command}` failed.")
    return ret_code == 0


def run_github_actions_file(filename: str):
    """Run a github actions file locally.

    Args:
        filename: Which yml github actions file to run.
    """
    with open(filename, "rb") as f:
        my_dict = yaml.safe_load(f)
    steps = my_dict["jobs"]["build"]["steps"]

    success = True

    for step in steps:
        if "name" in step and step["name"] in LOCAL_TESTS:
            # Flatten the multi-line "run:" script into one shell line and
            # strip "--check" so formatters fix files instead of verifying.
            compressed = step["run"].replace("\n", ";").replace("\\", "")
            compressed = compressed.replace("--check", "")
            curr_command = f"{compressed}"

            console.line()
            console.rule(f"[bold green]Running: {curr_command}")
            success = success and run_command(curr_command)
        else:
            skip_name = step["name"] if "name" in step else step["uses"]
            console.print(f"Skipping {skip_name}")

    # Code Testing
    console.line()
    console.rule("[bold green]Running pytest")
    success = success and run_command("pytest")

    # Add checks for building documentation
    console.line()
    console.rule("[bold green]Building Documentation")
    success = success and run_command(
        "cd docs/; make clean; make html SPHINXOPTS='-W;'"
    )

    if success:
        console.line()
        console.rule(characters="=")
        console.print(
            "[bold green]:TADA: :TADA: :TADA: ALL CHECKS PASSED :TADA: :TADA: :TADA:",
            justify="center",
        )
        console.rule(characters="=")
    else:
        console.line()
        console.rule(characters="=", style=Style(color="red"))
        console.print(
            "[bold red]:skull: :skull: :skull: ERRORS FOUND :skull: :skull: :skull:",
            justify="center",
        )
        console.rule(characters="=", style=Style(color="red"))


if __name__ == "__main__":
    run_github_actions_file(filename=".github/workflows/code_checks.yml")
"__main__": 89 | run_github_actions_file(filename=".github/workflows/code_checks.yml") 90 | -------------------------------------------------------------------------------- /scripts/run_profiler.py: -------------------------------------------------------------------------------- 1 | from typing import Callable 2 | 3 | import torch 4 | import tqdm 5 | 6 | import nerfacc 7 | 8 | # timing 9 | # https://github.com/pytorch/pytorch/commit/d2784c233bfc57a1d836d961694bcc8ec4ed45e4 10 | 11 | 12 | class Profiler: 13 | def __init__(self, warmup=10, repeat=1000): 14 | self.warmup = warmup 15 | self.repeat = repeat 16 | 17 | def __call__(self, func: Callable): 18 | # warmup 19 | for _ in range(self.warmup): 20 | func() 21 | torch.cuda.synchronize() 22 | 23 | # profile 24 | with torch.profiler.profile( 25 | activities=[ 26 | torch.profiler.ProfilerActivity.CPU, 27 | torch.profiler.ProfilerActivity.CUDA, 28 | ], 29 | profile_memory=True, 30 | ) as prof: 31 | for _ in range(self.repeat): 32 | func() 33 | torch.cuda.synchronize() 34 | 35 | # return 36 | events = prof.key_averages() 37 | # print(events.table(sort_by="self_cpu_time_total", row_limit=10)) 38 | self_cpu_time_total = ( 39 | sum([event.self_cpu_time_total for event in events]) / self.repeat 40 | ) 41 | self_cuda_time_total = ( 42 | sum([event.self_cuda_time_total for event in events]) / self.repeat 43 | ) 44 | self_cuda_memory_usage = max( 45 | [event.self_cuda_memory_usage for event in events] 46 | ) 47 | return ( 48 | self_cpu_time_total, # in us 49 | self_cuda_time_total, # in us 50 | self_cuda_memory_usage, # in bytes 51 | ) 52 | 53 | 54 | def main(): 55 | device = "cuda:0" 56 | torch.manual_seed(42) 57 | profiler = Profiler(warmup=10, repeat=100) 58 | 59 | # # contract 60 | # print("* contract") 61 | # x = torch.rand([1024, 3], device=device) 62 | # roi = torch.tensor([0, 0, 0, 1, 1, 1], dtype=torch.float32, device=device) 63 | # fn = lambda: nerfacc.contract( 64 | # x, roi=roi, 
type=nerfacc.ContractionType.UN_BOUNDED_TANH 65 | # ) 66 | # cpu_t, cuda_t, cuda_bytes = profiler(fn) 67 | # print(f"{cpu_t:.2f} us, {cuda_t:.2f} us, {cuda_bytes / 1024 / 1024:.2f} MB") 68 | 69 | # rendering 70 | print("* rendering") 71 | batch_size = 81920 72 | rays_o = torch.rand((batch_size, 3), device=device) 73 | rays_d = torch.randn((batch_size, 3), device=device) 74 | rays_d = rays_d / rays_d.norm(dim=-1, keepdim=True) 75 | 76 | ray_indices, t_starts, t_ends = nerfacc._ray_marching( 77 | rays_o, 78 | rays_d, 79 | near_plane=0.1, 80 | far_plane=1.0, 81 | render_step_size=1e-1, 82 | ) 83 | sigmas = torch.randn_like(t_starts, requires_grad=True) 84 | fn = ( 85 | lambda: nerfacc.render_weight_from_density( 86 | ray_indices, t_starts, t_ends, sigmas 87 | ) 88 | .sum() 89 | .backward() 90 | ) 91 | fn() 92 | torch.cuda.synchronize() 93 | for _ in tqdm.tqdm(range(100)): 94 | fn() 95 | torch.cuda.synchronize() 96 | 97 | cpu_t, cuda_t, cuda_bytes = profiler(fn) 98 | print(f"{cpu_t:.2f} us, {cuda_t:.2f} us, {cuda_bytes / 1024 / 1024:.2f} MB") 99 | 100 | packed_info = nerfacc.pack_info(ray_indices, n_rays=batch_size) 101 | fn = ( 102 | lambda: nerfacc._vol_rendering._RenderingDensity.apply( 103 | packed_info, t_starts, t_ends, sigmas, 0 104 | ) 105 | .sum() 106 | .backward() 107 | ) 108 | fn() 109 | torch.cuda.synchronize() 110 | for _ in tqdm.tqdm(range(100)): 111 | fn() 112 | torch.cuda.synchronize() 113 | cpu_t, cuda_t, cuda_bytes = profiler(fn) 114 | print(f"{cpu_t:.2f} us, {cuda_t:.2f} us, {cuda_bytes / 1024 / 1024:.2f} MB") 115 | 116 | 117 | if __name__ == "__main__": 118 | main() 119 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | multi_line_output = 3 3 | line_length = 80 4 | include_trailing_comma = true 5 | skip=./examples/pycolmap,./benchmarks 
# --- setup.py ---
import glob
import os
import os.path as osp
import platform
import sys

from setuptools import find_packages, setup

# Read __version__ from the package source without importing the package
# (importing would pull in torch/CUDA at metadata time).
# FIX: the previous `exec(open(...).read())` never closed the file handle;
# use a context manager.
__version__ = None
with open("nerfacc/version.py", "r") as f:
    exec(f.read())

URL = "https://github.com/nerfstudio-project/nerfacc"

BUILD_NO_CUDA = os.getenv("BUILD_NO_CUDA", "0") == "1"
WITH_SYMBOLS = os.getenv("WITH_SYMBOLS", "0") == "1"


def get_ext():
    """Return a `BuildExtension` subclass configured for this package.

    Imported lazily so plain metadata commands work without torch.
    """
    from torch.utils.cpp_extension import BuildExtension

    return BuildExtension.with_options(
        no_python_abi_suffix=True, use_ninja=False
    )


def get_extensions():
    """Build the list with the single CUDA/HIP extension of this package."""
    import torch
    from torch.__config__ import parallel_info
    from torch.utils.cpp_extension import CUDAExtension

    extensions_dir = osp.join("nerfacc", "cuda", "csrc")
    sources = glob.glob(osp.join(extensions_dir, "*.cu")) + glob.glob(
        osp.join(extensions_dir, "*.cpp")
    )
    # remove generated 'hip' files, in case of rebuilds
    sources = [path for path in sources if "hip" not in path]

    undef_macros = []
    define_macros = []

    if sys.platform == "win32":
        define_macros += [("nerfacc_EXPORTS", None)]

    extra_compile_args = {"cxx": ["-O3"]}
    if not os.name == "nt":  # Not on Windows:
        extra_compile_args["cxx"] += ["-Wno-sign-compare"]
    extra_link_args = [] if WITH_SYMBOLS else ["-s"]

    # Enable OpenMP when torch was built with it (not on macOS).
    info = parallel_info()
    if (
        "backend: OpenMP" in info
        and "OpenMP not found" not in info
        and sys.platform != "darwin"
    ):
        extra_compile_args["cxx"] += ["-DAT_PARALLEL_OPENMP"]
        if sys.platform == "win32":
            extra_compile_args["cxx"] += ["/openmp"]
        else:
            extra_compile_args["cxx"] += ["-fopenmp"]
    else:
        print("Compiling without OpenMP...")

    # Compile for mac arm64
    if sys.platform == "darwin" and platform.machine() == "arm64":
        extra_compile_args["cxx"] += ["-arch", "arm64"]
        extra_link_args += ["-arch", "arm64"]

    nvcc_flags = os.getenv("NVCC_FLAGS", "")
    nvcc_flags = [] if nvcc_flags == "" else nvcc_flags.split(" ")
    nvcc_flags += ["-O3"]
    if torch.version.hip:
        # USE_ROCM was added to later versions of PyTorch.
        # Define here to support older PyTorch versions as well:
        define_macros += [("USE_ROCM", None)]
        undef_macros += ["__HIP_NO_HALF_CONVERSIONS__"]
    else:
        nvcc_flags += ["--expt-relaxed-constexpr"]
    extra_compile_args["nvcc"] = nvcc_flags

    extension = CUDAExtension(
        # FIX: was an f-string with no placeholders.
        "nerfacc.csrc",
        sources,
        include_dirs=[osp.join(extensions_dir, "include")],
        define_macros=define_macros,
        undef_macros=undef_macros,
        extra_compile_args=extra_compile_args,
        extra_link_args=extra_link_args,
    )

    return [extension]


# work-around hipify abs paths
include_package_data = True
# if torch.cuda.is_available() and torch.version.hip:
#     include_package_data = False

setup(
    name="nerfacc",
    version=__version__,
    description="A General NeRF Acceleration Toolbox",
    author="Ruilong",
    author_email="ruilongli94@gmail.com",
    url=URL,
    download_url=f"{URL}/archive/{__version__}.tar.gz",
    keywords=[],
    python_requires=">=3.7",
    install_requires=[
        "rich>=12",
        "torch",
        "typing_extensions; python_version<'3.8'",
    ],
    extras_require={
        # dev dependencies. Install them by `pip install nerfacc[dev]`
        "dev": [
            "black[jupyter]==22.3.0",
            "isort==5.10.1",
            "pylint==2.13.4",
            "pytest==7.1.2",
            "pytest-xdist==2.5.0",
            "typeguard>=2.13.3",
            "pyyaml==6.0",
            "build",
            "twine",
            "ninja",
        ],
    },
    ext_modules=get_extensions() if not BUILD_NO_CUDA else [],
    cmdclass={"build_ext": get_ext()} if not BUILD_NO_CUDA else {},
    packages=find_packages(),
    include_package_data=include_package_data,
)
# --- tests/test_camera.py ---
from typing import Tuple

import pytest
import torch
import tqdm
from torch import Tensor

device = "cuda:0"


# BUG FIX (applies to every test file below too): the condition used to be
# `not torch.cuda.is_available` -- the *function object*, which is always
# truthy, so `not ...` was always False and the skip never triggered on
# CPU-only machines. The function must be called.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
@torch.no_grad()
def test_opencv_lens_undistortion():
    from nerfacc.cameras import (
        _opencv_lens_distortion,
        _opencv_lens_distortion_fisheye,
        _opencv_lens_undistortion,
        opencv_lens_undistortion,
        opencv_lens_undistortion_fisheye,
    )

    torch.manual_seed(42)

    x = torch.rand((3, 1000, 2), device=device)

    # Round-trip check: undistort then distort must recover the input.
    params = torch.rand((8), device=device) * 0.01
    x_undistort = opencv_lens_undistortion(x, params, 1e-5, 10)
    _x_undistort = _opencv_lens_undistortion(x, params, 1e-5, 10)
    assert torch.allclose(x_undistort, _x_undistort, atol=1e-5)
    x_distort = _opencv_lens_distortion(x_undistort, params)
    assert torch.allclose(x, x_distort, atol=1e-5), (x - x_distort).abs().max()
    # print(x[0, 0], x_distort[0, 0], x_undistort[0, 0])

    params = torch.rand((4), device=device) * 0.01
    x_undistort = opencv_lens_undistortion_fisheye(x, params, 1e-5, 10)
    x_distort = _opencv_lens_distortion_fisheye(x_undistort, params)
    assert torch.allclose(x, x_distort, atol=1e-5), (x - x_distort).abs().max()
    # print(x[0, 0], x_distort[0, 0], x_undistort[0, 0])


if __name__ == "__main__":
    test_opencv_lens_undistortion()


# --- tests/test_pack.py ---
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_pack_info():
    from nerfacc.pack import pack_info

    _packed_info = torch.tensor(
        [[0, 1], [1, 0], [1, 4]], dtype=torch.int32, device=device
    )
    ray_indices = torch.tensor(
        [0, 2, 2, 2, 2], dtype=torch.int64, device=device
    )
    packed_info = pack_info(ray_indices, n_rays=_packed_info.shape[0])
    assert (packed_info == _packed_info).all()


if __name__ == "__main__":
    test_pack_info()


# --- tests/test_pdf.py ---
def _create_intervals(n_rays, n_samples, flat=False):
    """Build a random, sorted RayIntervals fixture (batched or flattened)."""
    from nerfacc.data_specs import RayIntervals

    torch.manual_seed(42)
    vals = torch.rand((n_rays, n_samples + 1), device=device)
    vals = torch.sort(vals, -1)[0]

    # Randomly keep ~half the samples; an edge is "left" of a kept sample
    # and "right" of the previous kept sample.
    sample_masks = torch.rand((n_rays, n_samples), device=device) > 0.5
    is_lefts = torch.cat(
        [
            sample_masks,
            torch.zeros((n_rays, 1), device=device, dtype=torch.bool),
        ],
        dim=-1,
    )
    is_rights = torch.cat(
        [
            torch.zeros((n_rays, 1), device=device, dtype=torch.bool),
            sample_masks,
        ],
        dim=-1,
    )
    if not flat:
        return RayIntervals(vals=vals)
    else:
        interval_masks = is_lefts | is_rights
        vals = vals[interval_masks]
        is_lefts = is_lefts[interval_masks]
        is_rights = is_rights[interval_masks]
        chunk_cnts = (interval_masks).long().sum(-1)
        chunk_starts = torch.cumsum(chunk_cnts, 0) - chunk_cnts
        packed_info = torch.stack([chunk_starts, chunk_cnts], -1)

        return RayIntervals(
            vals, packed_info, is_left=is_lefts, is_right=is_rights
        )


@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_searchsorted():
    from nerfacc.data_specs import RayIntervals
    from nerfacc.pdf import searchsorted

    torch.manual_seed(42)
    query: RayIntervals = _create_intervals(10, 100, flat=False)
    key: RayIntervals = _create_intervals(10, 100, flat=False)

    ids_left, ids_right = searchsorted(key, query)
    y = key.vals.gather(-1, ids_right)

    # Reference: plain torch.searchsorted with out-of-range ids clamped.
    _ids_right = torch.searchsorted(key.vals, query.vals, right=True)
    _ids_right = torch.clamp(_ids_right, 0, key.vals.shape[-1] - 1)
    _y = key.vals.gather(-1, _ids_right)

    assert torch.allclose(ids_right, _ids_right)
    assert torch.allclose(y, _y)
torch.allclose(_intervals.vals[i : i + 1], _vals, atol=1e-4) 94 | assert torch.allclose(_samples.vals[i : i + 1], _mids, atol=1e-4) 95 | 96 | 97 | @pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device") 98 | def test_pdf_loss(): 99 | from nerfacc.data_specs import RayIntervals 100 | from nerfacc.estimators.prop_net import _lossfun_outer, _pdf_loss 101 | from nerfacc.pdf import _sample_from_weighted, importance_sampling 102 | 103 | torch.manual_seed(42) 104 | intervals: RayIntervals = _create_intervals(5, 100, flat=False) 105 | cdfs = torch.rand_like(intervals.vals) 106 | cdfs = torch.sort(cdfs, -1)[0] 107 | n_intervels_per_ray = 10 108 | stratified = False 109 | 110 | _intervals, _samples = importance_sampling( 111 | intervals, 112 | cdfs, 113 | n_intervels_per_ray, 114 | stratified, 115 | ) 116 | _cdfs = torch.rand_like(_intervals.vals) 117 | _cdfs = torch.sort(_cdfs, -1)[0] 118 | 119 | loss = _pdf_loss(intervals, cdfs, _intervals, _cdfs) 120 | 121 | loss2 = _lossfun_outer( 122 | intervals.vals, 123 | cdfs[:, 1:] - cdfs[:, :-1], 124 | _intervals.vals, 125 | _cdfs[:, 1:] - _cdfs[:, :-1], 126 | ) 127 | assert torch.allclose(loss, loss2, atol=1e-4) 128 | 129 | 130 | if __name__ == "__main__": 131 | test_importance_sampling() 132 | test_searchsorted() 133 | test_pdf_loss() 134 | -------------------------------------------------------------------------------- /tests/test_rendering.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import torch 3 | 4 | device = "cuda:0" 5 | 6 | 7 | @pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device") 8 | def test_render_visibility(): 9 | from nerfacc.volrend import render_visibility_from_alpha 10 | 11 | ray_indices = torch.tensor( 12 | [0, 2, 2, 2, 2], dtype=torch.int64, device=device 13 | ) # (all_samples,) 14 | alphas = torch.tensor( 15 | [0.4, 0.3, 0.8, 0.8, 0.5], dtype=torch.float32, device=device 16 | ) # (all_samples,) 17 | 18 | # 
transmittance: [1.0, 1.0, 0.7, 0.14, 0.028] 19 | vis = render_visibility_from_alpha( 20 | alphas, ray_indices=ray_indices, early_stop_eps=0.03, alpha_thre=0.0 21 | ) 22 | vis_tgt = torch.tensor( 23 | [True, True, True, True, False], dtype=torch.bool, device=device 24 | ) 25 | assert torch.allclose(vis, vis_tgt) 26 | 27 | # transmittance: [1.0, 1.0, 1.0, 0.2, 0.04] 28 | vis = render_visibility_from_alpha( 29 | alphas, ray_indices=ray_indices, early_stop_eps=0.05, alpha_thre=0.35 30 | ) 31 | vis_tgt = torch.tensor( 32 | [True, False, True, True, False], dtype=torch.bool, device=device 33 | ) 34 | assert torch.allclose(vis, vis_tgt) 35 | 36 | 37 | @pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device") 38 | def test_render_weight_from_alpha(): 39 | from nerfacc.volrend import render_weight_from_alpha 40 | 41 | ray_indices = torch.tensor( 42 | [0, 2, 2, 2, 2], dtype=torch.int64, device=device 43 | ) # (all_samples,) 44 | alphas = torch.tensor( 45 | [0.4, 0.3, 0.8, 0.8, 0.5], dtype=torch.float32, device=device 46 | ) # (all_samples,) 47 | 48 | # transmittance: [1.0, 1.0, 0.7, 0.14, 0.028] 49 | weights, _ = render_weight_from_alpha( 50 | alphas, ray_indices=ray_indices, n_rays=3 51 | ) 52 | weights_tgt = torch.tensor( 53 | [1.0 * 0.4, 1.0 * 0.3, 0.7 * 0.8, 0.14 * 0.8, 0.028 * 0.5], 54 | dtype=torch.float32, 55 | device=device, 56 | ) 57 | assert torch.allclose(weights, weights_tgt) 58 | 59 | 60 | @pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device") 61 | def test_render_weight_from_density(): 62 | from nerfacc.volrend import ( 63 | render_weight_from_alpha, 64 | render_weight_from_density, 65 | ) 66 | 67 | ray_indices = torch.tensor( 68 | [0, 2, 2, 2, 2], dtype=torch.int64, device=device 69 | ) # (all_samples,) 70 | sigmas = torch.rand( 71 | (ray_indices.shape[0],), device=device 72 | ) # (all_samples,) 73 | t_starts = torch.rand_like(sigmas) 74 | t_ends = torch.rand_like(sigmas) + 1.0 75 | alphas = 1.0 - torch.exp(-sigmas * (t_ends 
- t_starts)) 76 | 77 | weights, _, _ = render_weight_from_density( 78 | t_starts, t_ends, sigmas, ray_indices=ray_indices, n_rays=3 79 | ) 80 | weights_tgt, _ = render_weight_from_alpha( 81 | alphas, ray_indices=ray_indices, n_rays=3 82 | ) 83 | assert torch.allclose(weights, weights_tgt) 84 | 85 | 86 | @pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device") 87 | def test_accumulate_along_rays(): 88 | from nerfacc.volrend import accumulate_along_rays 89 | 90 | ray_indices = torch.tensor( 91 | [0, 2, 2, 2, 2], dtype=torch.int64, device=device 92 | ) # (all_samples,) 93 | weights = torch.tensor( 94 | [0.4, 0.3, 0.8, 0.8, 0.5], dtype=torch.float32, device=device 95 | ) # (all_samples,) 96 | values = torch.rand((5, 2), device=device) # (all_samples, 2) 97 | 98 | ray_values = accumulate_along_rays( 99 | weights, values=values, ray_indices=ray_indices, n_rays=3 100 | ) 101 | assert ray_values.shape == (3, 2) 102 | assert torch.allclose(ray_values[0, :], weights[0, None] * values[0, :]) 103 | assert (ray_values[1, :] == 0).all() 104 | assert torch.allclose( 105 | ray_values[2, :], (weights[1:, None] * values[1:]).sum(dim=0) 106 | ) 107 | 108 | 109 | @pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device") 110 | def test_grads(): 111 | from nerfacc.volrend import ( 112 | render_transmittance_from_density, 113 | render_weight_from_alpha, 114 | render_weight_from_density, 115 | ) 116 | 117 | ray_indices = torch.tensor( 118 | [0, 2, 2, 2, 2], dtype=torch.int64, device=device 119 | ) # (all_samples,) 120 | packed_info = torch.tensor( 121 | [[0, 1], [1, 0], [1, 4]], dtype=torch.long, device=device 122 | ) 123 | sigmas = torch.tensor([0.4, 0.8, 0.1, 0.8, 0.1], device=device) 124 | sigmas.requires_grad = True 125 | t_starts = torch.rand_like(sigmas) 126 | t_ends = t_starts + 1.0 127 | 128 | weights_ref = torch.tensor( 129 | [0.3297, 0.5507, 0.0428, 0.2239, 0.0174], device=device 130 | ) 131 | sigmas_grad_ref = torch.tensor( 132 | [0.6703, 
0.1653, 0.1653, 0.1653, 0.1653], device=device 133 | ) 134 | 135 | # naive impl. trans from sigma 136 | trans, _ = render_transmittance_from_density( 137 | t_starts, t_ends, sigmas, ray_indices=ray_indices, n_rays=3 138 | ) 139 | weights = trans * (1.0 - torch.exp(-sigmas * (t_ends - t_starts))) 140 | weights.sum().backward() 141 | sigmas_grad = sigmas.grad.clone() 142 | sigmas.grad.zero_() 143 | assert torch.allclose(weights_ref, weights, atol=1e-4) 144 | assert torch.allclose(sigmas_grad_ref, sigmas_grad, atol=1e-4) 145 | 146 | # naive impl. trans from alpha 147 | trans, _ = render_transmittance_from_density( 148 | t_starts, t_ends, sigmas, packed_info=packed_info, n_rays=3 149 | ) 150 | weights = trans * (1.0 - torch.exp(-sigmas * (t_ends - t_starts))) 151 | weights.sum().backward() 152 | sigmas_grad = sigmas.grad.clone() 153 | sigmas.grad.zero_() 154 | assert torch.allclose(weights_ref, weights, atol=1e-4) 155 | assert torch.allclose(sigmas_grad_ref, sigmas_grad, atol=1e-4) 156 | 157 | weights, _, _ = render_weight_from_density( 158 | t_starts, t_ends, sigmas, ray_indices=ray_indices, n_rays=3 159 | ) 160 | weights.sum().backward() 161 | sigmas_grad = sigmas.grad.clone() 162 | sigmas.grad.zero_() 163 | assert torch.allclose(weights_ref, weights, atol=1e-4) 164 | assert torch.allclose(sigmas_grad_ref, sigmas_grad, atol=1e-4) 165 | 166 | weights, _, _ = render_weight_from_density( 167 | t_starts, t_ends, sigmas, packed_info=packed_info, n_rays=3 168 | ) 169 | weights.sum().backward() 170 | sigmas_grad = sigmas.grad.clone() 171 | sigmas.grad.zero_() 172 | assert torch.allclose(weights_ref, weights, atol=1e-4) 173 | assert torch.allclose(sigmas_grad_ref, sigmas_grad, atol=1e-4) 174 | 175 | alphas = 1.0 - torch.exp(-sigmas * (t_ends - t_starts)) 176 | weights, _ = render_weight_from_alpha( 177 | alphas, ray_indices=ray_indices, n_rays=3 178 | ) 179 | weights.sum().backward() 180 | sigmas_grad = sigmas.grad.clone() 181 | sigmas.grad.zero_() 182 | assert 
torch.allclose(weights_ref, weights, atol=1e-4) 183 | assert torch.allclose(sigmas_grad_ref, sigmas_grad, atol=1e-4) 184 | 185 | alphas = 1.0 - torch.exp(-sigmas * (t_ends - t_starts)) 186 | weights, _ = render_weight_from_alpha( 187 | alphas, packed_info=packed_info, n_rays=3 188 | ) 189 | weights.sum().backward() 190 | sigmas_grad = sigmas.grad.clone() 191 | sigmas.grad.zero_() 192 | assert torch.allclose(weights_ref, weights, atol=1e-4) 193 | assert torch.allclose(sigmas_grad_ref, sigmas_grad, atol=1e-4) 194 | 195 | 196 | @pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device") 197 | def test_rendering(): 198 | from nerfacc.volrend import rendering 199 | 200 | def rgb_sigma_fn(t_starts, t_ends, ray_indices): 201 | return torch.stack([t_starts] * 3, dim=-1), t_starts 202 | 203 | ray_indices = torch.tensor( 204 | [0, 2, 2, 2, 2], dtype=torch.int64, device=device 205 | ) # (all_samples,) 206 | sigmas = torch.rand( 207 | (ray_indices.shape[0],), device=device 208 | ) # (all_samples,) 209 | t_starts = torch.rand_like(sigmas) 210 | t_ends = torch.rand_like(sigmas) + 1.0 211 | 212 | _, _, _, _ = rendering( 213 | t_starts, 214 | t_ends, 215 | ray_indices=ray_indices, 216 | n_rays=3, 217 | rgb_sigma_fn=rgb_sigma_fn, 218 | ) 219 | 220 | 221 | if __name__ == "__main__": 222 | test_render_visibility() 223 | test_render_weight_from_alpha() 224 | test_render_weight_from_density() 225 | test_accumulate_along_rays() 226 | test_grads() 227 | test_rendering() 228 | -------------------------------------------------------------------------------- /tests/test_scan.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import torch 3 | 4 | device = "cuda:0" 5 | 6 | 7 | @pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device") 8 | def test_inclusive_sum(): 9 | from nerfacc.scan import inclusive_sum 10 | 11 | torch.manual_seed(42) 12 | 13 | data = torch.rand((5, 1000), device=device, 
requires_grad=True) 14 | outputs1 = inclusive_sum(data) 15 | outputs1 = outputs1.flatten() 16 | outputs1.sum().backward() 17 | grad1 = data.grad.clone() 18 | data.grad.zero_() 19 | 20 | chunk_starts = torch.arange( 21 | 0, data.numel(), data.shape[1], device=device, dtype=torch.long 22 | ) 23 | chunk_cnts = torch.full( 24 | (data.shape[0],), data.shape[1], dtype=torch.long, device=device 25 | ) 26 | packed_info = torch.stack([chunk_starts, chunk_cnts], dim=-1) 27 | flatten_data = data.flatten() 28 | outputs2 = inclusive_sum(flatten_data, packed_info=packed_info) 29 | outputs2.sum().backward() 30 | grad2 = data.grad.clone() 31 | data.grad.zero_() 32 | 33 | indices = torch.arange(data.shape[0], device=device, dtype=torch.long) 34 | indices = indices.repeat_interleave(data.shape[1]) 35 | indices = indices.flatten() 36 | outputs3 = inclusive_sum(flatten_data, indices=indices) 37 | outputs3.sum().backward() 38 | grad3 = data.grad.clone() 39 | data.grad.zero_() 40 | 41 | assert torch.allclose(outputs1, outputs2) 42 | assert torch.allclose(grad1, grad2) 43 | 44 | assert torch.allclose(outputs1, outputs3) 45 | assert torch.allclose(grad1, grad3) 46 | 47 | 48 | @pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device") 49 | def test_exclusive_sum(): 50 | from nerfacc.scan import exclusive_sum 51 | 52 | torch.manual_seed(42) 53 | 54 | data = torch.rand((5, 1000), device=device, requires_grad=True) 55 | outputs1 = exclusive_sum(data) 56 | outputs1 = outputs1.flatten() 57 | outputs1.sum().backward() 58 | grad1 = data.grad.clone() 59 | data.grad.zero_() 60 | 61 | chunk_starts = torch.arange( 62 | 0, data.numel(), data.shape[1], device=device, dtype=torch.long 63 | ) 64 | chunk_cnts = torch.full( 65 | (data.shape[0],), data.shape[1], dtype=torch.long, device=device 66 | ) 67 | packed_info = torch.stack([chunk_starts, chunk_cnts], dim=-1) 68 | flatten_data = data.flatten() 69 | outputs2 = exclusive_sum(flatten_data, packed_info=packed_info) 70 | 
outputs2.sum().backward() 71 | grad2 = data.grad.clone() 72 | data.grad.zero_() 73 | 74 | indices = torch.arange(data.shape[0], device=device, dtype=torch.long) 75 | indices = indices.repeat_interleave(data.shape[1]) 76 | indices = indices.flatten() 77 | outputs3 = exclusive_sum(flatten_data, indices=indices) 78 | outputs3.sum().backward() 79 | grad3 = data.grad.clone() 80 | data.grad.zero_() 81 | 82 | # TODO: check exclusive sum. numeric error? 83 | # print((outputs1 - outputs2).abs().max()) # 0.0002 84 | assert torch.allclose(outputs1, outputs2, atol=3e-4) 85 | assert torch.allclose(grad1, grad2) 86 | 87 | assert torch.allclose(outputs1, outputs3, atol=3e-4) 88 | assert torch.allclose(grad1, grad3) 89 | 90 | 91 | @pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device") 92 | def test_inclusive_prod(): 93 | from nerfacc.scan import inclusive_prod 94 | 95 | torch.manual_seed(42) 96 | 97 | data = torch.rand((5, 1000), device=device, requires_grad=True) 98 | outputs1 = inclusive_prod(data) 99 | outputs1 = outputs1.flatten() 100 | outputs1.sum().backward() 101 | grad1 = data.grad.clone() 102 | data.grad.zero_() 103 | 104 | chunk_starts = torch.arange( 105 | 0, data.numel(), data.shape[1], device=device, dtype=torch.long 106 | ) 107 | chunk_cnts = torch.full( 108 | (data.shape[0],), data.shape[1], dtype=torch.long, device=device 109 | ) 110 | packed_info = torch.stack([chunk_starts, chunk_cnts], dim=-1) 111 | flatten_data = data.flatten() 112 | outputs2 = inclusive_prod(flatten_data, packed_info=packed_info) 113 | outputs2.sum().backward() 114 | grad2 = data.grad.clone() 115 | data.grad.zero_() 116 | 117 | indices = torch.arange(data.shape[0], device=device, dtype=torch.long) 118 | indices = indices.repeat_interleave(data.shape[1]) 119 | indices = indices.flatten() 120 | outputs3 = inclusive_prod(flatten_data, indices=indices) 121 | outputs3.sum().backward() 122 | grad3 = data.grad.clone() 123 | data.grad.zero_() 124 | 125 | assert 
torch.allclose(outputs1, outputs2) 126 | assert torch.allclose(grad1, grad2) 127 | 128 | assert torch.allclose(outputs1, outputs3) 129 | assert torch.allclose(grad1, grad3) 130 | 131 | 132 | @pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device") 133 | def test_exclusive_prod(): 134 | from nerfacc.scan import exclusive_prod 135 | 136 | torch.manual_seed(42) 137 | 138 | data = torch.rand((5, 1000), device=device, requires_grad=True) 139 | outputs1 = exclusive_prod(data) 140 | outputs1 = outputs1.flatten() 141 | outputs1.sum().backward() 142 | grad1 = data.grad.clone() 143 | data.grad.zero_() 144 | 145 | chunk_starts = torch.arange( 146 | 0, data.numel(), data.shape[1], device=device, dtype=torch.long 147 | ) 148 | chunk_cnts = torch.full( 149 | (data.shape[0],), data.shape[1], dtype=torch.long, device=device 150 | ) 151 | packed_info = torch.stack([chunk_starts, chunk_cnts], dim=-1) 152 | flatten_data = data.flatten() 153 | outputs2 = exclusive_prod(flatten_data, packed_info=packed_info) 154 | outputs2.sum().backward() 155 | grad2 = data.grad.clone() 156 | data.grad.zero_() 157 | 158 | indices = torch.arange(data.shape[0], device=device, dtype=torch.long) 159 | indices = indices.repeat_interleave(data.shape[1]) 160 | indices = indices.flatten() 161 | outputs3 = exclusive_prod(flatten_data, indices=indices) 162 | outputs3.sum().backward() 163 | grad3 = data.grad.clone() 164 | data.grad.zero_() 165 | 166 | # TODO: check exclusive sum. numeric error? 
167 | # print((outputs1 - outputs2).abs().max()) 168 | assert torch.allclose(outputs1, outputs2) 169 | assert torch.allclose(grad1, grad2) 170 | 171 | assert torch.allclose(outputs1, outputs3) 172 | assert torch.allclose(grad1, grad3) 173 | 174 | 175 | def profile(): 176 | import tqdm 177 | 178 | from nerfacc.scan import inclusive_sum 179 | 180 | torch.manual_seed(42) 181 | 182 | data = torch.rand((8192, 8192), device=device, requires_grad=True) 183 | 184 | chunk_starts = torch.arange( 185 | 0, data.numel(), data.shape[1], device=device, dtype=torch.long 186 | ) 187 | chunk_cnts = torch.full( 188 | (data.shape[0],), data.shape[1], dtype=torch.long, device=device 189 | ) 190 | packed_info = torch.stack([chunk_starts, chunk_cnts], dim=-1) 191 | flatten_data = data.flatten() 192 | torch.cuda.synchronize() 193 | for _ in tqdm.trange(2000): 194 | outputs2 = inclusive_sum(flatten_data, packed_info=packed_info) 195 | outputs2.sum().backward() 196 | 197 | indices = torch.arange(data.shape[0], device=device, dtype=torch.long) 198 | indices = indices.repeat_interleave(data.shape[1]) 199 | indices = indices.flatten() 200 | torch.cuda.synchronize() 201 | for _ in tqdm.trange(2000): 202 | outputs3 = inclusive_sum(flatten_data, indices=indices) 203 | outputs3.sum().backward() 204 | 205 | 206 | if __name__ == "__main__": 207 | test_inclusive_sum() 208 | test_exclusive_sum() 209 | test_inclusive_prod() 210 | test_exclusive_prod() 211 | profile() 212 | -------------------------------------------------------------------------------- /tests/test_vdb.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | import pytest 4 | import torch 5 | 6 | device = "cuda:0" 7 | 8 | 9 | @pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device") 10 | def test_traverse_vdbs(): 11 | try: 12 | import fvdb 13 | except ImportError: 14 | warnings.warn("fVDB is not installed. 
Skip the test.") 15 | return 16 | 17 | from nerfacc.estimators.vdb import traverse_vdbs 18 | from nerfacc.grid import _enlarge_aabb, traverse_grids 19 | 20 | torch.manual_seed(42) 21 | n_rays = 100 22 | n_aabbs = 1 23 | reso = 32 24 | cone_angle = 1e-3 25 | 26 | rays_o = torch.randn((n_rays, 3), device=device) 27 | rays_d = torch.randn((n_rays, 3), device=device) 28 | rays_d = rays_d / rays_d.norm(dim=-1, keepdim=True) 29 | 30 | base_aabb = torch.tensor([-1.0, -1.0, -1.0, 1.0, 1.0, 1.0], device=device) 31 | aabbs = torch.stack( 32 | [_enlarge_aabb(base_aabb, 2**i) for i in range(n_aabbs)] 33 | ) 34 | 35 | # Traverse 1-level cascaded grid 36 | binaries = torch.rand((n_aabbs, reso, reso, reso), device=device) > 0.5 37 | 38 | intervals, samples, _ = traverse_grids( 39 | rays_o, rays_d, binaries, aabbs, cone_angle=cone_angle 40 | ) 41 | ray_indices = samples.ray_indices 42 | t_starts = intervals.vals[intervals.is_left] 43 | t_ends = intervals.vals[intervals.is_right] 44 | 45 | # Traverse a single fvdb grid 46 | grid = fvdb.GridBatch(device=device) 47 | voxel_sizes = (aabbs[:, 3:] - aabbs[:, :3]) / reso 48 | origins = aabbs[:, :3] + voxel_sizes / 2 49 | ijks = torch.stack(torch.where(binaries[0]), dim=-1) 50 | grid.set_from_ijk(ijks, voxel_sizes=voxel_sizes, origins=origins) 51 | 52 | t_starts, t_ends, ray_indices = traverse_vdbs( 53 | rays_o, rays_d, grid, cone_angle=cone_angle 54 | ) 55 | 56 | assert torch.allclose(t_starts, t_starts) 57 | assert torch.allclose(t_ends, t_ends) 58 | assert torch.all(ray_indices == ray_indices) 59 | 60 | 61 | @pytest.mark.skipif(not torch.cuda.is_available, reason="No CUDA device") 62 | def test_base_vdb_estimator(): 63 | try: 64 | import fvdb 65 | except ImportError: 66 | warnings.warn("fVDB is not installed. 
Skip the test.") 67 | return 68 | 69 | import tqdm 70 | 71 | from nerfacc.estimators.occ_grid import OccGridEstimator 72 | from nerfacc.estimators.vdb import VDBEstimator 73 | from nerfacc.grid import _query 74 | 75 | torch.manual_seed(42) 76 | 77 | profile = True 78 | n_aabbs = 1 79 | reso = 32 80 | base_aabb = torch.tensor([-1.0, -1.0, -1.0, 1.0, 1.0, 1.0], device=device) 81 | occ_thre = 0.1 82 | 83 | # Create the target occ grid 84 | occs = torch.rand((n_aabbs, reso, reso, reso), device=device) 85 | 86 | def occ_eval_fn(x): 87 | return _query(x, occs, base_aabb)[0] 88 | 89 | # Create the OccGridEstimator 90 | estimator1 = OccGridEstimator(base_aabb, reso, n_aabbs).to(device) 91 | for _ in tqdm.trange(1000) if profile else range(1): 92 | estimator1._update(step=0, occ_eval_fn=occ_eval_fn, occ_thre=occ_thre) 93 | occs1 = estimator1.occs.reshape_as(occs) 94 | err = (occs - occs1).abs().max() 95 | if not profile: 96 | assert err == 0 97 | 98 | # Create the OccGridEstimator 99 | voxel_sizes = (base_aabb[3:] - base_aabb[:3]) / reso 100 | origins = base_aabb[:3] + voxel_sizes / 2 101 | grid = fvdb.sparse_grid_from_dense( 102 | 1, (reso, reso, reso), voxel_sizes=voxel_sizes, origins=origins 103 | ) 104 | estimator2 = VDBEstimator(grid).to(device) 105 | for _ in tqdm.trange(1000) if profile else range(1): 106 | estimator2._update(step=0, occ_eval_fn=occ_eval_fn, occ_thre=occ_thre) 107 | 108 | ijks = estimator1.grid_coords 109 | index = estimator2.grid.ijk_to_index(ijks).jdata 110 | occs2 = estimator2.occs[index].reshape_as(occs) 111 | err = (occs - occs2).abs().max() 112 | if not profile: 113 | assert err == 0 114 | 115 | # ray marching in sparse grid 116 | n_rays = 100 117 | n_aabbs = 1 118 | reso = 32 119 | cone_angle = 1e-3 120 | 121 | rays_o = torch.randn((n_rays, 3), device=device) 122 | rays_d = torch.randn((n_rays, 3), device=device) 123 | rays_d = rays_d / rays_d.norm(dim=-1, keepdim=True) 124 | 125 | for _ in tqdm.trange(1000) if profile else range(1): 126 | 
ray_indices1, t_starts1, t_ends1 = estimator1.sampling( 127 | rays_o, rays_d, render_step_size=0.1, cone_angle=cone_angle 128 | ) 129 | for _ in tqdm.trange(1000) if profile else range(1): 130 | ray_indices2, t_starts2, t_ends2 = estimator2.sampling( 131 | rays_o, rays_d, render_step_size=0.1, cone_angle=cone_angle 132 | ) 133 | assert torch.all(ray_indices1 == ray_indices2) 134 | assert torch.allclose(t_starts1, t_starts2) 135 | assert torch.allclose(t_ends1, t_ends2) 136 | 137 | 138 | if __name__ == "__main__": 139 | test_traverse_vdbs() 140 | test_base_vdb_estimator() 141 | --------------------------------------------------------------------------------