├── .github
└── workflows
│ ├── aws
│ └── update_index.py
│ ├── building.yml
│ ├── code_checks.yml
│ ├── cuda
│ ├── Linux-env.sh
│ ├── Linux.sh
│ ├── Windows-env.sh
│ └── Windows.sh
│ ├── doc.yml
│ └── publish.yml
├── .gitignore
├── .gitmodules
├── .pre-commit-config.yaml
├── .readthedocs.yaml
├── CHANGELOG.md
├── LICENSE
├── MANIFEST.in
├── README.md
├── docs
├── Makefile
├── requirements.txt
└── source
│ ├── _static
│ ├── css
│ │ └── readthedocs.css
│ └── images
│ │ ├── coding
│ │ └── gpu_util.png
│ │ ├── illustration_occgrid.png
│ │ ├── illustration_propnet.png
│ │ ├── logo.png
│ │ ├── logo4x.png
│ │ ├── plot_occgrid.png
│ │ ├── plot_propnet.png
│ │ └── teaser.jpg
│ ├── apis
│ ├── estimators.rst
│ ├── generated
│ │ ├── nerfacc.accumulate_along_rays.rst
│ │ ├── nerfacc.exclusive_prod.rst
│ │ ├── nerfacc.exclusive_sum.rst
│ │ ├── nerfacc.importance_sampling.rst
│ │ ├── nerfacc.inclusive_prod.rst
│ │ ├── nerfacc.inclusive_sum.rst
│ │ ├── nerfacc.pack_info.rst
│ │ ├── nerfacc.ray_aabb_intersect.rst
│ │ ├── nerfacc.render_transmittance_from_alpha.rst
│ │ ├── nerfacc.render_transmittance_from_density.rst
│ │ ├── nerfacc.render_visibility_from_alpha.rst
│ │ ├── nerfacc.render_visibility_from_density.rst
│ │ ├── nerfacc.render_weight_from_alpha.rst
│ │ ├── nerfacc.render_weight_from_density.rst
│ │ ├── nerfacc.searchsorted.rst
│ │ └── nerfacc.traverse_grids.rst
│ ├── rendering.rst
│ └── utils.rst
│ ├── conf.py
│ ├── examples
│ ├── camera.rst
│ ├── camera
│ │ └── barf.rst
│ ├── dynamic.rst
│ ├── dynamic
│ │ ├── kplanes.rst
│ │ ├── tineuvox.rst
│ │ └── tnerf.rst
│ ├── static.rst
│ └── static
│ │ ├── nerf.rst
│ │ ├── ngp.rst
│ │ └── tensorf.rst
│ ├── index.rst
│ └── methodology
│ ├── coding.rst
│ └── sampling.rst
├── examples
├── datasets
│ ├── __init__.py
│ ├── dnerf_synthetic.py
│ ├── nerf_360_v2.py
│ ├── nerf_synthetic.py
│ └── utils.py
├── radiance_fields
│ ├── __init__.py
│ ├── mlp.py
│ └── ngp.py
├── requirements.txt
├── train_mlp_nerf.py
├── train_mlp_tnerf.py
├── train_ngp_nerf_occ.py
├── train_ngp_nerf_prop.py
└── utils.py
├── nerfacc
├── __init__.py
├── cameras.py
├── cuda
│ ├── __init__.py
│ ├── _backend.py
│ └── csrc
│ │ ├── camera.cu
│ │ ├── grid.cu
│ │ ├── include
│ │ ├── data_spec.hpp
│ │ ├── data_spec_packed.cuh
│ │ ├── utils.cub.cuh
│ │ ├── utils_camera.cuh
│ │ ├── utils_contraction.cuh
│ │ ├── utils_cuda.cuh
│ │ ├── utils_grid.cuh
│ │ ├── utils_math.cuh
│ │ └── utils_scan.cuh
│ │ ├── nerfacc.cpp
│ │ ├── pdf.cu
│ │ ├── scan.cu
│ │ └── scan_cub.cu
├── data_specs.py
├── estimators
│ ├── __init__.py
│ ├── base.py
│ ├── n3tree.py
│ ├── occ_grid.py
│ ├── prop_net.py
│ └── vdb.py
├── grid.py
├── losses.py
├── pack.py
├── pdf.py
├── scan.py
├── version.py
└── volrend.py
├── scripts
├── run_aws_listing.py
├── run_dev_checks.py
└── run_profiler.py
├── setup.cfg
├── setup.py
└── tests
├── test_camera.py
├── test_grid.py
├── test_pack.py
├── test_pdf.py
├── test_rendering.py
├── test_scan.py
└── test_vdb.py
/.github/workflows/aws/update_index.py:
--------------------------------------------------------------------------------
"""Regenerate the static HTML wheel index hosted on the nerfacc S3 bucket.

Scans ``s3://nerfacc-bucket/whl`` for ``*.whl`` objects, groups them by the
directory prefix they were uploaded under (one per torch/cuda combination),
then uploads one top-level ``index.html`` linking the per-version pages and
one ``<version>.html`` page per group linking the wheels themselves.
"""
from collections import defaultdict

import boto3

ROOT_URL = 'https://nerfacc-bucket.s3.us-west-2.amazonaws.com/whl'
# Minimal page / link templates for a pip-compatible flat HTML index.
# (The tags were lost in a text-extraction pass; reconstructed here.)
html = '<!DOCTYPE html>\n<html>\n<body>\n{}\n</body>\n</html>'
href = '  <a href="{}">{}</a><br/>'
# ExtraArgs for every upload: serve as HTML, cache briefly, world-readable.
args = {
    'ContentType': 'text/html',
    'CacheControl': 'max-age=300',
    'ACL': 'public-read',
}

bucket = boto3.resource('s3').Bucket(name='nerfacc-bucket')

# Group wheel keys as {torch_version: ['<torch_version>/<wheel file>', ...]}.
wheels_dict = defaultdict(list)
for obj in bucket.objects.filter(Prefix='whl'):
    if not obj.key.endswith('.whl'):  # skip the html index pages themselves
        continue
    torch_version, wheel = obj.key.split('/')[-2:]
    wheels_dict[torch_version].append(f'{torch_version}/{wheel}')

# Top-level index: one link per torch/cuda combination page.
# Bug fix: the link target previously used `torch_version`, a variable leaked
# from the loop above, so every entry pointed at the same page; it must use
# the comprehension variable `version`. '+' is percent-encoded for URLs.
index_html = html.format('\n'.join([
    href.format(f'{version}.html'.replace('+', '%2B'), version)
    for version in wheels_dict
]))

with open('index.html', 'w') as f:
    f.write(index_html)
bucket.Object('whl/index.html').upload_file('index.html', args)

# One page per torch/cuda combination, linking the actual wheel files.
for torch_version, wheel_names in wheels_dict.items():
    torch_version_html = html.format('\n'.join([
        href.format(f'{ROOT_URL}/{wheel_name}'.replace('+', '%2B'), wheel_name)
        for wheel_name in wheel_names
    ]))

    with open(f'{torch_version}.html', 'w') as f:
        f.write(torch_version_html)
    bucket.Object(f'whl/{torch_version}.html').upload_file(
        f'{torch_version}.html', args)
--------------------------------------------------------------------------------
/.github/workflows/building.yml:
--------------------------------------------------------------------------------
1 | name: Building Wheels
2 |
3 | on: [workflow_dispatch]
4 |
5 | jobs:
6 |
7 | wheel:
8 | runs-on: ${{ matrix.os }}
9 | environment: production
10 |
11 | strategy:
12 | fail-fast: false
13 | matrix:
14 | os: [ubuntu-20.04, windows-2019]
15 | # support version based on: https://download.pytorch.org/whl/torch/
16 | python-version: ['3.7', '3.8', '3.9', '3.10', '3.11']
17 | torch-version: [1.11.0, 1.12.0, 1.13.0, 2.0.0]
18 | cuda-version: ['cu113', 'cu115', 'cu116', 'cu117', 'cu118']
19 | exclude:
20 | - torch-version: 1.11.0
21 | python-version: '3.11'
22 | - torch-version: 1.11.0
23 | cuda-version: 'cu116'
24 | - torch-version: 1.11.0
25 | cuda-version: 'cu117'
26 | - torch-version: 1.11.0
27 | cuda-version: 'cu118'
28 |
29 | - torch-version: 1.12.0
30 | python-version: '3.11'
31 | - torch-version: 1.12.0
32 | cuda-version: 'cu115'
33 | - torch-version: 1.12.0
34 | cuda-version: 'cu117'
35 | - torch-version: 1.12.0
36 | cuda-version: 'cu118'
37 |
38 | - torch-version: 1.13.0
39 | cuda-version: 'cu102'
40 | - torch-version: 1.13.0
41 | cuda-version: 'cu113'
42 | - torch-version: 1.13.0
43 | cuda-version: 'cu115'
44 | - torch-version: 1.13.0
45 | cuda-version: 'cu118'
46 |
47 | - torch-version: 2.0.0
48 | python-version: '3.7'
49 | - torch-version: 2.0.0
50 | cuda-version: 'cu102'
51 | - torch-version: 2.0.0
52 | cuda-version: 'cu113'
53 | - torch-version: 2.0.0
54 | cuda-version: 'cu115'
55 | - torch-version: 2.0.0
56 | cuda-version: 'cu116'
57 |
58 | - os: windows-2019
59 | cuda-version: 'cu102'
60 | - os: windows-2019
61 | torch-version: 1.13.0
62 | python-version: '3.11'
63 |
64 | # - os: windows-2019
65 | # torch-version: 1.13.0
66 | # cuda-version: 'cu117'
67 | # python-version: '3.9'
68 |
69 |
70 |
71 | steps:
72 | - uses: actions/checkout@v3
73 |
74 | - name: Set up Python ${{ matrix.python-version }}
75 | uses: actions/setup-python@v4
76 | with:
77 | python-version: ${{ matrix.python-version }}
78 |
79 | - name: Upgrade pip
80 | run: |
81 | pip install --upgrade setuptools
82 | pip install ninja
83 |
84 | - name: Free up disk space
85 | if: ${{ runner.os == 'Linux' }}
86 | run: |
87 | sudo rm -rf /usr/share/dotnet
88 |
89 | - name: Install CUDA ${{ matrix.cuda-version }}
90 | if: ${{ matrix.cuda-version != 'cpu' }}
91 | run: |
92 | bash .github/workflows/cuda/${{ runner.os }}.sh ${{ matrix.cuda-version }}
93 |
94 | - name: Install PyTorch ${{ matrix.torch-version }}+${{ matrix.cuda-version }}
95 | run: |
96 | pip install torch==${{ matrix.torch-version }} --extra-index-url https://download.pytorch.org/whl/${{ matrix.cuda-version }}
97 | python -c "import torch; print('PyTorch:', torch.__version__)"
98 | python -c "import torch; print('CUDA:', torch.version.cuda)"
99 | python -c "import torch; print('CUDA Available:', torch.cuda.is_available())"
100 |
101 | - name: Patch PyTorch static constexpr on Windows
102 | if: ${{ runner.os == 'Windows' }}
103 | run: |
104 | Torch_DIR=`python -c 'import os; import torch; print(os.path.dirname(torch.__file__))'`
105 | sed -i '31,38c\
106 | TORCH_API void lazy_init_num_threads();' ${Torch_DIR}/include/ATen/Parallel.h
107 | shell: bash
108 |
109 | - name: Set version
110 | if: ${{ runner.os != 'macOS' }}
111 | run: |
112 | VERSION=`sed -n 's/^__version__ = "\(.*\)"/\1/p' nerfacc/version.py`
113 | TORCH_VERSION=`echo "pt${{ matrix.torch-version }}" | sed "s/..$//" | sed "s/\.//g"`
114 | CUDA_VERSION=`echo ${{ matrix.cuda-version }}`
115 | echo "New version name: $VERSION+$TORCH_VERSION$CUDA_VERSION"
116 | sed -i "s/$VERSION/$VERSION+$TORCH_VERSION$CUDA_VERSION/" nerfacc/version.py
117 | shell:
118 | bash
119 |
120 | - name: Install main package for CPU
121 | if: ${{ matrix.cuda-version == 'cpu' }}
122 | run: |
123 | BUILD_NO_CUDA=1 pip install .
124 |
125 | - name: Build wheel
126 | run: |
127 | pip install wheel
128 | source .github/workflows/cuda/${{ runner.os }}-env.sh ${{ matrix.cuda-version }}
129 | python setup.py bdist_wheel --dist-dir=dist
130 | shell: bash # `source` does not exist in windows powershell
131 |
132 | - name: Test wheel
133 | run: |
134 | cd dist
135 | ls -lah
136 | pip install *.whl
137 | python -c "import nerfacc; print('nerfacc:', nerfacc.__version__)"
138 | cd ..
139 | shell: bash # `ls -lah` does not exist in windows powershell
140 |
141 | - name: Configure AWS
142 | uses: aws-actions/configure-aws-credentials@v2
143 | with:
144 | aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
145 | aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
146 | aws-region: us-west-2
147 |
148 | - name: Upload wheel
149 | run: |
150 | aws s3 sync dist s3://nerfacc-bucket/whl/torch-${{ matrix.torch-version }}_${{ matrix.cuda-version }} --grants read=uri=http://acs.amazonaws.com/groups/global/AllUsers
151 |
152 | update_aws_listing:
153 | needs: [wheel]
154 | runs-on: ubuntu-latest
155 | environment: production
156 |
157 | steps:
158 | - uses: actions/checkout@v3
159 |
160 | - name: Set up Python
161 | uses: actions/setup-python@v4
162 | with:
163 | python-version: 3.9
164 |
165 | - name: Upgrade pip
166 | run: |
167 | pip install boto3
168 |
169 | - name: Update AWS listing
170 | run: |
171 | python scripts/run_aws_listing.py \
172 | --bucket="nerfacc-bucket" \
173 | --region="us-west-2"
174 | env:
175 | AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
176 | AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
177 |
--------------------------------------------------------------------------------
/.github/workflows/code_checks.yml:
--------------------------------------------------------------------------------
1 | name: Core Tests.
2 |
3 | on:
4 | push:
5 | branches: [master]
6 | pull_request:
7 | branches: [master]
8 |
9 | permissions:
10 | contents: read
11 |
12 | jobs:
13 | build:
14 | runs-on: ubuntu-latest
15 |
16 | steps:
17 | - uses: actions/checkout@v3
18 | - name: Set up Python 3.9
19 | uses: actions/setup-python@v4
20 | with:
21 | python-version: "3.9"
22 | - name: Install dependencies
23 | run: |
24 | pip install isort==5.10.1 black[jupyter]==22.3.0
25 | - name: Run isort
26 | run: isort docs/ nerfacc/ scripts/ examples/ tests/ --profile black --skip examples/pycolmap --line-length 80 --check
27 | - name: Run Black
28 | run: black docs/ nerfacc/ scripts/ examples/ tests/ --exclude examples/pycolmap --line-length 80 --check
29 | # - name: Python Pylint
30 | # run: |
31 | # pylint nerfacc/ tests/ scripts/ examples/
32 |
--------------------------------------------------------------------------------
/.github/workflows/cuda/Linux-env.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Took from https://github.com/pyg-team/pyg-lib/

# Sourced by the build workflow: maps the requested CUDA tag (e.g. "cu118")
# to a toolkit directory and the matching TORCH_CUDA_ARCH_LIST, then exports
# the build environment. An unrecognized tag leaves the environment untouched.
_cuda_version=""
_arch_list="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6"
case ${1} in
  cu118) _cuda_version=11.8 ;;
  cu117) _cuda_version=11.7 ;;
  cu116) _cuda_version=11.6 ;;
  cu115) _cuda_version=11.5 ;;
  cu113) _cuda_version=11.3 ;;
  cu102)
    # CUDA 10.2 tooling predates Ampere, so drop the 8.0/8.6 archs.
    _cuda_version=10.2
    _arch_list="3.5;5.0+PTX;6.0;7.0;7.5"
    ;;
  *)
    ;;
esac

if [ -n "${_cuda_version}" ]; then
  export CUDA_HOME=/usr/local/cuda-${_cuda_version}
  export LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}
  export PATH=${CUDA_HOME}/bin:${PATH}
  export TORCH_CUDA_ARCH_LIST="${_arch_list}"
fi
# Don't leak helper variables into the sourcing shell.
unset _cuda_version _arch_list
--------------------------------------------------------------------------------
/.github/workflows/cuda/Linux.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Took from https://github.com/pyg-team/pyg-lib/

# Installs the CUDA toolkit on an Ubuntu 20.04 GitHub runner from NVIDIA's
# "local repo" .deb packages. Usage: Linux.sh <tag> with tag in cu102..cu118.

OS=ubuntu2004

# Each branch pins the exact repo package NVIDIA published for that toolkit
# release; the trailing numbers are the bundled driver build. Note cu102 uses
# NVIDIA's older repo-package naming scheme.
case ${1} in
  cu118)
    CUDA=11.8
    APT_KEY=${OS}-${CUDA/./-}-local
    FILENAME=cuda-repo-${APT_KEY}_${CUDA}.0-520.61.05-1_amd64.deb
    URL=https://developer.download.nvidia.com/compute/cuda/${CUDA}.0/local_installers
    ;;
  cu117)
    CUDA=11.7
    APT_KEY=${OS}-${CUDA/./-}-local
    FILENAME=cuda-repo-${APT_KEY}_${CUDA}.1-515.65.01-1_amd64.deb
    URL=https://developer.download.nvidia.com/compute/cuda/${CUDA}.1/local_installers
    ;;
  cu116)
    CUDA=11.6
    APT_KEY=${OS}-${CUDA/./-}-local
    FILENAME=cuda-repo-${APT_KEY}_${CUDA}.2-510.47.03-1_amd64.deb
    URL=https://developer.download.nvidia.com/compute/cuda/${CUDA}.2/local_installers
    ;;
  cu115)
    CUDA=11.5
    APT_KEY=${OS}-${CUDA/./-}-local
    FILENAME=cuda-repo-${APT_KEY}_${CUDA}.2-495.29.05-1_amd64.deb
    URL=https://developer.download.nvidia.com/compute/cuda/${CUDA}.2/local_installers
    ;;
  cu113)
    CUDA=11.3
    APT_KEY=${OS}-${CUDA/./-}-local
    FILENAME=cuda-repo-${APT_KEY}_${CUDA}.0-465.19.01-1_amd64.deb
    URL=https://developer.download.nvidia.com/compute/cuda/${CUDA}.0/local_installers
    ;;
  cu102)
    CUDA=10.2
    APT_KEY=${CUDA/./-}-local-${CUDA}.89-440.33.01
    FILENAME=cuda-repo-${OS}-${APT_KEY}_1.0-1_amd64.deb
    URL=https://developer.download.nvidia.com/compute/cuda/${CUDA}/Prod/local_installers
    ;;
  *)
    echo "Unrecognized CUDA_VERSION=${1}"
    exit 1
    ;;
esac

# The pin file gives NVIDIA's repo priority over Ubuntu's own CUDA packages.
wget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin
sudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600
wget -nv ${URL}/${FILENAME}
sudo dpkg -i ${FILENAME}

# cu117+ local repos ship a GPG keyring file; older releases still use apt-key.
if [ "${1}" = "cu117" ] || [ "${1}" = "cu118" ]; then
  sudo cp /var/cuda-repo-${APT_KEY}/cuda-*-keyring.gpg /usr/share/keyrings/
else
  sudo apt-key add /var/cuda-repo-${APT_KEY}/7fa2af80.pub
fi

sudo apt-get update
sudo apt-get -y install cuda

# Free the multi-GB installer package from the runner's disk.
rm -f ${FILENAME}
--------------------------------------------------------------------------------
/.github/workflows/cuda/Windows-env.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Took from https://github.com/pyg-team/pyg-lib/

# Sourced by the build workflow (under Git Bash on a Windows runner): resolves
# the toolkit directory for the requested CUDA tag and prepends the CUDA and
# MSBuild binaries to PATH. An unrecognized tag changes nothing. As in the
# original, only TORCH_CUDA_ARCH_LIST is explicitly exported.
case ${1} in
  cu118) _ver=v11.8 ;;
  cu117) _ver=v11.7 ;;
  cu116) _ver=v11.6 ;;
  cu115) _ver=v11.5 ;;
  cu113) _ver=v11.3 ;;
  *)     _ver="" ;;
esac

if [ -n "${_ver}" ]; then
  CUDA_HOME=/c/Program\ Files/NVIDIA\ GPU\ Computing\ Toolkit/CUDA/${_ver}
  PATH=${CUDA_HOME}/bin:$PATH
  PATH=/c/Program\ Files\ \(x86\)/Microsoft\ Visual\ Studio/2017/BuildTools/MSBuild/15.0/Bin:$PATH
  # Keep Windows wheel builds small: a single conservative arch + PTX.
  export TORCH_CUDA_ARCH_LIST="6.0+PTX"
fi
# Don't leak the helper variable into the sourcing shell.
unset _ver
--------------------------------------------------------------------------------
/.github/workflows/cuda/Windows.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Took from https://github.com/pyg-team/pyg-lib/

# Installs the CUDA toolkit (compiler + dev libraries only, no driver) on a
# Windows GitHub runner. Usage: Windows.sh <tag> with tag in cu113..cu118.

# Install NVIDIA drivers, see:
# https://github.com/pytorch/vision/blob/master/packaging/windows/internal/cuda_install.bat#L99-L102
curl -k -L "https://drive.google.com/u/0/uc?id=1injUyo3lnarMgWyRcXqKg4UGnN0ysmuq&export=download" --output "/tmp/gpu_driver_dlls.zip"
7z x "/tmp/gpu_driver_dlls.zip" -o"/c/Windows/System32"

# Pick the offline ("local") installer for the requested CUDA tag.
# NOTE(review): cu116 and cu115 both download the 11.3 installer
# (CUDA_SHORT=11.3), while Windows-env.sh points CUDA_HOME at v11.6 / v11.5.
# Confirm this mismatch is intentional and not a copy/paste leftover.
case ${1} in
  cu118)
    CUDA_SHORT=11.8
    CUDA_URL=https://developer.download.nvidia.com/compute/cuda/${CUDA_SHORT}.0/local_installers
    CUDA_FILE=cuda_${CUDA_SHORT}.0_522.06_windows.exe
    ;;
  cu117)
    CUDA_SHORT=11.7
    CUDA_URL=https://developer.download.nvidia.com/compute/cuda/${CUDA_SHORT}.1/local_installers
    CUDA_FILE=cuda_${CUDA_SHORT}.1_516.94_windows.exe
    ;;
  cu116)
    CUDA_SHORT=11.3
    CUDA_URL=https://developer.download.nvidia.com/compute/cuda/${CUDA_SHORT}.0/local_installers
    CUDA_FILE=cuda_${CUDA_SHORT}.0_465.89_win10.exe
    ;;
  cu115)
    CUDA_SHORT=11.3
    CUDA_URL=https://developer.download.nvidia.com/compute/cuda/${CUDA_SHORT}.0/local_installers
    CUDA_FILE=cuda_${CUDA_SHORT}.0_465.89_win10.exe
    ;;
  cu113)
    CUDA_SHORT=11.3
    CUDA_URL=https://developer.download.nvidia.com/compute/cuda/${CUDA_SHORT}.0/local_installers
    CUDA_FILE=cuda_${CUDA_SHORT}.0_465.89_win10.exe
    ;;
  *)
    echo "Unrecognized CUDA_VERSION=${1}"
    exit 1
    ;;
esac

curl -k -L "${CUDA_URL}/${CUDA_FILE}" --output "${CUDA_FILE}"
echo ""
echo "Installing from ${CUDA_FILE}..."
# Silent install (-s) of the listed subcomponents only; -Wait blocks until the
# installer process exits so the following cleanup is safe.
PowerShell -Command "Start-Process -FilePath \"${CUDA_FILE}\" -ArgumentList \"-s nvcc_${CUDA_SHORT} cuobjdump_${CUDA_SHORT} nvprune_${CUDA_SHORT} cupti_${CUDA_SHORT} cublas_dev_${CUDA_SHORT} cudart_${CUDA_SHORT} cufft_dev_${CUDA_SHORT} curand_dev_${CUDA_SHORT} cusolver_dev_${CUDA_SHORT} cusparse_dev_${CUDA_SHORT} thrust_${CUDA_SHORT} npp_dev_${CUDA_SHORT} nvrtc_dev_${CUDA_SHORT} nvml_dev_${CUDA_SHORT}\" -Wait -NoNewWindow"
echo "Done!"
# Free the multi-GB installer from the runner's disk.
rm -f "${CUDA_FILE}"
--------------------------------------------------------------------------------
/.github/workflows/doc.yml:
--------------------------------------------------------------------------------
1 | name: Docs
2 | on:
3 | push:
4 |     branches: [main]  # NOTE(review): sibling workflows trigger on 'master' — verify 'main' is the default branch here, else this push trigger never fires
5 | pull_request:
6 | branches: [main]
7 | workflow_dispatch:
8 |
9 | permissions:
10 | contents: write
11 | jobs:
12 | docs:
13 | runs-on: ubuntu-latest
14 | steps:
15 | - uses: actions/checkout@v3
16 | with:
17 | submodules: 'recursive'
18 | - uses: actions/setup-python@v3
19 | with:
20 | python-version: '3.9'
21 | - name: Install dependencies
22 | run: |
23 | pip install -r docs/requirements.txt
24 | pip install torch --index-url https://download.pytorch.org/whl/cpu
25 | BUILD_NO_CUDA=1 pip install .
26 | - name: Sphinx build
27 | run: |
28 | sphinx-build docs/source _build
29 | - name: Deploy
30 | uses: peaceiris/actions-gh-pages@v3
31 | with:
32 | publish_branch: gh-pages
33 | github_token: ${{ secrets.GITHUB_TOKEN }}
34 | publish_dir: _build/
35 | force_orphan: true
36 | cname: www.nerfacc.com
37 | if: github.event_name != 'pull_request'
--------------------------------------------------------------------------------
/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
1 | # This workflows will upload a Python Package using twine when a release is created
2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
3 |
4 | name: Upload Python Package
5 |
6 | on:
7 | release:
8 | types: [created]
9 | branches: [master]
10 |
11 | jobs:
12 | deploy:
13 | runs-on: ubuntu-latest
14 | environment: production
15 |
16 | steps:
17 | - uses: actions/checkout@v3
18 | - name: Set up Python
19 | uses: actions/setup-python@v4
20 | with:
21 | python-version: '3.7'
22 | - name: Install dependencies
23 | run: |
24 | python -m pip install build twine
25 | - name: Strip unsupported tags in README
26 | run: |
27 |           sed -i '/<!-- pypi-strip -->/,/<!-- \/pypi-strip -->/d' README.md
28 | - name: Build and publish
29 | env:
30 | PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
31 | run: |
32 | BUILD_NO_CUDA=1 python -m build
33 | twine upload --username __token__ --password $PYPI_TOKEN dist/*
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Visual Studio Code configs.
2 | .vscode/
3 |
4 | # Byte-compiled / optimized / DLL files
5 | __pycache__/
6 | *.py[cod]
7 | *$py.class
8 |
9 | # C extensions
10 | *.so
11 |
12 | # Distribution / packaging
13 | .Python
14 | build/
15 | develop-eggs/
16 | dist/
17 | downloads/
18 | eggs/
19 | .eggs/
20 | # lib/
21 | lib64/
22 | parts/
23 | sdist/
24 | var/
25 | wheels/
26 | *.egg-info/
27 | .installed.cfg
28 | *.egg
29 | MANIFEST
30 |
31 | # PyInstaller
32 | # Usually these files are written by a python script from a template
33 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
34 | *.manifest
35 | *.spec
36 |
37 | # Installer logs
38 | pip-log.txt
39 | pip-delete-this-directory.txt
40 |
41 | # Unit test / coverage reports
42 | htmlcov/
43 | .tox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | .hypothesis/
51 | .pytest_cache/
52 |
53 | # Translations
54 | *.mo
55 | *.pot
56 |
57 | # Django stuff:
58 | *.log
59 | local_settings.py
60 | db.sqlite3
61 |
62 | # Flask stuff:
63 | instance/
64 | .webassets-cache
65 |
66 | # Scrapy stuff:
67 | .scrapy
68 |
69 | # Sphinx documentation
70 | docs/_build/
71 |
72 | # PyBuilder
73 | target/
74 |
75 | # Jupyter Notebook
76 | .ipynb_checkpoints
77 |
78 | # pyenv
79 | .python-version
80 |
81 | # celery beat schedule file
82 | celerybeat-schedule
83 |
84 | # SageMath parsed files
85 | *.sage.py
86 |
87 | # Environments
88 | .env
89 | .venv
90 | env/
91 | venv/
92 | ENV/
93 | env.bak/
94 | venv.bak/
95 |
96 | # Spyder project settings
97 | .spyderproject
98 | .spyproject
99 |
100 | # Rope project settings
101 | .ropeproject
102 |
103 | # mkdocs documentation
104 | /site
105 |
106 | # mypy
107 | .mypy_cache/
108 |
109 | .DS_Store
110 |
111 | # Direnv config.
112 | .envrc
113 |
114 | # line_profiler
115 | *.lprof
116 |
117 | # vscode
118 | .vscode
119 |
120 | outputs/
121 | data
122 |
123 | benchmarks/*
124 | !benchmarks/barf
125 | !benchmarks/kplanes
126 | !benchmarks/tensorf
127 | !benchmarks/tineuvox
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "examples/pycolmap"]
2 | path = examples/pycolmap
3 | url = https://github.com/rmbrualla/pycolmap.git
4 |
5 | [submodule "benchmarks/tensorf"]
6 | path = benchmarks/tensorf
7 | url = https://github.com/liruilong940607/tensorf.git
8 | branch = nerfacc
9 |
10 | [submodule "benchmarks/barf"]
11 | path = benchmarks/barf
12 | url = https://github.com/liruilong940607/barf.git
13 | branch = nerfacc
14 |
15 | [submodule "benchmarks/tineuvox"]
16 | path = benchmarks/tineuvox
17 | url = https://github.com/liruilong940607/tineuvox.git
18 | branch = nerfacc
19 |
20 | [submodule "benchmarks/kplanes"]
21 | path = benchmarks/kplanes
22 | url = https://github.com/liruilong940607/kplanes.git
23 | branch = nerfacc
24 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 | - repo: https://github.com/pre-commit/pre-commit-hooks
3 | rev: v2.3.0
4 | hooks:
5 | - id: end-of-file-fixer
6 | - id: trailing-whitespace
7 | - id: check-yaml
8 | - id: check-merge-conflict
9 | - id: requirements-txt-fixer
10 | - repo: https://github.com/psf/black
11 | rev: 22.10.0
12 | hooks:
13 | - id: black
14 | language_version: python3.8.12
15 | args: # arguments to configure black
16 | - --line-length=80
17 |
18 | - repo: https://github.com/pycqa/isort
19 | rev: 5.10.1
20 | hooks:
21 | - id: isort
22 |
--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
1 | version: 2
2 |
3 | build:
4 | os: ubuntu-20.04
5 | tools:
6 | python: "3.9"
7 |
8 | sphinx:
9 | fail_on_warning: true
10 | configuration: docs/source/conf.py
11 |
12 | python:
13 | install:
14 | # Equivalent to 'pip install .'
15 | - method: pip
16 | path: .
17 | # Equivalent to 'pip install -r docs/requirements.txt'
18 | - requirements: docs/requirements.txt
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 |
2 | # Change Log
3 | All notable changes to this project will be documented in this file.
4 |
5 | The format is based on [Keep a Changelog](http://keepachangelog.com/)
6 | and this project adheres to [Semantic Versioning](http://semver.org/).
7 |
8 | ## [0.5.0] - 2023-04-04
9 |
10 | This is a major upgrade in which 90% of the code has been rewritten. In this version
11 | we achieve:
12 |
13 | 
14 |
15 | Links:
16 | - Documentation: https://www.nerfacc.com/en/v0.5.0/
17 | - ArXiv Report: Coming Soon.
18 |
19 | Methodologies:
20 | - Upgrade Occupancy Grid to support multiple levels.
21 | - Support Proposal Network from Mip-NeRF 360.
22 | - Update examples on unbounded scenes to use Multi-level Occupancy Grid or Proposal Network.
23 | - Contraction for Occupancy Grid is no longer supported due to its inefficiency for ray traversal.
24 |
25 | API Changes:
26 | - [Changed] `OccupancyGrid()` -> `OccGridEstimator()`.
27 | - [Added] Argument `levels=1` for multi-level support.
28 | - [Added] Function `self.sampling()` that does basically the same thing with the old `nerfacc.ray_marching`.
29 | - [Renamed] Function `self.every_n_step()` -> `self.update_every_n_steps()`
30 | - [Added] `PropNetEstimator()`. With functions `self.sampling()`, `self.update_every_n_steps()`
31 | and `self.compute_loss()`.
32 | - [Removed] `ray_marching()`. Ray marching is now implemented through calling `sampling()` of
33 | the `OccGridEstimator()` / `PropNetEstimator()`.
34 | - [Changed] `ray_aabb_intersect()` now supports multiple aabb, and supports new argument `near_plane`, `far_plane`, `miss_value`.
35 | - [Changed] `render_*_from_*()`. The input shape changes from `(all_samples, 1)` to `(all_samples)`. And the function will returns all intermediate results so it might be a tuple.
36 | - [Changed] `rendering()`. The input shape changes from `(all_samples, 1)` to `(all_samples)`, including the shape assumption for the `rgb_sigma_fn` and `rgb_alpha_fn`. Be aware of this shape change.
37 | - [Changed] `accumulate_along_rays()`. The shape of the `weights` in the inputs should be `(all_samples)` now.
38 | - [Removed] `unpack_info()`, `pack_data()`, `unpack_data()` are temporally removed due to in-compatibility
39 | with the new backend implementation. Will add them back later.
40 | - [Added] Some basic functions that support both batched tensor and flattened tensor: `inclusive_prod()`, `inclusive_sum()`, `exclusive_prod()`, `exclusive_sum()`, `importance_sampling()`, `searchsorted()`.
41 |
42 | Examples & Benchmarks:
43 | - More benchmarks and examples. See folder `examples/` and `benchmarks/`.
44 |
45 | ## [0.3.5] - 2023-02-23
46 |
47 | A stable version that achieves:
48 | - The vanilla Nerf model with 8-layer MLPs can be trained to better quality (+0.5 PSNR) in 1 hour rather than days as in the paper.
49 | - The Instant-NGP Nerf model can be trained to equal quality in 4.5 minutes, comparing to the official pure-CUDA implementation.
50 | - The D-Nerf model for dynamic objects can also be trained in 1 hour rather than 2 days as in the paper, and with better quality (+~2.5 PSNR).
51 | - Both bounded and unbounded scenes are supported.
52 |
53 | Links:
54 | - Documentation: https://www.nerfacc.com/en/v0.3.5/
55 | - ArXiv Report: https://arxiv.org/abs/2210.04847v2/
56 |
57 | Methodologies:
58 | - Single resolution `nerfacc.OccupancyGrid` for synthetic scenes.
59 | - Contraction methods `nerfacc.ContractionType` for unbounded scenes.
60 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2022 Ruilong Li
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
23 | THIS SOFTWARE AND/OR DATA WAS DEPOSITED IN THE BAIR OPEN RESEARCH COMMONS
24 | REPOSITORY ON 05/08/2023.
25 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include nerfacc/cuda/csrc/include/*
2 | include nerfacc/cuda/csrc/*
3 |
4 | include nerfacc/_cuda/csrc/include/*
5 | include nerfacc/_cuda/csrc/*
6 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS ?=
7 | SPHINXBUILD ?= sphinx-build
8 | SOURCEDIR = source
9 | BUILDDIR = build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | pytorch_sphinx_theme @ git+https://github.com/liruilong940607/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme
2 | sphinx==5.2.1
3 | sphinx-copybutton==0.5.0
4 | sphinx-design==0.2.0
--------------------------------------------------------------------------------
/docs/source/_static/css/readthedocs.css:
--------------------------------------------------------------------------------
1 | .header-logo {
2 | background-image: url("../images/logo4x.png");
3 | background-size: 156px 35px;
4 | height: 35px;
5 | width: 156px;
6 | }
7 | code {
8 | word-break: normal;
9 | }
--------------------------------------------------------------------------------
/docs/source/_static/images/coding/gpu_util.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nerfstudio-project/nerfacc/57ccfa14feb94975836ea6913149a86737220f2b/docs/source/_static/images/coding/gpu_util.png
--------------------------------------------------------------------------------
/docs/source/_static/images/illustration_occgrid.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nerfstudio-project/nerfacc/57ccfa14feb94975836ea6913149a86737220f2b/docs/source/_static/images/illustration_occgrid.png
--------------------------------------------------------------------------------
/docs/source/_static/images/illustration_propnet.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nerfstudio-project/nerfacc/57ccfa14feb94975836ea6913149a86737220f2b/docs/source/_static/images/illustration_propnet.png
--------------------------------------------------------------------------------
/docs/source/_static/images/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nerfstudio-project/nerfacc/57ccfa14feb94975836ea6913149a86737220f2b/docs/source/_static/images/logo.png
--------------------------------------------------------------------------------
/docs/source/_static/images/logo4x.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nerfstudio-project/nerfacc/57ccfa14feb94975836ea6913149a86737220f2b/docs/source/_static/images/logo4x.png
--------------------------------------------------------------------------------
/docs/source/_static/images/plot_occgrid.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nerfstudio-project/nerfacc/57ccfa14feb94975836ea6913149a86737220f2b/docs/source/_static/images/plot_occgrid.png
--------------------------------------------------------------------------------
/docs/source/_static/images/plot_propnet.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nerfstudio-project/nerfacc/57ccfa14feb94975836ea6913149a86737220f2b/docs/source/_static/images/plot_propnet.png
--------------------------------------------------------------------------------
/docs/source/_static/images/teaser.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nerfstudio-project/nerfacc/57ccfa14feb94975836ea6913149a86737220f2b/docs/source/_static/images/teaser.jpg
--------------------------------------------------------------------------------
/docs/source/apis/estimators.rst:
--------------------------------------------------------------------------------
1 | .. _`Estimators`:
2 |
3 | Estimators
4 | ===================================
5 |
6 | .. currentmodule:: nerfacc
7 |
8 | .. autoclass:: OccGridEstimator
9 | :members:
10 |
11 | .. autoclass:: PropNetEstimator
12 | :members:
13 |
--------------------------------------------------------------------------------
/docs/source/apis/generated/nerfacc.accumulate_along_rays.rst:
--------------------------------------------------------------------------------
1 | nerfacc.accumulate\_along\_rays
2 | ===============================
3 |
4 | .. currentmodule:: nerfacc
5 |
6 | .. autofunction:: accumulate_along_rays
--------------------------------------------------------------------------------
/docs/source/apis/generated/nerfacc.exclusive_prod.rst:
--------------------------------------------------------------------------------
1 | nerfacc.exclusive\_prod
2 | =======================
3 |
4 | .. currentmodule:: nerfacc
5 |
6 | .. autofunction:: exclusive_prod
--------------------------------------------------------------------------------
/docs/source/apis/generated/nerfacc.exclusive_sum.rst:
--------------------------------------------------------------------------------
1 | nerfacc.exclusive\_sum
2 | ======================
3 |
4 | .. currentmodule:: nerfacc
5 |
6 | .. autofunction:: exclusive_sum
--------------------------------------------------------------------------------
/docs/source/apis/generated/nerfacc.importance_sampling.rst:
--------------------------------------------------------------------------------
1 | nerfacc.importance\_sampling
2 | ============================
3 |
4 | .. currentmodule:: nerfacc
5 |
6 | .. autofunction:: importance_sampling
--------------------------------------------------------------------------------
/docs/source/apis/generated/nerfacc.inclusive_prod.rst:
--------------------------------------------------------------------------------
1 | nerfacc.inclusive\_prod
2 | =======================
3 |
4 | .. currentmodule:: nerfacc
5 |
6 | .. autofunction:: inclusive_prod
--------------------------------------------------------------------------------
/docs/source/apis/generated/nerfacc.inclusive_sum.rst:
--------------------------------------------------------------------------------
1 | nerfacc.inclusive\_sum
2 | ======================
3 |
4 | .. currentmodule:: nerfacc
5 |
6 | .. autofunction:: inclusive_sum
--------------------------------------------------------------------------------
/docs/source/apis/generated/nerfacc.pack_info.rst:
--------------------------------------------------------------------------------
1 | nerfacc.pack\_info
2 | ==================
3 |
4 | .. currentmodule:: nerfacc
5 |
6 | .. autofunction:: pack_info
--------------------------------------------------------------------------------
/docs/source/apis/generated/nerfacc.ray_aabb_intersect.rst:
--------------------------------------------------------------------------------
1 | nerfacc.ray\_aabb\_intersect
2 | ============================
3 |
4 | .. currentmodule:: nerfacc
5 |
6 | .. autofunction:: ray_aabb_intersect
--------------------------------------------------------------------------------
/docs/source/apis/generated/nerfacc.render_transmittance_from_alpha.rst:
--------------------------------------------------------------------------------
1 | nerfacc.render\_transmittance\_from\_alpha
2 | ==========================================
3 |
4 | .. currentmodule:: nerfacc
5 |
6 | .. autofunction:: render_transmittance_from_alpha
--------------------------------------------------------------------------------
/docs/source/apis/generated/nerfacc.render_transmittance_from_density.rst:
--------------------------------------------------------------------------------
1 | nerfacc.render\_transmittance\_from\_density
2 | ============================================
3 |
4 | .. currentmodule:: nerfacc
5 |
6 | .. autofunction:: render_transmittance_from_density
--------------------------------------------------------------------------------
/docs/source/apis/generated/nerfacc.render_visibility_from_alpha.rst:
--------------------------------------------------------------------------------
1 | nerfacc.render\_visibility\_from\_alpha
2 | =======================================
3 |
4 | .. currentmodule:: nerfacc
5 |
6 | .. autofunction:: render_visibility_from_alpha
--------------------------------------------------------------------------------
/docs/source/apis/generated/nerfacc.render_visibility_from_density.rst:
--------------------------------------------------------------------------------
1 | nerfacc.render\_visibility\_from\_density
2 | =========================================
3 |
4 | .. currentmodule:: nerfacc
5 |
6 | .. autofunction:: render_visibility_from_density
--------------------------------------------------------------------------------
/docs/source/apis/generated/nerfacc.render_weight_from_alpha.rst:
--------------------------------------------------------------------------------
1 | nerfacc.render\_weight\_from\_alpha
2 | ===================================
3 |
4 | .. currentmodule:: nerfacc
5 |
6 | .. autofunction:: render_weight_from_alpha
--------------------------------------------------------------------------------
/docs/source/apis/generated/nerfacc.render_weight_from_density.rst:
--------------------------------------------------------------------------------
1 | nerfacc.render\_weight\_from\_density
2 | =====================================
3 |
4 | .. currentmodule:: nerfacc
5 |
6 | .. autofunction:: render_weight_from_density
--------------------------------------------------------------------------------
/docs/source/apis/generated/nerfacc.searchsorted.rst:
--------------------------------------------------------------------------------
1 | nerfacc.searchsorted
2 | ====================
3 |
4 | .. currentmodule:: nerfacc
5 |
6 | .. autofunction:: searchsorted
--------------------------------------------------------------------------------
/docs/source/apis/generated/nerfacc.traverse_grids.rst:
--------------------------------------------------------------------------------
1 | nerfacc.traverse\_grids
2 | =======================
3 |
4 | .. currentmodule:: nerfacc
5 |
6 | .. autofunction:: traverse_grids
--------------------------------------------------------------------------------
/docs/source/apis/rendering.rst:
--------------------------------------------------------------------------------
1 | Rendering
2 | ===================================
3 |
4 | .. currentmodule:: nerfacc
5 |
6 | .. autofunction:: rendering
7 |
--------------------------------------------------------------------------------
/docs/source/apis/utils.rst:
--------------------------------------------------------------------------------
1 | Utils
2 | ===================================
3 |
4 | Below are the basic functions that support sampling and rendering.
5 |
6 | .. currentmodule:: nerfacc
7 |
8 | .. autosummary::
9 | :nosignatures:
10 | :toctree: generated/
11 |
12 | inclusive_prod
13 | exclusive_prod
14 | inclusive_sum
15 | exclusive_sum
16 |
17 | pack_info
18 |
19 | render_visibility_from_alpha
20 | render_visibility_from_density
21 | render_weight_from_alpha
22 | render_weight_from_density
23 | render_transmittance_from_alpha
24 | render_transmittance_from_density
25 | accumulate_along_rays
26 |
27 | importance_sampling
28 | searchsorted
29 |
30 | ray_aabb_intersect
31 | traverse_grids
32 |
--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
1 | import pytorch_sphinx_theme
2 |
3 | __version__ = None
4 | exec(open("../../nerfacc/version.py", "r").read())
5 |
6 | # -- Project information
7 |
8 | project = "nerfacc"
9 | copyright = "2022, Ruilong"
10 | author = "Ruilong"
11 |
12 | release = __version__
13 |
14 | # -- General configuration
15 |
16 | extensions = [
17 | "sphinx.ext.napoleon",
18 | "sphinx.ext.duration",
19 | "sphinx.ext.doctest",
20 | "sphinx.ext.autodoc",
21 | "sphinx.ext.autosummary",
22 | "sphinx.ext.intersphinx",
23 | ]
24 |
25 | intersphinx_mapping = {
26 | "python": ("https://docs.python.org/3/", None),
27 | "sphinx": ("https://www.sphinx-doc.org/en/master/", None),
28 | }
29 | intersphinx_disabled_domains = ["std"]
30 |
31 | templates_path = ["_templates"]
32 |
33 | # -- Options for HTML output
34 |
35 | # html_theme = "furo"
36 |
37 | html_theme = "pytorch_sphinx_theme"
38 | html_theme_path = [pytorch_sphinx_theme.get_html_theme_path()]
39 | html_static_path = ["_static"]
40 | html_css_files = ["css/readthedocs.css"]
41 |
42 | # Ignore >>> when copying code
43 | copybutton_prompt_text = r">>> |\.\.\. "
44 | copybutton_prompt_is_regexp = True
45 |
46 | # Theme options are theme-specific and customize the look and feel of a theme
47 | # further. For a list of options available for each theme, see the
48 | # documentation.
49 | html_theme_options = {
50 | # The target url that the logo directs to. Unset to do nothing
51 | "logo_url": "https://www.nerfacc.com/",
52 | # "menu" is a list of dictionaries where you can specify the content and the
53 | # behavior of each item in the menu. Each item can either be a link or a
54 | # dropdown menu containing a list of links.
55 | "menu": [
56 | # A link
57 | {
58 | "name": "GitHub",
59 | "url": "https://github.com/nerfstudio-project/nerfacc",
60 | },
61 | # A dropdown menu
62 | # {
63 | # "name": "Projects",
64 | # "children": [
65 | # # A vanilla dropdown item
66 | # {
67 | # "name": "nerfstudio",
68 | # "url": "https://docs.nerf.studio/",
69 | # "description": "The all-in-one repo for NeRFs",
70 | # },
71 | # ],
72 | # # Optional, determining whether this dropdown menu will always be
73 | # # highlighted.
74 | # # "active": True,
75 | # },
76 | ],
77 | }
78 | # html_theme_options = {
79 | # "canonical_url": "",
80 | # "analytics_id": "",
81 | # "logo_only": False,
82 | # "display_version": True,
83 | # "prev_next_buttons_location": "bottom",
84 | # "style_external_links": False,
85 | # # Toc options
86 | # "collapse_navigation": True,
87 | # "sticky_navigation": True,
88 | # "navigation_depth": 4,
89 | # "includehidden": True,
90 | # "titles_only": False
91 | # }
92 |
93 | # -- Options for EPUB output
94 | epub_show_urls = "footnote"
95 |
96 | # typehints
97 | autodoc_typehints = "description"
98 |
--------------------------------------------------------------------------------
/docs/source/examples/camera.rst:
--------------------------------------------------------------------------------
1 | Camera Optimization NeRFs
2 | ===================================
3 |
4 | Performance Overview
5 | --------------------
6 |
7 | +----------------------+----------------+----------------------------------+-----------------------+--------------------------+------------------------+------------------------+
8 | | Methods | Dataset | Training Time :math:`\downarrow` | PSNR :math:`\uparrow` | LPIPS :math:`\downarrow` | E_R :math:`\downarrow` | E_T :math:`\downarrow` |
9 | +======================+================+==================================+=======================+==========================+========================+========================+
10 | | BARF `[1]`_ | NeRF-Synthetic | 586min | 28.83 | 0.054 | 0.19 | 0.74 |
11 | +----------------------+----------------+----------------------------------+-----------------------+--------------------------+------------------------+------------------------+
12 | | *+nerfacc (occgrid)* | | 130min | 30.11 | 0.044 | 0.07 | 0.35 |
13 | +----------------------+----------------+----------------------------------+-----------------------+--------------------------+------------------------+------------------------+
14 |
15 | Implementation Details
16 | ----------------------
17 |
18 | .. toctree::
19 | :glob:
20 | :maxdepth: 1
21 |
22 | camera/*
23 |
24 | .. _`[1]`: https://arxiv.org/abs/2104.06405
25 |
--------------------------------------------------------------------------------
/docs/source/examples/camera/barf.rst:
--------------------------------------------------------------------------------
1 | BARF
2 | ====================
3 |
4 | In this example we showcase how to plug the nerfacc library into the *official* codebase
5 | of `BARF `_. See
6 | `our forked repo `_
7 | for details.
8 |
9 |
10 | Benchmark: NeRF-Synthetic Dataset
11 | ---------------------------------
12 | *updated on 2023-04-04 with nerfacc==0.5.0*
13 |
14 | Our experiments are conducted on a single NVIDIA GeForce RTX 2080 Ti.
15 |
16 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+
17 | | PSNR | Lego | Mic |Materials| Chair |Hotdog | Ficus | Drums | Ship | MEAN |
18 | | | | | | | | | | | |
19 | +=======================+=======+=======+=========+=======+=======+=======+=======+=======+=======+
20 | | BARF | 31.16 | 23.87 | 26.28 | 34.48 | 28.4 | 27.86 | 31.07 | 27.55 | 28.83 |
21 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+
22 | | training time | 9.5hrs| 9.5hrs| 9.2hrs | 9.3hrs|12.3hrs| 9.3hrs| 9.3hrs| 9.5hrs| 9.8hrs|
23 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+
24 | | camera errors (R) | 0.105 | 0.047 | 0.085 | 0.226 | 0.071 | 0.846 | 0.068 | 0.089 | 0.192 |
25 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+
26 | | camera errors (T) | 0.0043| 0.0021| 0.0040 | 0.0120| 0.0026| 0.0272| 0.0025| 0.0044| 0.0074|
27 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+
28 | | Ours (occ) | 32.25 | 24.77 | 27.73 | 35.84 | 29.98 | 28.83 | 32.84 | 28.62 | 30.11 |
29 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+
30 | | training time | 1.5hrs| 2.0hrs| 2.0hrs | 2.3hrs| 2.2hrs| 1.9hrs| 2.2hrs| 2.3hrs| 2.0hrs|
31 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+
32 | | camera errors (R) | 0.081 | 0.036 | 0.056 | 0.171 | 0.058 | 0.039 | 0.039 | 0.079 | 0.070 |
33 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+
34 | | camera errors (T) | 0.0038| 0.0019| 0.0031 | 0.0106| 0.0021| 0.0013| 0.0014| 0.0041| 0.0035|
35 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+
36 |
--------------------------------------------------------------------------------
/docs/source/examples/dynamic.rst:
--------------------------------------------------------------------------------
1 | Dynamic NeRFs
2 | ===================================
3 |
4 |
5 | The :class:`nerfacc.PropNetEstimator` can naturally work with dynamic NeRFs. To make the
6 | :class:`nerfacc.OccGridEstimator` also work with dynamic NeRFs, we need to make some compromises.
7 | In these examples, we use the :class:`nerfacc.OccGridEstimator` to estimate the
8 | `maximum` opacity at each area `over all the timestamps`. This allows us to share the same estimator
9 | across all the timestamps, including those timestamps that are not in the training set.
10 | In other words, we use it to cache the union of the occupancy at all timestamps.
11 | It is not optimal but still makes the rendering very efficient if the motion is not overly significant.
12 |
13 |
14 | Performance Overview
15 | --------------------
16 | *updated on 2023-04-04*
17 |
18 | +----------------------+-----------+----------------------------------+-----------------------+--------------------------+
19 | | Methods | Dataset | Training Time :math:`\downarrow` | PSNR :math:`\uparrow` | LPIPS :math:`\downarrow` |
20 | +======================+===========+==================================+=======================+==========================+
21 | | TiNeuVox `[1]`_ | HyperNeRF | 56.3min | 24.19 | 0.425 |
22 | +----------------------+-----------+----------------------------------+-----------------------+--------------------------+
23 | | *+nerfacc (occgrid)* | | 33.0min | 24.19 | 0.434 |
24 | +----------------------+-----------+----------------------------------+-----------------------+--------------------------+
25 | | *+nerfacc (propnet)* | | 34.3min | 24.26 | 0.398 |
26 | +----------------------+-----------+----------------------------------+-----------------------+--------------------------+
27 | | TiNeuVox `[1]`_ | D-NeRF | 11.8min | 31.14 | 0.050 |
28 | +----------------------+-----------+----------------------------------+-----------------------+--------------------------+
29 | | *+nerfacc (occgrid)* | | 4.2min | 31.75 | 0.038 |
30 | +----------------------+-----------+----------------------------------+-----------------------+--------------------------+
31 | | K-Planes `[2]`_ | D-NeRF | 63.9min | 30.28 | 0.043 |
32 | +----------------------+-----------+----------------------------------+-----------------------+--------------------------+
33 | | *+nerfacc (occgrid)* | | 38.8min | 30.35 | 0.042 |
34 | +----------------------+-----------+----------------------------------+-----------------------+--------------------------+
35 | | T-NeRF `[3]`_ | D-NeRF | 20hours | 28.78 | 0.069 |
36 | +----------------------+-----------+----------------------------------+-----------------------+--------------------------+
37 | | *+nerfacc (occgrid)* | | 58min | 32.22 | 0.040 |
38 | +----------------------+-----------+----------------------------------+-----------------------+--------------------------+
39 |
40 | Implementation Details
41 | ----------------------
42 |
43 | .. toctree::
44 | :glob:
45 | :maxdepth: 1
46 |
47 | dynamic/*
48 |
49 | |
50 |
51 | 3rd-Party Use Cases
52 | -------------------
53 |
54 | - `Representing Volumetric Videos as Dynamic MLP Maps, CVPR 2023 `_.
55 |
56 | .. _`[1]`: https://arxiv.org/abs/2205.15285
57 | .. _`[2]`: https://arxiv.org/abs/2301.10241
58 | .. _`[3]`: https://arxiv.org/abs/2011.13961
--------------------------------------------------------------------------------
/docs/source/examples/dynamic/kplanes.rst:
--------------------------------------------------------------------------------
1 | K-Planes
2 | ====================
3 |
4 | In this example we showcase how to plug the nerfacc library into the *official* codebase
5 | of `K-Planes `_. See
6 | `our forked repo `_
7 | for details.
8 |
9 |
10 | Benchmark: D-NeRF Dataset
11 | ---------------------------------
12 | *updated on 2023-04-04 with nerfacc==0.5.0*
13 |
14 | Our experiments are conducted on a single NVIDIA GeForce RTX 2080 Ti.
15 |
16 | +----------------------+----------+---------+-------+---------+-------+--------+---------+-------+-------+
17 | | PSNR | bouncing | hell | hook | jumping | lego | mutant | standup | trex | MEAN |
18 | | | balls | warrior | | jacks | | | | | |
19 | +======================+==========+=========+=======+=========+=======+========+=========+=======+=======+
20 | | K-Planes | 39.10 | 23.95 | 27.76 | 31.11 | 25.18 | 32.44 | 32.51 | 30.25 | 30.29 |
21 | +----------------------+----------+---------+-------+---------+-------+--------+---------+-------+-------+
22 | | training time | 68min | 70min | 70min | 70min | 70min | 71min | 72min | 71min | 70min |
23 | +----------------------+----------+---------+-------+---------+-------+--------+---------+-------+-------+
24 | | Ours (occ) | 38.95 | 24.00 | 27.74 | 30.46 | 25.25 | 32.58 | 32.84 | 30.49 | 30.29 |
25 | +----------------------+----------+---------+-------+---------+-------+--------+---------+-------+-------+
26 | | training time | 41min | 41min | 40min | 40min | 39min | 39min | 40min | 38min | 40min |
27 | +----------------------+----------+---------+-------+---------+-------+--------+---------+-------+-------+
28 |
--------------------------------------------------------------------------------
/docs/source/examples/dynamic/tineuvox.rst:
--------------------------------------------------------------------------------
1 | .. _`TiNeuVox Example`:
2 |
3 | TiNeuVox
4 | ====================
5 |
6 | In this example we showcase how to plug the nerfacc library into the *official* codebase
7 | of `TiNeuVox `_. See
8 | `our forked repo `_
9 | for details.
10 |
11 |
12 | Benchmark: D-NeRF Dataset
13 | ---------------------------------
14 | *updated on 2023-04-04 with nerfacc==0.5.0*
15 |
16 | Our experiments are conducted on a single NVIDIA GeForce RTX 2080 Ti.
17 |
18 | +----------------------+----------+---------+-------+---------+-------+--------+---------+-------+-------+
19 | | PSNR | bouncing | hell | hook | jumping | lego | mutant | standup | trex | MEAN |
20 | | | balls | warrior | | jacks | | | | | |
21 | +======================+==========+=========+=======+=========+=======+========+=========+=======+=======+
22 | | TiNeuVox | 39.37 | 27.05 | 29.61 | 32.92 | 24.32 | 31.47 | 33.59 | 30.01 | 31.04 |
23 | +----------------------+----------+---------+-------+---------+-------+--------+---------+-------+-------+
24 | | Training Time | 832s | 829s | 833s | 841s | 824s | 833s | 827s | 840s | 833s |
25 | +----------------------+----------+---------+-------+---------+-------+--------+---------+-------+-------+
26 | | Ours (occ) | 40.56 | 27.17 | 31.35 | 33.44 | 25.17 | 34.05 | 35.35 | 32.29 | 32.42 |
27 | +----------------------+----------+---------+-------+---------+-------+--------+---------+-------+-------+
28 | | Training Time | 378s | 302s | 342s | 325s | 355s | 360s | 346s | 362s | 346s |
29 | +----------------------+----------+---------+-------+---------+-------+--------+---------+-------+-------+
30 |
31 |
32 | Benchmark: HyperNeRF Dataset
33 | ---------------------------------
34 | *updated on 2023-04-04 with nerfacc==0.5.0*
35 |
36 | Our experiments are conducted on a single NVIDIA GeForce RTX 2080 Ti.
37 |
38 | +----------------------+----------+---------+-------+-------------+-------+
39 | | PSNR | 3dprinter| broom |chicken| peel-banana | MEAN |
40 | | | | | | | |
41 | +======================+==========+=========+=======+=============+=======+
42 | | TiNeuVox | 22.77 | 21.30 | 28.29 | 24.50 | 24.22 |
43 | +----------------------+----------+---------+-------+-------------+-------+
44 | | Training Time | 3253s | 2811s | 3933s | 2705s | 3175s |
45 | +----------------------+----------+---------+-------+-------------+-------+
46 | | Ours (occ) | 22.72 | 21.27 | 28.27 | 24.54 | 24.20 |
47 | +----------------------+----------+---------+-------+-------------+-------+
48 | | Training Time | 2265s | 2221s | 2157s | 2101s | 2186s |
49 | +----------------------+----------+---------+-------+-------------+-------+
50 | | Ours (prop) | 22.75 | 21.17 | 28.27 | 24.97 | 24.29 |
51 | +----------------------+----------+---------+-------+-------------+-------+
52 | | Training Time | 2307s | 2281s | 2267s | 2510s | 2341s |
53 | +----------------------+----------+---------+-------+-------------+-------+
54 |
--------------------------------------------------------------------------------
/docs/source/examples/dynamic/tnerf.rst:
--------------------------------------------------------------------------------
1 | .. _`T-NeRF Example`:
2 |
3 | T-NeRF
4 | ====================
5 | See code `examples/train_mlp_dnerf.py` at our `github repository`_ for details.
6 |
7 | Radiance Field
8 | --------------
9 | Here we implement a very basic time-conditioned NeRF (T-NeRF) model (`examples/radiance_fields/mlp.py`)
10 | for dynamic scene reconstruction.
11 | The implementation mostly follows the T-NeRF described in the `D-NeRF`_ paper, with an 8-layer MLP
12 | for the radiance field and a 4-layer MLP for the warping field. The only major difference is that
13 | we reduce the max frequency of the positional encoding from 10 to 4, to respect the fact that the
14 | motion of the object is relatively smooth.
15 |
16 |
17 | Benchmarks: D-NeRF Dataset
18 | ---------------------------
19 | *updated on 2022-10-08*
20 |
21 | Our experiments are conducted on a single NVIDIA TITAN RTX GPU.
22 | The training memory footprint is about 11GB.
23 |
24 | +----------------------+----------+---------+-------+---------+-------+--------+---------+-------+-------+
25 | | PSNR | bouncing | hell | hook | jumping | lego | mutant | standup | trex | MEAN |
26 | | | balls | warrior | | jacks | | | | | |
27 | +======================+==========+=========+=======+=========+=======+========+=========+=======+=======+
28 | | D-NeRF (~ days) | 32.80 | 25.02 | 29.25 | 32.80 | 21.64 | 31.29 | 32.79 | 31.75 | 29.67 |
29 | +----------------------+----------+---------+-------+---------+-------+--------+---------+-------+-------+
30 | | Ours (~ 1 hr) | 39.49 | 25.58 | 31.86 | 32.73 | 24.32 | 35.55 | 35.90 | 32.33 | 32.22 |
31 | +----------------------+----------+---------+-------+---------+-------+--------+---------+-------+-------+
32 |
33 | .. _`D-NeRF`: https://arxiv.org/abs/2011.13961
34 | .. _`github repository`: https://github.com/nerfstudio-project/nerfacc/
35 |
36 |
--------------------------------------------------------------------------------
/docs/source/examples/static.rst:
--------------------------------------------------------------------------------
1 | Static NeRFs
2 | ===================================
3 |
4 | Performance Overview
5 | --------------------
6 |
7 | +----------------------+----------------+----------------------------------+-----------------------+--------------------------+
8 | | Methods | Dataset | Training Time :math:`\downarrow` | PSNR :math:`\uparrow` | LPIPS :math:`\downarrow` |
9 | +======================+================+==================================+=======================+==========================+
10 | | TensoRF `[1]`_ | Tanks&Temple | 19min | 28.11 | 0.167 |
11 | +----------------------+----------------+----------------------------------+-----------------------+--------------------------+
12 | | *+nerfacc (occgrid)* | | 14min | 28.06 | 0.174 |
13 | +----------------------+----------------+----------------------------------+-----------------------+--------------------------+
14 | | TensoRF `[1]`_ | NeRF-Synthetic | 10.3min | 32.73 | 0.049 |
15 | +----------------------+----------------+----------------------------------+-----------------------+--------------------------+
16 | | *+nerfacc (occgrid)* | | 7.1min | 32.52 | 0.054 |
17 | +----------------------+----------------+----------------------------------+-----------------------+--------------------------+
18 | | NeRF `[2]`_ | NeRF-Synthetic | days | 31.00 | 0.047 |
19 | +----------------------+----------------+----------------------------------+-----------------------+--------------------------+
20 | | *+nerfacc (occgrid)* | | 1hr | 31.55 | 0.072 |
21 | +----------------------+----------------+----------------------------------+-----------------------+--------------------------+
22 | | Instant-NGP `[3]`_ | NeRF-Synthetic | 4.4min | 32.35 | |
23 | +----------------------+----------------+----------------------------------+-----------------------+--------------------------+
24 | | *+nerfacc (occgrid)* | | 4.5min | 33.11 | 0.053 |
25 | +----------------------+----------------+----------------------------------+-----------------------+--------------------------+
26 | | *+nerfacc (propnet)* | | 4.0min | 31.76 | 0.062 |
27 | +----------------------+----------------+----------------------------------+-----------------------+--------------------------+
28 | | Instant-NGP `[3]`_ | Mip-NeRF 360 | 5.3min | 25.93 | |
29 | +----------------------+----------------+----------------------------------+-----------------------+--------------------------+
30 | | *+nerfacc (occgrid)* | | 5.0min | 26.41 | 0.353 |
31 | +----------------------+----------------+----------------------------------+-----------------------+--------------------------+
32 | | *+nerfacc (propnet)* | | 4.9min | 27.58 | 0.292 |
33 | +----------------------+----------------+----------------------------------+-----------------------+--------------------------+
34 |
35 | Implementation Details
36 | ----------------------
37 |
38 | .. toctree::
39 | :glob:
40 | :maxdepth: 1
41 |
42 | static/*
43 |
44 | |
45 |
46 | 3rd-Party Use Cases
47 | -------------------
48 |
49 | - `nerfstudio <https://docs.nerf.studio/>`_: A collaboration-friendly studio for NeRFs.
50 | - `modelscope <https://github.com/modelscope/modelscope>`_: A collection of deep-learning algorithms.
51 | - `sdfstudio <https://github.com/autonomousvision/sdfstudio>`_: A Unified Framework for Surface Reconstruction.
52 | - `instant-nsr-pl <https://github.com/bennyguo/instant-nsr-pl>`_: Train NeuS in 10min.
53 |
54 | .. _`[1]`: https://arxiv.org/abs/2203.09517
55 | .. _`[2]`: https://arxiv.org/abs/2003.08934
56 | .. _`[3]`: https://arxiv.org/abs/2201.05989
--------------------------------------------------------------------------------
/docs/source/examples/static/nerf.rst:
--------------------------------------------------------------------------------
1 | .. _`Vanilla NeRF Example`:
2 |
3 | Vanilla NeRF
4 | ====================
5 | See code `examples/train_mlp_nerf.py` at our `github repository`_ for details.
6 |
7 |
8 | Radiance Field
9 | --------------
10 | We follow the original `NeRF`_ paper to implement a 8-layer-MLP radiance field (`examples/radiance_fields/mlp.py`)
11 | with positional encoding.
12 |
13 | .. note::
14 |     The vanilla NeRF paper uses two MLPs for coarse-to-fine sampling. Instead, here we only use a
15 |     single MLP with more samples (1024). Both ways share the same spirit of doing dense sampling
16 |     around the surface. Our fast rendering inherently skips samples away from the surface,
17 |     so we can simply increase the number of samples with a single MLP to achieve the same goal
18 |     as coarse-to-fine sampling, without runtime or memory issues.
19 |
20 |
21 | Benchmark: Nerf-Synthetic Dataset
22 | ---------------------------------
23 | *updated on 2022-10-08*
24 |
25 | Our experiments are conducted on a single NVIDIA TITAN RTX GPU.
26 | The training memory footprint is about 10GB.
27 |
28 | +----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+
29 | | PSNR | Lego | Mic |Materials| Chair |Hotdog | Ficus | Drums | Ship | MEAN |
30 | | | | | | | | | | | |
31 | +======================+=======+=======+=========+=======+=======+=======+=======+=======+=======+
32 | | NeRF (~ days) | 32.54 | 32.91 | 29.62 | 33.00 | 36.18 | 30.13 | 25.01 | 28.65 | 31.00 |
33 | +----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+
34 | | Ours (~ 1 hr) | 33.69 | 33.76 | 29.73 | 33.32 | 35.80 | 32.52 | 25.39 | 28.18 | 31.55 |
35 | +----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+
36 |
37 | .. _`github repository`: https://github.com/nerfstudio-project/nerfacc/
38 | .. _`NeRF`: https://arxiv.org/abs/2003.08934
39 |
--------------------------------------------------------------------------------
/docs/source/examples/static/ngp.rst:
--------------------------------------------------------------------------------
1 | .. _`Instant-NGP Example`:
2 |
3 | Instant-NGP
4 | ====================
5 |
6 | See code `examples/train_ngp_nerf_occ.py` and `examples/train_ngp_nerf_prop.py` at our
7 | `github repository`_ for details.
8 |
9 |
10 | Radiance Field
11 | --------------
12 | We follow the `Instant-NGP`_ paper to implement the radiance field (`examples/radiance_fields/ngp.py`),
13 | and align the hyperparameters (e.g., hash encoder, MLP) with the paper. It is built on top of the
14 | `tiny-cuda-nn`_ library.
15 |
16 |
17 | Benchmark: Nerf-Synthetic Dataset
18 | ---------------------------------
19 | *updated on 2023-04-04 with nerfacc==0.5.0*
20 |
21 | Our experiments are conducted on a single NVIDIA TITAN RTX GPU.
22 | The training memory footprint is about 3GB.
23 |
24 | .. note::
25 |
26 |     The Instant-NGP paper makes use of the alpha channel in the images to apply random background
27 |     augmentation during training. For a fair comparison, we rerun their code with a constant white
28 |     background during both training and testing. Also, it is worth mentioning that we didn't strictly
29 |     follow the training recipe in the Instant-NGP paper, such as the learning rate schedule etc., as
30 |     the purpose of this benchmark is to showcase, not to reproduce, the paper.
31 |
32 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+
33 | | PSNR | Lego | Mic |Materials| Chair |Hotdog | Ficus | Drums | Ship | MEAN |
34 | | | | | | | | | | | |
35 | +=======================+=======+=======+=========+=======+=======+=======+=======+=======+=======+
36 | |Instant-NGP 35k steps | 35.87 | 36.22 | 29.08 | 35.10 | 37.48 | 30.61 | 23.85 | 30.62 | 32.35 |
37 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+
38 | | training time | 309s | 258s | 256s | 316s | 292s | 207s | 218s | 250s | 263s |
39 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+
40 | |Ours (occ) 20k steps | 35.67 | 36.85 | 29.60 | 35.71 | 37.37 | 33.95 | 25.44 | 30.29 | 33.11 |
41 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+
42 | | training time | 288s | 260s | 253s | 326s | 272s | 249s | 252s | 251s | 269s |
43 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+
44 | |Ours (prop) 20k steps | 34.04 | 34.56 | 28.76 | 34.21 | 36.44 | 31.41 | 24.81 | 29.85 | 31.76 |
45 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+
46 | | training time | 225s | 235s | 235s | 240s | 239s | 242s | 258s | 247s | 240s |
47 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+
48 |
49 |
50 | Benchmark: Mip-NeRF 360 Dataset
51 | ---------------------------------
52 | *updated on 2023-04-04 with nerfacc==0.5.0*
53 |
54 | Our experiments are conducted on a single NVIDIA TITAN RTX GPU.
55 |
56 | .. note::
57 | `Ours (prop)` combines the proposal network (:class:`nerfacc.PropNetEstimator`) with the
58 | Instant-NGP radiance field. This is exactly what the `Nerfacto`_ model is doing in the
59 | `nerfstudio`_ project. In fact, the hyperparameters for `Ours (prop)` in this experiment
60 | are aligned with the `Nerfacto`_ model.
61 |
62 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+
63 | | PSNR |Bicycle| Garden| Stump | Bonsai|Counter|Kitchen| Room | MEAN |
64 | | | | | | | | | | |
65 | +=======================+=======+=======+=========+=======+=======+=======+=======+=======+
66 | |NeRF++ (~days) | 22.64 | 24.32 | 23.34 | 29.15 | 26.38 | 27.80 | 28.87 | 26.21 |
67 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+
68 | |Mip-NeRF 360 (~days) | 24.37 | 26.98 | 26.40 | 33.46 | 29.55 | 32.23 | 31.63 | 29.23 |
69 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+
70 | |Instant-NGP 35k steps | 22.40 | 24.86 | 23.17 | 24.41 | 27.38 | 29.07 | 30.24 | 25.93 |
71 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+
72 | | training time | 301s | 339s | 295s | 279s | 339s | 366s | 317s | 319s |
73 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+
74 | |Ours (occ) 20k steps | 22.40 | 23.94 | 22.98 | 30.09 | 26.84 | 28.03 | 30.60 | 26.41 |
75 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+
76 | | training time | 277s | 302s | 299s | 278s | 315s | 331s | 301s | 300s |
77 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+
78 | |Ours (prop) 20k steps | 23.23 | 25.42 | 25.24 | 30.71 | 26.74 | 30.70 | 30.99 | 27.58 |
79 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+
80 | | training time | 276s | 293s | 291s | 291s | 291s | 295s | 287s | 289s |
81 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+
82 |
83 |
84 | .. _`github repository`: https://github.com/nerfstudio-project/nerfacc/
85 | .. _`Instant-NGP`: https://arxiv.org/abs/2201.05989
86 | .. _`tiny-cuda-nn`: https://github.com/NVlabs/tiny-cuda-nn
87 | .. _`Nerfacto`: https://docs.nerf.studio/nerfology/methods/nerfacto.html
88 | .. _`nerfstudio`: https://docs.nerf.studio/
--------------------------------------------------------------------------------
/docs/source/examples/static/tensorf.rst:
--------------------------------------------------------------------------------
1 | .. _`TensoRF Example`:
2 |
3 | TensoRF
4 | ====================
5 |
6 | In this example we showcase how to plug the nerfacc library into the *official* codebase
7 | of `TensoRF `_. See
8 | `our forked repo `_
9 | for details.
10 |
11 |
12 | Benchmark: NeRF-Synthetic Dataset
13 | ---------------------------------
14 | *updated on 2023-04-04 with nerfacc==0.5.0*
15 |
16 | Our experiments are conducted on a single NVIDIA GeForce RTX 2080 Ti.
17 |
18 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+
19 | | PSNR | Lego | Mic |Materials| Chair |Hotdog | Ficus | Drums | Ship | MEAN |
20 | | | | | | | | | | | |
21 | +=======================+=======+=======+=========+=======+=======+=======+=======+=======+=======+
22 | | TensoRF | 35.14 | 25.70 | 33.69 | 37.03 | 36.04 | 29.77 | 34.35 | 30.12 | 32.73 |
23 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+
24 | | training time | 504s | 522s | 633s | 648s | 584s | 824s | 464s | 759s | 617s |
25 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+
26 | | Ours (occ) | 35.05 | 25.70 | 33.54 | 36.99 | 35.62 | 29.76 | 34.08 | 29.39 | 32.52 |
27 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+
28 | | training time | 310s | 312s | 463s | 433s | 363s | 750s | 303s | 468s | 425s |
29 | +-----------------------+-------+-------+---------+-------+-------+-------+-------+-------+-------+
30 |
31 |
32 | Benchmark: Tanks&Temples Dataset
33 | ---------------------------------
34 | *updated on 2023-04-04 with nerfacc==0.5.0*
35 |
36 | Our experiments are conducted on a single NVIDIA GeForce RTX 2080 Ti.
37 |
38 | +-----------------------+-------+-------------+--------+-------+-------+
39 | | PSNR | Barn | Caterpillar | Family | Truck | MEAN |
40 | | | | | | | |
41 | +=======================+=======+=============+========+=======+=======+
42 | | TensoRF | 26.88 | 25.48 | 33.48 | 26.59 | 28.11 |
43 | +-----------------------+-------+-------------+--------+-------+-------+
44 | | training time | 24min | 19min | 15min | 18min | 19min |
45 | +-----------------------+-------+-------------+--------+-------+-------+
46 | | Ours (occ) | 26.74 | 25.64 | 33.16 | 26.70 | 28.06 |
47 | +-----------------------+-------+-------------+--------+-------+-------+
48 | | training time | 19min | 15min | 11min | 13min | 14min |
49 | +-----------------------+-------+-------------+--------+-------+-------+
50 |
--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
1 | NerfAcc Documentation
2 | ===================================
3 |
4 | NerfAcc is a PyTorch NeRF acceleration toolbox for both training and inference. It focuses on
5 | efficient sampling in the volumetric rendering pipeline of radiance fields, which is
6 | universal and plug-and-play for most NeRFs.
7 | With minimal modifications to the existing codebases, NerfAcc provides significant speedups
8 | in training various recent NeRF papers.
9 | **And it is a pure Python interface with flexible APIs!**
10 |
11 | |
12 |
13 | .. image:: _static/images/teaser.jpg
14 | :align: center
15 |
16 | |
17 |
18 | | Github: https://github.com/nerfstudio-project/nerfacc
19 | | Paper: https://arxiv.org/pdf/2305.04966.pdf
20 | | Authors: `Ruilong Li`_, `Hang Gao`_, `Matthew Tancik`_, `Angjoo Kanazawa`_
21 |
22 | .. note::
23 |
24 |     Though this repo only contains examples for single scene optimization,
25 |     we believe generalizable NeRFs across multiple scenes can also be accelerated with our
26 |     :class:`nerfacc.PropNetEstimator`. Examples will be added soon.
27 |
28 |
29 | Installation:
30 | -------------
31 |
32 | **Dependence**: Please install `Pytorch`_ first.
33 |
34 | The easiest way is to install from PyPI. In this way it will build the CUDA code **on the first run** (JIT).
35 |
36 | .. code-block:: console
37 |
38 | $ pip install nerfacc
39 |
40 | Or install from source. In this way it will build the CUDA code during installation.
41 |
42 | .. code-block:: console
43 |
44 | $ pip install git+https://github.com/nerfstudio-project/nerfacc.git
45 |
46 | We also provide pre-built wheels covering major combinations of Pytorch + CUDA versions.
47 | See our `Github README`_ for what we support.
48 |
49 | .. code-block:: console
50 |
51 | // e.g. torch 1.13.0 + CUDA 11.7
52 | $ pip install nerfacc -f https://nerfacc-bucket.s3.us-west-2.amazonaws.com/whl/torch-1.13.0_cu117.html
53 |
54 |
55 | Usage:
56 | -------------
57 |
58 | The idea of NerfAcc is to perform efficient volumetric sampling with a computationally cheap estimator to discover surfaces.
59 | So NerfAcc can work with any user-defined radiance field. To plug the NerfAcc rendering pipeline into your code and enjoy
60 | the acceleration, you only need to define two functions with your radiance field.
61 |
62 | - `sigma_fn`: Compute density at each sample. It will be used by the estimator
63 | (e.g., :class:`nerfacc.OccGridEstimator`, :class:`nerfacc.PropNetEstimator`) to discover surfaces.
64 | - `rgb_sigma_fn`: Compute color and density at each sample. It will be used by
65 | :func:`nerfacc.rendering` to conduct differentiable volumetric rendering. This function
66 | will receive gradients to update your radiance field.
67 |
68 | A simple example is like this:
69 |
70 | .. code-block:: python
71 |
72 | import torch
73 | from torch import Tensor
74 | import nerfacc
75 |
76 | radiance_field = ... # network: a NeRF model
77 | rays_o: Tensor = ... # ray origins. (n_rays, 3)
78 | rays_d: Tensor = ... # ray normalized directions. (n_rays, 3)
79 | optimizer = ... # optimizer
80 |
81 | estimator = nerfacc.OccGridEstimator(...)
82 |
83 | def sigma_fn(
84 | t_starts: Tensor, t_ends:Tensor, ray_indices: Tensor
85 | ) -> Tensor:
86 | """ Define how to query density for the estimator."""
87 | t_origins = rays_o[ray_indices] # (n_samples, 3)
88 | t_dirs = rays_d[ray_indices] # (n_samples, 3)
89 | positions = t_origins + t_dirs * (t_starts + t_ends)[:, None] / 2.0
90 | sigmas = radiance_field.query_density(positions)
91 | return sigmas # (n_samples,)
92 |
93 | def rgb_sigma_fn(
94 | t_starts: Tensor, t_ends: Tensor, ray_indices: Tensor
95 | ) -> Tuple[Tensor, Tensor]:
96 | """ Query rgb and density values from a user-defined radiance field. """
97 | t_origins = rays_o[ray_indices] # (n_samples, 3)
98 | t_dirs = rays_d[ray_indices] # (n_samples, 3)
99 | positions = t_origins + t_dirs * (t_starts + t_ends)[:, None] / 2.0
100 | rgbs, sigmas = radiance_field(positions, condition=t_dirs)
101 | return rgbs, sigmas # (n_samples, 3), (n_samples,)
102 |
103 | # Efficient Raymarching:
104 | # ray_indices: (n_samples,). t_starts: (n_samples,). t_ends: (n_samples,).
105 | ray_indices, t_starts, t_ends = estimator.sampling(
106 | rays_o, rays_d, sigma_fn=sigma_fn, near_plane=0.2, far_plane=1.0,
107 | early_stop_eps=1e-4, alpha_thre=1e-2,
108 | )
109 |
110 | # Differentiable Volumetric Rendering.
111 |     # colors: (n_rays, 3). opacity: (n_rays, 1). depth: (n_rays, 1).
112 | color, opacity, depth, extras = nerfacc.rendering(
113 | t_starts, t_ends, ray_indices, n_rays=rays_o.shape[0], rgb_sigma_fn=rgb_sigma_fn
114 | )
115 |
116 | # Optimize: Both the network and rays will receive gradients
117 | optimizer.zero_grad()
118 | loss = F.mse_loss(color, color_gt)
119 | loss.backward()
120 | optimizer.step()
121 |
122 |
123 | Links:
124 | -------------
125 |
126 | .. toctree::
127 | :glob:
128 | :maxdepth: 1
129 | :caption: Methodology
130 |
131 | methodology/*
132 |
133 | .. toctree::
134 | :glob:
135 | :maxdepth: 1
136 | :caption: Python API
137 |
138 | apis/*
139 |
140 | .. toctree::
141 | :glob:
142 | :maxdepth: 1
143 | :caption: Example Usages and Benchmarks
144 |
145 | examples/*
146 |
147 | .. toctree::
148 | :maxdepth: 1
149 | :caption: Projects
150 |
151 | nerfstudio
152 | sdfstudio
153 | instant-nsr-pl
154 |
155 | .. _`vanilla Nerf`: https://arxiv.org/abs/2003.08934
156 | .. _`Instant-NGP Nerf`: https://arxiv.org/abs/2201.05989
157 | .. _`D-Nerf`: https://arxiv.org/abs/2011.13961
158 | .. _`MipNerf360`: https://arxiv.org/abs/2111.12077
159 | .. _`pixel-Nerf`: https://arxiv.org/abs/2012.02190
160 | .. _`Nerf++`: https://arxiv.org/abs/2010.07492
161 |
162 | .. _`Ruilong Li`: https://www.liruilong.cn/
163 | .. _`Hang Gao`: https://hangg7.com/
164 | .. _`Matthew Tancik`: https://www.matthewtancik.com/
165 | .. _`Angjoo Kanazawa`: https://people.eecs.berkeley.edu/~kanazawa/
166 |
167 | .. _`Github README`: https://github.com/nerfstudio-project/nerfacc#readme
168 |
169 | .. _`PyTorch`: https://pytorch.org/get-started/locally/
170 |
--------------------------------------------------------------------------------
/docs/source/methodology/coding.rst:
--------------------------------------------------------------------------------
1 | .. _`Efficient Coding`:
2 |
3 | Efficient Coding
4 | ================
5 |
6 | Monitor the GPU Utilization
7 | ----------------------------
8 |
9 | The rule of thumb is to maximize the computation power you have. When working with a GPU,
10 | that means maximizing the percentage of GPU kernels that are running at the same time. This
11 | can be monitored by the GPU utilization (last column) reported by the `nvidia-smi` command:
12 |
13 | .. image:: ../_static/images/coding/gpu_util.png
14 | :align: center
15 |
16 | |
17 |
18 | If the GPU utilization is less than 100%, it means some GPU kernels are idling
19 | from time to time (if not all the time). This is of course not good for performance.
20 | Under the hood, Pytorch will try to use all the GPU kernels to parallelize the computation,
21 | but in most cases you don't see 100%. Why is that?
22 |
23 |
24 | The first reason is simply that there might not be much to parallelize, in which case
25 | we can't do much other than increase the batch size. For example
26 |
27 | .. code-block:: python
28 |
29 | # Only 1000 threads are running at the same time to create this tensor.
30 | # So we see 28% GPU utilization.
31 | while True: torch.zeros((1000), device="cuda")
32 | # Now we see 100% GPU utilization.
33 | while True: torch.zeros((10000000), device="cuda")
34 |
35 | The second reason, which is more common, is that there is *CPU-GPU synchronization*
36 | happening in the code. For example:
37 |
38 | .. code-block:: python
39 |
40 | data = torch.rand((10000000), device="cuda")
41 | mask = data > 0.5
42 | ids = torch.where(mask)[0]
43 | assert torch.all(data[mask] == data[ids])
44 |
45 | # 100% GPU utilization.
46 | while True: data[ids]
47 | # 95% GPU utilization.
48 | while True: data[mask]
49 |
50 | Besides, if there are many cpu operations in the pipeline, such as data loading and
51 | preprocessing, it might also cause the GPU utilization to be low. In this case, you
52 | can try to use `torch.utils.data.DataLoader` to overlap the data processing time
53 | with the GPU computation time.
54 |
55 | Avoid CPU-GPU Synchronization
56 | -----------------------------
57 |
58 | In the above example, if you time your code, you will see a significant difference:
59 |
60 | .. code-block:: python
61 |
62 | # 177 µs ± 15.4 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)
63 | %timeit data[ids]
64 | # 355 µs ± 466 ns per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
65 | %timeit data[mask]
66 |
67 | Explanation: In this case, the mask operation needs to decide the size of the output tensor, which
68 | lives on the CPU, based on the number of `True` values in the mask that lives on the GPU. So
69 | a synchronization is required. The index selection operation, on the other hand,
70 | already knows the size of the output tensor based on the size of the `ids`, so no
71 | synchronization is required.
72 |
73 | You may argue in this case we can't really improve things because the `ids` are computed
74 | from the `mask`, which would require the synchronization anyway (`torch.where`). However,
75 | in many cases we can avoid the synchronization by carefully writing the code. For example:
76 |
77 | .. code-block:: python
78 |
79 | # no sync. 67.3 µs ± 5.01 ns per loop
80 | while True: torch.zeros((10000000), device="cuda")
81 | # sync. 13.7 ms ± 320 µs per loop
82 | while True: torch.zeros((10000000)).to("cuda")
83 |
84 | Operations that require synchronization include `torch.where`, `tensor.item()`,
85 | `print(tensor)`, `tensor.to(device)`, `torch.nonzero`, etc. Just imagine those functions
86 | have an inherent `torch.cuda.synchronize()` call under the hood. See the
87 | `official guide <https://pytorch.org/docs/stable/notes/cuda.html>`_
88 | if you are interested in more details.
89 |
90 | In our library, :func:`nerfacc.traverse_grids` is a function that requires synchronization,
91 | because it needs to know the size of the output tensor when traversing the grids. As a result,
92 | sampling with :class:`nerfacc.OccGridEstimator` also requires synchronization. But there is
93 | no workaround in this case, so just be aware of it.
94 |
95 |
96 | Profiling
97 | -----------------------------
98 |
99 | There are plenty of tools for profiling. My personal favorite is
100 | `line_profiler <https://github.com/pyutils/line_profiler>`_, which will give you *per-line* runtime
101 | of a function with a simple decorator `@profile`. It is very useful for finding where the bottleneck
102 | is in your code. It is worth noting that due to the asynchronous nature of Pytorch code, you would
103 | need to set `CUDA_LAUNCH_BLOCKING=1` when profiling your code (no matter which profiling tool you are using).
104 | This variable will force CPU-GPU synchronization for every torch function (equivalent to adding
105 | `torch.cuda.synchronize()` everywhere), which can reveal the true runtime of each line of code.
106 | And of course, with `CUDA_LAUNCH_BLOCKING=1` you will get slower total runtime, so don't forget to
107 | remove it when you are done profiling.
--------------------------------------------------------------------------------
/docs/source/methodology/sampling.rst:
--------------------------------------------------------------------------------
1 | .. _`Efficient Sampling`:
2 |
3 | Efficient Sampling
4 | ===================================
5 |
6 | Importance Sampling via Transmittance Estimator.
7 | -------------------------------------------------
8 |
9 | Efficient sampling is a well-explored problem in Graphics, wherein the
10 | emphasis is on identifying regions that make the most significant
11 | contribution to the final rendering. This objective is generally accomplished
12 | through importance sampling, which aims to distribute samples based on the
13 | probability density function (PDF), denoted as :math:`p(t)`, between the range
14 | of :math:`[t_n, t_f]`. By computing the cumulative distribution function (CDF)
15 | through integration, *i.e.*, :math:`F(t) = \int_{t_n}^{t} p(v)\,dv`,
16 | samples are generated using the inverse transform sampling method:
17 |
18 | .. math::
19 |
20 | t = F^{-1}(u) \quad \text{where} \quad u \sim \mathcal{U}[0,1].
21 |
22 | In volumetric rendering, the contribution of each sample to the final
23 | rendering is expressed by the accumulation weights :math:`T(t)\sigma(t)`:
24 |
25 | .. math::
26 |
27 | C(\mathbf{r}) = \int_{t_n}^{t_f} T(t)\,\sigma(t)\,c(t)\,dt
28 |
29 | Hence, the PDF for volumetric rendering is :math:`p(t) = T(t)\sigma(t)`
30 | and the CDF is:
31 |
32 | .. math::
33 |
34 | F(t) = \int_{t_n}^{t} T(v)\sigma(v)\,dv = 1 - T(t)
35 |
36 | Therefore, inverse sampling the CDF :math:`F(t)` is equivalent to inverse
37 | sampling the transmittance :math:`T(t)`. A transmittance estimator is sufficient
38 | to determine the optimal samples. We refer readers to the
39 | `SIGGRAPH 2017 Course: Production Volume Rendering`_ for more details about this
40 | concept if interested.
41 |
42 | Occupancy Grid Estimator.
43 | ----------------------------
44 |
45 | .. image:: ../_static/images/illustration_occgrid.png
46 | :class: float-right
47 | :width: 200px
48 |
49 | The idea of Occupancy Grid is to cache the density in the scene with a binarized voxel grid. When
50 | sampling, the ray marches through the grid with a preset step size, and skips the empty regions by querying
51 | the voxel grid. Intuitively, the binarized voxel grid is an *estimator* of the radiance field, with much
52 | faster readout. This technique is proposed in `Instant-NGP`_ with highly optimized CUDA implementations.
53 | More formally, the estimator describes a binarized density distribution :math:`\hat{\sigma}` along
54 | the ray with a conservative threshold :math:`\tau`:
55 |
56 | .. math::
57 |
58 | \hat{\sigma}(t_i) = \mathbb{1}\big[\sigma(t_i) > \tau\big]
59 |
60 | Consequently, the piece-wise constant PDF can be expressed as
61 |
62 | .. math::
63 |
64 | p(t_i) = \hat{\sigma}(t_i) / \sum_{j=1}^{n} \hat{\sigma}(t_j)
65 |
66 | and the piece-wise linear transmittance estimator is
67 |
68 | .. math::
69 |
70 | T(t_i) = 1 - \sum_{j=1}^{i-1}\hat{\sigma}(t_j) / \sum_{j=1}^{n} \hat{\sigma}(t_j)
71 |
72 | See the figure below for an illustration.
73 |
74 | .. rst-class:: clear-both
75 |
76 | .. image:: ../_static/images/plot_occgrid.png
77 | :align: center
78 |
79 | |
80 |
81 | In `nerfacc`, this is implemented via the :class:`nerfacc.OccGridEstimator` class.
82 |
83 | Proposal Network Estimator.
84 | -----------------------------
85 |
86 | .. image:: ../_static/images/illustration_propnet.png
87 | :class: float-right
88 | :width: 200px
89 |
90 | Another type of approach is to directly estimate the PDF along the ray with discrete samples.
91 | In `vanilla NeRF`_, the coarse MLP is trained using volumetric rendering loss to output a set of
92 | densities :math:`{\sigma(t_i)}`. This allows for the creation of a piece-wise constant PDF:
93 |
94 | .. math::
95 |
96 | p(t_i) = \sigma(t_i)\exp(-\sigma(t_i)\,dt)
97 |
98 | and a piece-wise linear transmittance estimator:
99 |
100 | .. math::
101 |
102 |    T(t_i) = \exp(-\sum_{j=1}^{i-1}\sigma(t_j)\,dt)
103 |
104 | This approach was further improved in `Mip-NeRF 360`_ with a PDF matching loss, which allows for
105 | the use of a much smaller MLP in the coarse level, namely Proposal Network, to speedup the
106 | PDF construction.
107 |
108 | See the figure below for an illustration.
109 |
110 | .. image:: ../_static/images/plot_propnet.png
111 | :align: center
112 |
113 | |
114 |
115 | In `nerfacc`, this is implemented via the :class:`nerfacc.PropNetEstimator` class.
116 |
117 | Which Estimator to use?
118 | -----------------------
119 | - :class:`nerfacc.OccGridEstimator` is generally more efficient when most of the space in the scene is empty, such as in the case of the `NeRF-Synthetic`_ dataset. But it still places samples within occluded areas that contribute little to the final rendering (e.g., the last sample in the above illustration).
120 |
121 | - :class:`nerfacc.PropNetEstimator` generally provides more accurate transmittance estimation, enabling samples to concentrate more on high-contribution areas (e.g., surfaces) and to be more spread out in both empty and occluded regions. Also, this method works nicely on unbounded scenes as it does not require a preset bounding box of the scene. Thus datasets like `Mip-NeRF 360`_ are better suited to this estimator.
122 |
123 | .. _`SIGGRAPH 2017 Course: Production Volume Rendering`: https://graphics.pixar.com/library/ProductionVolumeRendering/paper.pdf
124 | .. _`Instant-NGP`: https://arxiv.org/abs/2201.05989
125 | .. _`Mip-NeRF 360`: https://arxiv.org/abs/2111.12077
126 | .. _`vanilla NeRF`: https://arxiv.org/abs/2003.08934
127 | .. _`NeRF-Synthetic`: https://arxiv.org/abs/2003.08934
--------------------------------------------------------------------------------
/examples/datasets/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nerfstudio-project/nerfacc/57ccfa14feb94975836ea6913149a86737220f2b/examples/datasets/__init__.py
--------------------------------------------------------------------------------
/examples/datasets/utils.py:
--------------------------------------------------------------------------------
1 | """
2 | Copyright (c) 2022 Ruilong Li, UC Berkeley.
3 | """
4 |
5 | import collections
6 |
# A bundle of rays: per-ray origin points and (unit) view directions.
Rays = collections.namedtuple("Rays", ("origins", "viewdirs"))


def namedtuple_map(fn, tup):
    """Map `fn` over every non-None field of namedtuple `tup`.

    Returns a new namedtuple of the same type; fields that are None are
    passed through unchanged so optional members survive the mapping.
    """
    mapped = (field if field is None else fn(field) for field in tup)
    return type(tup)(*mapped)
13 |
--------------------------------------------------------------------------------
/examples/radiance_fields/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nerfstudio-project/nerfacc/57ccfa14feb94975836ea6913149a86737220f2b/examples/radiance_fields/__init__.py
--------------------------------------------------------------------------------
/examples/requirements.txt:
--------------------------------------------------------------------------------
1 | git+https://github.com/NVlabs/tiny-cuda-nn/#subdirectory=bindings/torch
2 | opencv-python
3 | imageio
4 | numpy
5 | tqdm
6 | scipy
7 | lpips
--------------------------------------------------------------------------------
/examples/train_mlp_nerf.py:
--------------------------------------------------------------------------------
1 | """
2 | Copyright (c) 2022 Ruilong Li, UC Berkeley.
3 | """
4 |
5 | import argparse
6 | import pathlib
7 | import time
8 |
9 | import imageio
10 | import numpy as np
11 | import torch
12 | import torch.nn.functional as F
13 | import tqdm
14 | from datasets.nerf_synthetic import SubjectLoader
15 | from lpips import LPIPS
16 | from radiance_fields.mlp import VanillaNeRFRadianceField
17 |
18 | from examples.utils import (
19 | NERF_SYNTHETIC_SCENES,
20 | render_image_with_occgrid,
21 | set_random_seed,
22 | )
23 | from nerfacc.estimators.occ_grid import OccGridEstimator
24 |
# Run on a single GPU and fix the RNG seed so runs are reproducible.
device = "cuda:0"
set_random_seed(42)

# command-line arguments
parser = argparse.ArgumentParser()
parser.add_argument(
    "--data_root",
    type=str,
    default=str(pathlib.Path.cwd() / "data/nerf_synthetic"),
    help="the root dir of the dataset",
)
parser.add_argument(
    "--train_split",
    type=str,
    default="train",
    choices=["train", "trainval"],
    help="which train split to use",
)
parser.add_argument(
    "--model_path",
    type=str,
    default=None,
    help="the path of the pretrained model",
)
parser.add_argument(
    "--scene",
    type=str,
    default="lego",
    choices=NERF_SYNTHETIC_SCENES,
    help="which scene to use",
)
parser.add_argument(
    "--test_chunk_size",
    type=int,
    default=4096,
)
args = parser.parse_args()

# training parameters
max_steps = 50000
init_batch_size = 1024
target_sample_batch_size = 1 << 16  # keep ~65536 samples per optimization step
# scene parameters
aabb = torch.tensor([-1.5, -1.5, -1.5, 1.5, 1.5, 1.5], device=device)
near_plane = 0.0
far_plane = 1.0e10
# model parameters
grid_resolution = 128
grid_nlvl = 1
# render parameters
render_step_size = 5e-3

# setup the dataset
train_dataset = SubjectLoader(
    subject_id=args.scene,
    root_fp=args.data_root,
    split=args.train_split,
    num_rays=init_batch_size,
    device=device,
)
test_dataset = SubjectLoader(
    subject_id=args.scene,
    root_fp=args.data_root,
    split="test",
    num_rays=None,
    device=device,
)

# Occupancy grid used to skip empty space during ray marching.
estimator = OccGridEstimator(
    roi_aabb=aabb, resolution=grid_resolution, levels=grid_nlvl
).to(device)

# setup the radiance field we want to train.
radiance_field = VanillaNeRFRadianceField().to(device)
optimizer = torch.optim.Adam(radiance_field.parameters(), lr=5e-4)
# Step-wise LR decay (x0.33) at 50% / 75% / ~83% / 90% of max_steps.
scheduler = torch.optim.lr_scheduler.MultiStepLR(
    optimizer,
    milestones=[
        max_steps // 2,
        max_steps * 3 // 4,
        max_steps * 5 // 6,
        max_steps * 9 // 10,
    ],
    gamma=0.33,
)

# LPIPS expects NCHW inputs scaled to [-1, 1]; images here are HWC in [0, 1].
lpips_net = LPIPS(net="vgg").to(device)
lpips_norm_fn = lambda x: x[None, ...].permute(0, 3, 1, 2) * 2 - 1
lpips_fn = lambda x, y: lpips_net(lpips_norm_fn(x), lpips_norm_fn(y)).mean()

# Optionally restore the full training state from a checkpoint.
# `step` records the iteration at which the checkpoint was written.
if args.model_path is not None:
    checkpoint = torch.load(args.model_path)
    radiance_field.load_state_dict(checkpoint["radiance_field_state_dict"])
    optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
    scheduler.load_state_dict(checkpoint["scheduler_state_dict"])
    estimator.load_state_dict(checkpoint["estimator_state_dict"])
    step = checkpoint["step"]
else:
    step = 0
123 |
# training
tic = time.time()
# Resume from the checkpoint step when one was loaded (otherwise `step` is 0).
# Previously this was `range(max_steps + 1)`, which silently discarded the
# restored step and retrained from scratch even though the optimizer and
# scheduler state had already been resumed.
for step in range(step, max_steps + 1):
    radiance_field.train()
    estimator.train()

    # sample a random training view
    i = torch.randint(0, len(train_dataset), (1,)).item()
    data = train_dataset[i]

    render_bkgd = data["color_bkgd"]
    rays = data["rays"]
    pixels = data["pixels"]

    def occ_eval_fn(x):
        # Opacity proxy for the occupancy grid: density * marching step size.
        density = radiance_field.query_density(x)
        return density * render_step_size

    # update occupancy grid
    estimator.update_every_n_steps(
        step=step,
        occ_eval_fn=occ_eval_fn,
        occ_thre=1e-2,
    )

    # render
    rgb, acc, depth, n_rendering_samples = render_image_with_occgrid(
        radiance_field,
        estimator,
        rays,
        # rendering options
        near_plane=near_plane,
        render_step_size=render_step_size,
        render_bkgd=render_bkgd,
    )
    if n_rendering_samples == 0:
        # every ray missed the occupied region; nothing to optimize this step
        continue

    if target_sample_batch_size > 0:
        # dynamic batch size for rays to keep sample batch size constant.
        num_rays = len(pixels)
        num_rays = int(
            num_rays * (target_sample_batch_size / float(n_rendering_samples))
        )
        train_dataset.update_num_rays(num_rays)

    # compute loss
    loss = F.smooth_l1_loss(rgb, pixels)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    scheduler.step()

    if step % 5000 == 0:
        elapsed_time = time.time() - tic
        # MSE/PSNR are computed only for logging; the training loss is smooth-L1.
        loss = F.mse_loss(rgb, pixels)
        psnr = -10.0 * torch.log(loss) / np.log(10.0)
        print(
            f"elapsed_time={elapsed_time:.2f}s | step={step} | "
            f"loss={loss:.5f} | psnr={psnr:.2f} | "
            f"n_rendering_samples={n_rendering_samples:d} | num_rays={len(pixels):d} | "
            f"max_depth={depth.max():.3f} | "
        )

    if step > 0 and step % max_steps == 0:
        # save a full checkpoint at the end of training
        model_save_path = str(pathlib.Path.cwd() / f"mlp_nerf_{step}")
        torch.save(
            {
                "step": step,
                "radiance_field_state_dict": radiance_field.state_dict(),
                "optimizer_state_dict": optimizer.state_dict(),
                "scheduler_state_dict": scheduler.state_dict(),
                "estimator_state_dict": estimator.state_dict(),
            },
            model_save_path,
        )
200 |
# evaluation over the full test split (whole-image rendering)
radiance_field.eval()
estimator.eval()

psnrs = []
lpips = []
with torch.no_grad():
    for i in tqdm.tqdm(range(len(test_dataset))):
        data = test_dataset[i]
        render_bkgd = data["color_bkgd"]
        rays = data["rays"]
        pixels = data["pixels"]

        # rendering
        rgb, acc, depth, _ = render_image_with_occgrid(
            radiance_field,
            estimator,
            rays,
            # rendering options
            near_plane=near_plane,
            render_step_size=render_step_size,
            render_bkgd=render_bkgd,
            # test options
            test_chunk_size=args.test_chunk_size,
        )
        # PSNR = -10 * log10(MSE); np.log is natural log, hence the ratio.
        mse = F.mse_loss(rgb, pixels)
        psnr = -10.0 * torch.log(mse) / np.log(10.0)
        psnrs.append(psnr.item())
        lpips.append(lpips_fn(rgb, pixels).item())
        # if i == 0:
        #     imageio.imwrite(
        #         "rgb_test.png",
        #         (rgb.cpu().numpy() * 255).astype(np.uint8),
        #     )
        #     imageio.imwrite(
        #         "rgb_error.png",
        #         (
        #             (rgb - pixels).norm(dim=-1).cpu().numpy() * 255
        #         ).astype(np.uint8),
        #     )
psnr_avg = sum(psnrs) / len(psnrs)
lpips_avg = sum(lpips) / len(lpips)
print(f"evaluation: psnr_avg={psnr_avg}, lpips_avg={lpips_avg}")
244 |
--------------------------------------------------------------------------------
/examples/train_mlp_tnerf.py:
--------------------------------------------------------------------------------
1 | """
2 | Copyright (c) 2022 Ruilong Li, UC Berkeley.
3 | """
4 |
5 | import argparse
6 | import math
7 | import pathlib
8 | import time
9 |
10 | import imageio
11 | import numpy as np
12 | import torch
13 | import torch.nn.functional as F
14 | import tqdm
15 | from datasets.dnerf_synthetic import SubjectLoader
16 | from lpips import LPIPS
17 | from radiance_fields.mlp import TNeRFRadianceField
18 |
19 | from examples.utils import render_image_with_occgrid, set_random_seed
20 | from nerfacc.estimators.occ_grid import OccGridEstimator
21 |
# Run on a single GPU and fix the RNG seed so runs are reproducible.
device = "cuda:0"
set_random_seed(42)

# command-line arguments
parser = argparse.ArgumentParser()
parser.add_argument(
    "--data_root",
    type=str,
    default=str(pathlib.Path.cwd() / "data/dnerf"),
    help="the root dir of the dataset",
)
parser.add_argument(
    "--train_split",
    type=str,
    default="train",
    choices=["train"],
    help="which train split to use",
)
parser.add_argument(
    "--scene",
    type=str,
    default="lego",
    choices=[
        # dnerf
        "bouncingballs",
        "hellwarrior",
        "hook",
        "jumpingjacks",
        "lego",
        "mutant",
        "standup",
        "trex",
    ],
    help="which scene to use",
)
parser.add_argument(
    "--test_chunk_size",
    type=int,
    default=4096,
)
args = parser.parse_args()
62 |
# training parameters
max_steps = 30000
init_batch_size = 1024
target_sample_batch_size = 1 << 16  # keep ~65536 samples per optimization step
# scene parameters
aabb = torch.tensor([-1.5, -1.5, -1.5, 1.5, 1.5, 1.5], device=device)
near_plane = 0.0
far_plane = 1.0e10
# model parameters
grid_resolution = 128
grid_nlvl = 1
# render parameters
render_step_size = 5e-3

# setup the dataset
train_dataset = SubjectLoader(
    subject_id=args.scene,
    root_fp=args.data_root,
    split=args.train_split,
    num_rays=init_batch_size,
    device=device,
)
test_dataset = SubjectLoader(
    subject_id=args.scene,
    root_fp=args.data_root,
    split="test",
    num_rays=None,
    device=device,
)

# Occupancy grid used to skip empty space during ray marching.
estimator = OccGridEstimator(
    roi_aabb=aabb, resolution=grid_resolution, levels=grid_nlvl
).to(device)

# setup the radiance field we want to train.
radiance_field = TNeRFRadianceField().to(device)
optimizer = torch.optim.Adam(radiance_field.parameters(), lr=5e-4)
# Step-wise LR decay (x0.33) at 50% / 75% / ~83% / 90% of max_steps.
scheduler = torch.optim.lr_scheduler.MultiStepLR(
    optimizer,
    milestones=[
        max_steps // 2,
        max_steps * 3 // 4,
        max_steps * 5 // 6,
        max_steps * 9 // 10,
    ],
    gamma=0.33,
)

# LPIPS expects NCHW inputs scaled to [-1, 1]; images here are HWC in [0, 1].
lpips_net = LPIPS(net="vgg").to(device)
lpips_norm_fn = lambda x: x[None, ...].permute(0, 3, 1, 2) * 2 - 1
lpips_fn = lambda x, y: lpips_net(lpips_norm_fn(x), lpips_norm_fn(y)).mean()
114 |
# training
tic = time.time()
for step in range(max_steps + 1):
    radiance_field.train()
    estimator.train()

    # sample a random training view
    i = torch.randint(0, len(train_dataset), (1,)).item()
    data = train_dataset[i]

    render_bkgd = data["color_bkgd"]
    rays = data["rays"]
    pixels = data["pixels"]
    timestamps = data["timestamps"]

    # update occupancy grid
    # (the lambda is consumed inside the call, so capturing the loop-local
    # `timestamps` by closure is safe here)
    estimator.update_every_n_steps(
        step=step,
        occ_eval_fn=lambda x: radiance_field.query_opacity(
            x, timestamps, render_step_size
        ),
        occ_thre=1e-2,
    )

    # render
    rgb, acc, depth, n_rendering_samples = render_image_with_occgrid(
        radiance_field,
        estimator,
        rays,
        # rendering options
        near_plane=near_plane,
        render_step_size=render_step_size,
        render_bkgd=render_bkgd,
        # skip near-transparent samples only after an initial warm-up
        alpha_thre=0.01 if step > 1000 else 0.00,
        # t-nerf options
        timestamps=timestamps,
    )
    if n_rendering_samples == 0:
        # every ray missed the occupied region; nothing to optimize this step
        continue

    if target_sample_batch_size > 0:
        # dynamic batch size for rays to keep sample batch size constant.
        num_rays = len(pixels)
        num_rays = int(
            num_rays * (target_sample_batch_size / float(n_rendering_samples))
        )
        train_dataset.update_num_rays(num_rays)

    # compute loss
    loss = F.smooth_l1_loss(rgb, pixels)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    scheduler.step()

    if step % 5000 == 0:
        elapsed_time = time.time() - tic
        # MSE/PSNR are computed only for logging; the training loss is smooth-L1.
        loss = F.mse_loss(rgb, pixels)
        psnr = -10.0 * torch.log(loss) / np.log(10.0)
        print(
            f"elapsed_time={elapsed_time:.2f}s | step={step} | "
            f"loss={loss:.5f} | psnr={psnr:.2f} | "
            f"n_rendering_samples={n_rendering_samples:d} | num_rays={len(pixels):d} | "
            f"max_depth={depth.max():.3f} | "
        )

    if step > 0 and step % max_steps == 0:
        # evaluation
        radiance_field.eval()
        estimator.eval()

        psnrs = []
        lpips = []
        with torch.no_grad():
            for i in tqdm.tqdm(range(len(test_dataset))):
                data = test_dataset[i]
                render_bkgd = data["color_bkgd"]
                rays = data["rays"]
                pixels = data["pixels"]
                timestamps = data["timestamps"]

                # rendering
                rgb, acc, depth, _ = render_image_with_occgrid(
                    radiance_field,
                    estimator,
                    rays,
                    # rendering options
                    near_plane=near_plane,
                    render_step_size=render_step_size,
                    render_bkgd=render_bkgd,
                    alpha_thre=0.01,
                    # test options
                    test_chunk_size=args.test_chunk_size,
                    # t-nerf options
                    timestamps=timestamps,
                )
                # PSNR = -10 * log10(MSE); np.log is natural log, hence the ratio.
                mse = F.mse_loss(rgb, pixels)
                psnr = -10.0 * torch.log(mse) / np.log(10.0)
                psnrs.append(psnr.item())
                lpips.append(lpips_fn(rgb, pixels).item())
                # if i == 0:
                #     imageio.imwrite(
                #         "rgb_test.png",
                #         (rgb.cpu().numpy() * 255).astype(np.uint8),
                #     )
                #     imageio.imwrite(
                #         "rgb_error.png",
                #         (
                #             (rgb - pixels).norm(dim=-1).cpu().numpy() * 255
                #         ).astype(np.uint8),
                #     )
        psnr_avg = sum(psnrs) / len(psnrs)
        lpips_avg = sum(lpips) / len(lpips)
        print(f"evaluation: psnr_avg={psnr_avg}, lpips_avg={lpips_avg}")
229 |
--------------------------------------------------------------------------------
/nerfacc/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Copyright (c) 2022 Ruilong Li, UC Berkeley.
3 | """
4 |
5 | from .data_specs import RayIntervals, RaySamples
6 | from .estimators.occ_grid import OccGridEstimator
7 | from .estimators.prop_net import PropNetEstimator
8 | from .estimators.vdb import VDBEstimator, traverse_vdbs
9 | from .grid import ray_aabb_intersect, traverse_grids
10 | from .losses import distortion
11 | from .pack import pack_info
12 | from .pdf import importance_sampling, searchsorted
13 | from .scan import exclusive_prod, exclusive_sum, inclusive_prod, inclusive_sum
14 | from .version import __version__
15 | from .volrend import (
16 | accumulate_along_rays,
17 | render_transmittance_from_alpha,
18 | render_transmittance_from_density,
19 | render_visibility_from_alpha,
20 | render_visibility_from_density,
21 | render_weight_from_alpha,
22 | render_weight_from_density,
23 | rendering,
24 | )
25 |
# Public API of the package. Every name listed here must actually be imported
# above: the previous list also advertised the `*_cub` scan variants
# ("inclusive_prod_cub", "exclusive_prod_cub", "inclusive_sum_cub",
# "exclusive_sum_cub"), which are never imported into this module, so
# `from nerfacc import *` raised AttributeError. They have been removed.
__all__ = [
    "__version__",
    "inclusive_prod",
    "exclusive_prod",
    "inclusive_sum",
    "exclusive_sum",
    "pack_info",
    "render_visibility_from_alpha",
    "render_visibility_from_density",
    "render_weight_from_alpha",
    "render_weight_from_density",
    "render_transmittance_from_alpha",
    "render_transmittance_from_density",
    "accumulate_along_rays",
    "rendering",
    "importance_sampling",
    "searchsorted",
    "RayIntervals",
    "RaySamples",
    "ray_aabb_intersect",
    "traverse_grids",
    "traverse_vdbs",
    "OccGridEstimator",
    "PropNetEstimator",
    "VDBEstimator",
    "distortion",
]
57 |
--------------------------------------------------------------------------------
/nerfacc/cameras.py:
--------------------------------------------------------------------------------
1 | """
2 | Copyright (c) 2022 Ruilong Li, UC Berkeley.
3 | """
4 | from typing import Tuple
5 |
6 | import torch
7 | import torch.nn.functional as F
8 | from torch import Tensor
9 |
10 | from . import cuda as _C
11 |
12 |
def opencv_lens_undistortion(
    uv: Tensor, params: Tensor, eps: float = 1e-6, iters: int = 10
) -> Tensor:
    """Undistort OpenCV-style lens distortion (CUDA backend).

    Note:
        This function is not differentiable to any inputs.

    Args:
        uv: (..., 2) UV coordinates.
        params: (..., N) or (N) OpenCV distortion parameters. We support
            N = 0, 1, 2, 4, 8. If N = 0, we return the input uv directly.
            If N = 1, we assume the input is {k1}. If N = 2, we assume the
            input is {k1, k2}. If N = 4, we assume the input is {k1, k2, p1, p2}.
            If N = 8, we assume the input is {k1, k2, p1, p2, k3, k4, k5, k6}.

    Returns:
        (..., 2) undistorted UV coordinates.
    """
    assert uv.shape[-1] == 2
    n_params = params.shape[-1]
    assert n_params in (0, 1, 2, 4, 8)

    # Nothing to undo when there are no distortion coefficients.
    if n_params == 0:
        return uv

    # Zero-pad short parameter vectors up to the full 8-coefficient form.
    if n_params < 8:
        params = F.pad(params, (0, 8 - n_params), "constant", 0)
    assert params.shape[-1] == 8

    # Give every UV sample its own set of 8 coefficients.
    prefix_shape = uv.shape[:-1]
    params = torch.broadcast_to(params, prefix_shape + (params.shape[-1],))

    return _C.opencv_lens_undistortion(
        uv.contiguous(), params.contiguous(), eps, iters
    )
47 |
48 |
def opencv_lens_undistortion_fisheye(
    uv: Tensor, params: Tensor, eps: float = 1e-6, iters: int = 10
) -> Tensor:
    """Undistort the opencv fisheye distortion of {k1, k2, k3, k4} (CUDA backend).

    Note:
        This function is not differentiable to any inputs.

    Args:
        uv: (..., 2) UV coordinates.
        params: (..., 4) or (4) OpenCV distortion parameters.

    Returns:
        (..., 2) undistorted UV coordinates.
    """
    assert uv.shape[-1] == 2
    assert params.shape[-1] == 4

    # Give every UV sample its own set of 4 fisheye coefficients.
    prefix_shape = uv.shape[:-1]
    params = torch.broadcast_to(params, prefix_shape + (params.shape[-1],))

    return _C.opencv_lens_undistortion_fisheye(
        uv.contiguous(), params.contiguous(), eps, iters
    )
72 |
73 |
def _opencv_lens_distortion(uv: Tensor, params: Tensor) -> Tensor:
    """Apply the OpenCV camera distortion {k1, k2, p1, p2, k3, k4, k5, k6}.

    See https://docs.opencv.org/3.4/d9/d0c/group__calib3d.html for more details.
    """
    k1, k2, p1, p2, k3, k4, k5, k6 = torch.unbind(params, dim=-1)
    # Thin-prism coefficients are not supported here; keep them at zero.
    s1 = s2 = s3 = s4 = 0
    u, v = torch.unbind(uv, dim=-1)
    r2 = u * u + v * v
    r4 = r2**2
    r6 = r4 * r2
    # Rational radial term: (1 + k1 r^2 + k2 r^4 + k3 r^6) / (1 + k4 r^2 + k5 r^4 + k6 r^6)
    radial = (1 + k1 * r2 + k2 * r4 + k3 * r6) / (
        1 + k4 * r2 + k5 * r4 + k6 * r6
    )
    # Tangential (p1, p2) plus (zeroed) thin-prism offsets.
    du = 2 * p1 * u * v + p2 * (r2 + 2 * u * u) + s1 * r2 + s2 * r4
    dv = 2 * p2 * u * v + p1 * (r2 + 2 * v * v) + s3 * r2 + s4 * r4
    return torch.stack([u * radial + du, v * radial + dv], dim=-1)
91 |
92 |
def _opencv_lens_distortion_fisheye(
    uv: Tensor, params: Tensor, eps: float = 1e-10
) -> Tensor:
    """The opencv fisheye camera distortion of {k1, k2, k3, k4}.

    See https://docs.opencv.org/4.x/db/d58/group__calib3d__fisheye.html for more details.

    Args:
        uv: (..., 2) UV coordinates.
        params: (..., 4) or (4) OpenCV fisheye distortion parameters
            {k1, k2, k3, k4}.
        eps: lower clamp on the radius to avoid division by zero at the
            image center.

    Returns:
        (..., 2) distorted UV coordinates.
    """
    assert params.shape[-1] == 4, f"Invalid params shape: {params.shape}"
    k1, k2, k3, k4 = torch.unbind(params, dim=-1)
    u, v = torch.unbind(uv, dim=-1)
    r = torch.sqrt(u * u + v * v)
    theta = torch.atan(r)
    # Odd polynomial in theta: theta_d = theta * (1 + k1 th^2 + k2 th^4 + k3 th^6 + k4 th^8)
    theta_d = theta * (
        1
        + k1 * theta**2
        + k2 * theta**4
        + k3 * theta**6
        + k4 * theta**8
    )
    scale = theta_d / torch.clamp(r, min=eps)
    return uv * scale[..., None]
121 |
122 |
@torch.jit.script
def _compute_residual_and_jacobian(
    x: Tensor, y: Tensor, xd: Tensor, yd: Tensor, params: Tensor
) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor]:
    """Residuals and Jacobian of the OpenCV distortion model.

    Given the current undistorted estimate (x, y) and the observed distorted
    point (xd, yd), returns (fx, fy, fx_x, fx_y, fy_x, fy_y): the two
    residuals and the four entries of their 2x2 Jacobian, consumed by the
    Newton iteration in `_opencv_lens_undistortion`. `params` holds the 8
    coefficients {k1, k2, p1, p2, k3, k4, k5, k6}.
    """
    assert params.shape[-1] == 8

    k1, k2, p1, p2, k3, k4, k5, k6 = torch.unbind(params, dim=-1)

    # let r(x, y) = x^2 + y^2;
    # alpha(x, y) = 1 + k1 * r(x, y) + k2 * r(x, y) ^2 + k3 * r(x, y)^3;
    # beta(x, y) = 1 + k4 * r(x, y) + k5 * r(x, y) ^2 + k6 * r(x, y)^3;
    # d(x, y) = alpha(x, y) / beta(x, y);
    r = x * x + y * y
    alpha = 1.0 + r * (k1 + r * (k2 + r * k3))
    beta = 1.0 + r * (k4 + r * (k5 + r * k6))
    d = alpha / beta

    # The perfect projection is:
    # xd = x * d(x, y) + 2 * p1 * x * y + p2 * (r(x, y) + 2 * x^2);
    # yd = y * d(x, y) + 2 * p2 * x * y + p1 * (r(x, y) + 2 * y^2);
    #
    # Let's define
    #
    # fx(x, y) = x * d(x, y) + 2 * p1 * x * y + p2 * (r(x, y) + 2 * x^2) - xd;
    # fy(x, y) = y * d(x, y) + 2 * p2 * x * y + p1 * (r(x, y) + 2 * y^2) - yd;
    #
    # We are looking for a solution that satisfies
    # fx(x, y) = fy(x, y) = 0;
    fx = d * x + 2 * p1 * x * y + p2 * (r + 2 * x * x) - xd
    fy = d * y + 2 * p2 * x * y + p1 * (r + 2 * y * y) - yd

    # Compute derivative of alpha, beta over r.
    alpha_r = k1 + r * (2.0 * k2 + r * (3.0 * k3))
    beta_r = k4 + r * (2.0 * k5 + r * (3.0 * k6))

    # Compute derivative of d over [x, y]
    d_r = (alpha_r * beta - alpha * beta_r) / (beta * beta)
    d_x = 2.0 * x * d_r
    d_y = 2.0 * y * d_r

    # Compute derivative of fx over x and y.
    fx_x = d + d_x * x + 2.0 * p1 * y + 6.0 * p2 * x
    fx_y = d_y * x + 2.0 * p1 * x + 2.0 * p2 * y

    # Compute derivative of fy over x and y.
    fy_x = d_x * y + 2.0 * p2 * y + 2.0 * p1 * x
    fy_y = d + d_y * y + 2.0 * p2 * x + 6.0 * p1 * y

    return fx, fy, fx_x, fx_y, fy_x, fy_y
172 |
173 |
@torch.jit.script
def _opencv_lens_undistortion(
    uv: Tensor, params: Tensor, eps: float = 1e-6, iters: int = 10
) -> Tensor:
    """Same as opencv_lens_undistortion(), but native PyTorch.

    Took from with bug fix and modification.
    https://github.com/nerfstudio-project/nerfstudio/blob/ec603634edbd61b13bdf2c598fda8c993370b8f7/nerfstudio/cameras/camera_utils.py
    """
    assert uv.shape[-1] == 2
    assert params.shape[-1] in [0, 1, 2, 4, 8]

    # No coefficients: nothing to undo.
    if params.shape[-1] == 0:
        return uv
    elif params.shape[-1] < 8:
        # Zero-pad short parameter vectors up to the full 8-coefficient form.
        params = F.pad(params, (0, 8 - params.shape[-1]), "constant", 0.0)
    assert params.shape[-1] == 8

    # Initialize from the distorted point.
    x, y = x0, y0 = torch.unbind(uv, dim=-1)

    # Newton's method: refine (x, y) until distorting it reproduces (x0, y0).
    zeros = torch.zeros_like(x)
    for _ in range(iters):
        fx, fy, fx_x, fx_y, fy_x, fy_y = _compute_residual_and_jacobian(
            x=x, y=y, xd=x0, yd=y0, params=params
        )
        denominator = fy_x * fx_y - fx_x * fy_y
        # Skip the update wherever the Jacobian is (near-)singular.
        mask = torch.abs(denominator) > eps

        x_numerator = fx * fy_y - fy * fx_y
        y_numerator = fy * fx_x - fx * fy_x
        step_x = torch.where(mask, x_numerator / denominator, zeros)
        step_y = torch.where(mask, y_numerator / denominator, zeros)

        x = x + step_x
        y = y + step_y

    return torch.stack([x, y], dim=-1)
212 |
--------------------------------------------------------------------------------
/nerfacc/cuda/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Copyright (c) 2022 Ruilong Li, UC Berkeley.
3 | """
4 |
5 | from typing import Any, Callable
6 |
7 |
def _make_lazy_cuda_func(name: str) -> Callable:
    """Return a proxy that looks up `name` in the compiled CUDA backend.

    The backend module is imported on the first call, not at import time,
    so importing this package never triggers extension compilation.
    """

    def call_cuda(*args, **kwargs):
        # pylint: disable=import-outside-toplevel
        from ._backend import _C

        return getattr(_C, name)(*args, **kwargs)

    return call_cuda
16 |
17 |
# Lazy bindings into the compiled CUDA extension: each name becomes a proxy
# that resolves the symbol on first call (see `_make_lazy_cuda_func`).

# data specs
RaySegmentsSpec = _make_lazy_cuda_func("RaySegmentsSpec")

# grid
ray_aabb_intersect = _make_lazy_cuda_func("ray_aabb_intersect")
traverse_grids = _make_lazy_cuda_func("traverse_grids")

# scan
inclusive_sum = _make_lazy_cuda_func("inclusive_sum")
exclusive_sum = _make_lazy_cuda_func("exclusive_sum")
inclusive_prod_forward = _make_lazy_cuda_func("inclusive_prod_forward")
inclusive_prod_backward = _make_lazy_cuda_func("inclusive_prod_backward")
exclusive_prod_forward = _make_lazy_cuda_func("exclusive_prod_forward")
exclusive_prod_backward = _make_lazy_cuda_func("exclusive_prod_backward")

# CUB-backed scan variants; check `is_cub_available()` before using them.
is_cub_available = _make_lazy_cuda_func("is_cub_available")
inclusive_sum_cub = _make_lazy_cuda_func("inclusive_sum_cub")
exclusive_sum_cub = _make_lazy_cuda_func("exclusive_sum_cub")
inclusive_prod_cub_forward = _make_lazy_cuda_func("inclusive_prod_cub_forward")
inclusive_prod_cub_backward = _make_lazy_cuda_func(
    "inclusive_prod_cub_backward"
)
exclusive_prod_cub_forward = _make_lazy_cuda_func("exclusive_prod_cub_forward")
exclusive_prod_cub_backward = _make_lazy_cuda_func(
    "exclusive_prod_cub_backward"
)

# pdf
importance_sampling = _make_lazy_cuda_func("importance_sampling")
searchsorted = _make_lazy_cuda_func("searchsorted")

# camera
opencv_lens_undistortion = _make_lazy_cuda_func("opencv_lens_undistortion")
opencv_lens_undistortion_fisheye = _make_lazy_cuda_func(
    "opencv_lens_undistortion_fisheye"
)
54 |
--------------------------------------------------------------------------------
/nerfacc/cuda/_backend.py:
--------------------------------------------------------------------------------
1 | """
2 | Copyright (c) 2022 Ruilong Li, UC Berkeley.
3 | """
4 |
5 | import glob
6 | import json
7 | import os
8 | import shutil
9 | from subprocess import DEVNULL, call
10 |
11 | from rich.console import Console
12 | from torch.utils.cpp_extension import _get_build_directory, load
13 |
14 | PATH = os.path.dirname(os.path.abspath(__file__))
15 |
16 |
def cuda_toolkit_available():
    """Return True when the ``nvcc`` compiler can be executed on this machine."""
    try:
        call(["nvcc"], stdout=DEVNULL, stderr=DEVNULL)
    except FileNotFoundError:
        # nvcc is not on PATH -> no usable CUDA toolkit.
        return False
    return True
24 |
25 |
def cuda_toolkit_version():
    """Return the CUDA toolkit version string, read from the toolkit root.

    Looks next to the ``nvcc`` binary for either ``version.txt`` (older
    toolkits) or ``version.json`` (newer toolkits).

    Raises:
        RuntimeError: if neither version file exists.
    """
    cuda_home = os.path.join(os.path.dirname(shutil.which("nvcc")), "..")
    version_txt = os.path.join(cuda_home, "version.txt")
    version_json = os.path.join(cuda_home, "version.json")
    if os.path.exists(version_txt):
        # Plain-text file whose last whitespace-separated token is the version.
        with open(version_txt) as f:
            return f.read().strip().split()[-1]
    if os.path.exists(version_json):
        # JSON manifest shipped by newer toolkits.
        with open(version_json) as f:
            return json.load(f)["cuda"]["version"]
    raise RuntimeError("Cannot find the cuda version.")
38 |
39 |
# JIT build configuration for the CUDA extension.
name = "nerfacc_cuda"
build_dir = _get_build_directory(name, verbose=False)
extra_include_paths = []
extra_cflags = ["-O3"]
extra_cuda_cflags = ["-O3"]

# `_C` remains None when no compiled backend can be obtained.
_C = None
sources = list(glob.glob(os.path.join(PATH, "csrc/*.cu"))) + list(
    glob.glob(os.path.join(PATH, "csrc/*.cpp"))
)

try:
    # try to import the compiled module (via setup.py)
    from nerfacc import csrc as _C
except ImportError:
    # if failed, try with JIT compilation
    if cuda_toolkit_available():
        if os.listdir(build_dir) != []:
            # If the build exists, we assume the extension has been built
            # and we can load it.

            _C = load(
                name=name,
                sources=sources,
                extra_cflags=extra_cflags,
                extra_cuda_cflags=extra_cuda_cflags,
                extra_include_paths=extra_include_paths,
            )
        else:
            # Build from scratch. Remove the build directory just to be safe: pytorch jit might stuck
            # if the build directory exists.
            shutil.rmtree(build_dir)
            with Console().status(
                "[bold yellow]NerfAcc: Setting up CUDA (This may take a few minutes the first time)",
                spinner="bouncingBall",
            ):
                _C = load(
                    name=name,
                    sources=sources,
                    extra_cflags=extra_cflags,
                    extra_cuda_cflags=extra_cuda_cflags,
                    extra_include_paths=extra_include_paths,
                )
    else:
        # No nvcc on this machine: leave `_C` as None and warn the user.
        Console().print(
            "[yellow]NerfAcc: No CUDA toolkit found. NerfAcc will be disabled.[/yellow]"
        )


__all__ = ["_C"]
90 |
--------------------------------------------------------------------------------
/nerfacc/cuda/csrc/camera.cu:
--------------------------------------------------------------------------------
#include <torch/extension.h>

#include "include/utils_cuda.cuh"
#include "include/utils_camera.cuh"
5 |
6 |
namespace {
namespace device {

// Undistort fisheye {k1, k2, k3, k4} points; writes per-point convergence
// into `success`.
__global__ void opencv_lens_undistortion_fisheye(
    const int64_t N,
    const float* uv,
    const float* params,
    const int criteria_iters,
    const float criteria_eps,
    float* uv_out,
    bool* success)
{
    // parallelize over outputs
    for (int64_t tid = blockIdx.x * blockDim.x + threadIdx.x; tid < N; tid += blockDim.x * gridDim.x)
    {
        success[tid] = iterative_opencv_lens_undistortion_fisheye(
            uv[tid * 2 + 0],
            uv[tid * 2 + 1],
            params[tid * 4 + 0], // k1
            params[tid * 4 + 1], // k2
            params[tid * 4 + 2], // k3
            params[tid * 4 + 3], // k4
            criteria_iters,
            criteria_eps,
            uv_out[tid * 2 + 0],
            uv_out[tid * 2 + 1]
        );
    }
}

// Undistort pinhole points; the parameter layout depends on n_params
// (5: {k1,k2,p1,p2,k3}, 8: +{k4,k5,k6}, 12: +thin-prism {s1..s4}).
__global__ void opencv_lens_undistortion(
    const int64_t N,
    const int64_t n_params,
    const float* uv,
    const float* params,
    const float eps,
    const int max_iterations,
    float* uv_out)
{
    // parallelize over outputs
    for (int64_t tid = blockIdx.x * blockDim.x + threadIdx.x; tid < N; tid += blockDim.x * gridDim.x)
    {
        if (n_params == 5) {
            radial_and_tangential_undistort(
                uv[tid * 2 + 0],
                uv[tid * 2 + 1],
                params[tid * n_params + 0], // k1
                params[tid * n_params + 1], // k2
                params[tid * n_params + 4], // k3
                0.f, // k4
                0.f, // k5
                0.f, // k6
                params[tid * n_params + 2], // p1
                params[tid * n_params + 3], // p2
                eps,
                max_iterations,
                uv_out[tid * 2 + 0],
                uv_out[tid * 2 + 1]);
        } else if (n_params == 8) {
            radial_and_tangential_undistort(
                uv[tid * 2 + 0],
                uv[tid * 2 + 1],
                params[tid * n_params + 0], // k1
                params[tid * n_params + 1], // k2
                params[tid * n_params + 4], // k3
                params[tid * n_params + 5], // k4
                params[tid * n_params + 6], // k5
                params[tid * n_params + 7], // k6
                params[tid * n_params + 2], // p1
                params[tid * n_params + 3], // p2
                eps,
                max_iterations,
                uv_out[tid * 2 + 0],
                uv_out[tid * 2 + 1]);
        } else if (n_params == 12) {
            bool success = iterative_opencv_lens_undistortion(
                uv[tid * 2 + 0],
                uv[tid * 2 + 1],
                params[tid * 12 + 0],  // k1
                params[tid * 12 + 1],  // k2
                params[tid * 12 + 2],  // k3
                params[tid * 12 + 3],  // k4
                params[tid * 12 + 4],  // k5
                params[tid * 12 + 5],  // k6
                params[tid * 12 + 6],  // p1
                params[tid * 12 + 7],  // p2
                params[tid * 12 + 8],  // s1
                params[tid * 12 + 9],  // s2
                params[tid * 12 + 10], // s3
                params[tid * 12 + 11], // s4
                max_iterations,
                uv_out[tid * 2 + 0],
                uv_out[tid * 2 + 1]
            );
            // Fall back to the input point when the solver fails to converge.
            if (!success) {
                uv_out[tid * 2 + 0] = uv[tid * 2 + 0];
                uv_out[tid * 2 + 1] = uv[tid * 2 + 1];
            }
        }
    }
}


} // namespace device
} // namespace
112 |
113 |
// Host launcher for device::opencv_lens_undistortion.
// NOTE(review): the extracted text had its angle-bracket spans stripped
// (bare `<<>>` launch, `data_ptr()` without template args); they are
// reconstructed here.
torch::Tensor opencv_lens_undistortion(
    const torch::Tensor& uv,      // [..., 2]
    const torch::Tensor& params,  // [..., 5], [..., 8] or [..., 12]
    const float eps,
    const int max_iterations)
{
    DEVICE_GUARD(uv);
    CHECK_INPUT(uv);
    CHECK_INPUT(params);
    TORCH_CHECK(uv.ndimension() == params.ndimension());
    TORCH_CHECK(uv.size(-1) == 2, "uv must have shape [..., 2]");
    TORCH_CHECK(params.size(-1) == 5 || params.size(-1) == 8 || params.size(-1) == 12);

    int64_t N = uv.numel() / 2;  // number of UV points
    int64_t n_params = params.size(-1);

    at::cuda::CUDAStream stream = at::cuda::getCurrentCUDAStream();
    int64_t max_threads = 512;
    int64_t max_blocks = 65535;
    dim3 threads = dim3(min(max_threads, N));
    dim3 blocks = dim3(min(max_blocks, ceil_div<int64_t>(N, threads.x)));

    auto uv_out = torch::empty_like(uv);
    device::opencv_lens_undistortion<<<blocks, threads, 0, stream>>>(
        N,
        n_params,
        uv.data_ptr<float>(),
        params.data_ptr<float>(),
        eps,
        max_iterations,
        uv_out.data_ptr<float>());

    return uv_out;
}
148 |
// Host launcher for device::opencv_lens_undistortion_fisheye.
// NOTE(review): reconstructs the angle-bracket spans stripped by extraction
// (kernel launch config and data_ptr template arguments). The per-point
// `success` mask is computed but intentionally not returned, matching the
// original interface.
torch::Tensor opencv_lens_undistortion_fisheye(
    const torch::Tensor& uv,      // [..., 2]
    const torch::Tensor& params,  // [..., 4]
    const float criteria_eps,
    const int criteria_iters)
{
    DEVICE_GUARD(uv);
    CHECK_INPUT(uv);
    CHECK_INPUT(params);
    TORCH_CHECK(uv.ndimension() == params.ndimension());
    TORCH_CHECK(uv.size(-1) == 2, "uv must have shape [..., 2]");
    TORCH_CHECK(params.size(-1) == 4);

    int64_t N = uv.numel() / 2;  // number of UV points

    at::cuda::CUDAStream stream = at::cuda::getCurrentCUDAStream();
    int64_t max_threads = 512;
    int64_t max_blocks = 65535;
    dim3 threads = dim3(min(max_threads, N));
    dim3 blocks = dim3(min(max_blocks, ceil_div<int64_t>(N, threads.x)));

    auto uv_out = torch::empty_like(uv);
    auto success = torch::empty(
        uv.sizes().slice(0, uv.ndimension() - 1), uv.options().dtype(torch::kBool));
    device::opencv_lens_undistortion_fisheye<<<blocks, threads, 0, stream>>>(
        N,
        uv.data_ptr<float>(),
        params.data_ptr<float>(),
        criteria_iters,
        criteria_eps,
        uv_out.data_ptr<float>(),
        success.data_ptr<bool>());

    return uv_out;
}
184 |
--------------------------------------------------------------------------------
/nerfacc/cuda/csrc/include/data_spec.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
#include <torch/extension.h>
#include "utils_cuda.cuh"
5 |
6 | struct RaySegmentsSpec {
7 | torch::Tensor vals; // [n_edges] or [n_rays, n_edges_per_ray]
8 | // for flattened tensor
9 | torch::Tensor chunk_starts; // [n_rays]
10 | torch::Tensor chunk_cnts; // [n_rays]
11 | torch::Tensor ray_indices; // [n_edges]
12 | torch::Tensor is_left; // [n_edges] have n_bins true values
13 | torch::Tensor is_right; // [n_edges] have n_bins true values
14 | torch::Tensor is_valid; // [n_edges] have n_bins true values
15 |
  // Validate the spec's tensors: `vals` must always be defined and
  // contiguous; for the flattened layout, the chunk bookkeeping tensors must
  // agree in length and every optional per-edge mask must match `vals`.
  inline void check() {
    CHECK_INPUT(vals);
    TORCH_CHECK(vals.defined());

    // batched tensor [..., n_edges_per_ray]
    if (vals.ndimension() > 1) return;

    // flattend tensor [n_edges]
    CHECK_INPUT(chunk_starts);
    CHECK_INPUT(chunk_cnts);
    TORCH_CHECK(chunk_starts.defined());
    TORCH_CHECK(chunk_cnts.defined());
    TORCH_CHECK(chunk_starts.ndimension() == 1);
    TORCH_CHECK(chunk_cnts.ndimension() == 1);
    TORCH_CHECK(chunk_starts.numel() == chunk_cnts.numel());
    // Optional per-edge tensors: validated only when present.
    if (ray_indices.defined()) {
      CHECK_INPUT(ray_indices);
      TORCH_CHECK(ray_indices.ndimension() == 1);
      TORCH_CHECK(vals.numel() == ray_indices.numel());
    }
    if (is_left.defined()) {
      CHECK_INPUT(is_left);
      TORCH_CHECK(is_left.ndimension() == 1);
      TORCH_CHECK(vals.numel() == is_left.numel());
    }
    if (is_right.defined()) {
      CHECK_INPUT(is_right);
      TORCH_CHECK(is_right.ndimension() == 1);
      TORCH_CHECK(vals.numel() == is_right.numel());
    }
    if (is_valid.defined()) {
      CHECK_INPUT(is_valid);
      TORCH_CHECK(is_valid.ndimension() == 1);
      TORCH_CHECK(vals.numel() == is_valid.numel());
    }
  }
52 |
53 | inline void memalloc_cnts(int32_t n_rays, at::TensorOptions options, bool zero_init = true) {
54 | TORCH_CHECK(!chunk_cnts.defined());
55 | if (zero_init) {
56 | chunk_cnts = torch::zeros({n_rays}, options.dtype(torch::kLong));
57 | } else {
58 | chunk_cnts = torch::empty({n_rays}, options.dtype(torch::kLong));
59 | }
60 | }
61 |
62 | inline void memalloc_data(int32_t size, bool alloc_masks = true, bool zero_init = true, bool alloc_valid = false) {
63 | TORCH_CHECK(chunk_cnts.defined());
64 | TORCH_CHECK(!vals.defined());
65 |
66 | if (zero_init) {
67 | vals = torch::zeros({size}, chunk_cnts.options().dtype(torch::kFloat32));
68 | ray_indices = torch::zeros({size}, chunk_cnts.options().dtype(torch::kLong));
69 | if (alloc_masks) {
70 | is_left = torch::zeros({size}, chunk_cnts.options().dtype(torch::kBool));
71 | is_right = torch::zeros({size}, chunk_cnts.options().dtype(torch::kBool));
72 | }
73 | } else {
74 | vals = torch::empty({size}, chunk_cnts.options().dtype(torch::kFloat32));
75 | ray_indices = torch::empty({size}, chunk_cnts.options().dtype(torch::kLong));
76 | if (alloc_masks) {
77 | is_left = torch::empty({size}, chunk_cnts.options().dtype(torch::kBool));
78 | is_right = torch::empty({size}, chunk_cnts.options().dtype(torch::kBool));
79 | }
80 | }
81 | if (alloc_valid) {
82 | is_valid = torch::zeros({size}, chunk_cnts.options().dtype(torch::kBool));
83 | }
84 | }
85 |
86 | inline int64_t memalloc_data_from_chunk(bool alloc_masks = true, bool zero_init = true, bool alloc_valid = false) {
87 | TORCH_CHECK(chunk_cnts.defined());
88 | TORCH_CHECK(!chunk_starts.defined());
89 |
90 | torch::Tensor cumsum = torch::cumsum(chunk_cnts, 0, chunk_cnts.scalar_type());
91 | int64_t n_edges = cumsum[-1].item();
92 |
93 | chunk_starts = cumsum - chunk_cnts;
94 | memalloc_data(n_edges, alloc_masks, zero_init, alloc_valid);
95 | return 1;
96 | }
97 |
98 | // compute the chunk_start from chunk_cnts
99 | inline int64_t compute_chunk_start() {
100 | TORCH_CHECK(chunk_cnts.defined());
101 | // TORCH_CHECK(!chunk_starts.defined());
102 |
103 | torch::Tensor cumsum = torch::cumsum(chunk_cnts, 0, chunk_cnts.scalar_type());
104 | chunk_starts = cumsum - chunk_cnts;
105 | return 1;
106 | }
107 | };
--------------------------------------------------------------------------------
/nerfacc/cuda/csrc/include/data_spec_packed.cuh:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include
4 |
5 | #include "data_spec.hpp"
6 |
7 | namespace {
8 | namespace device {
9 |
// Plain-pointer view of a RaySegmentsSpec, suitable for passing to CUDA
// kernels by value. Undefined tensors map to nullptr / zero sizes.
// The data_ptr template arguments (lost in transit) are restored to
// match the member pointer types declared below.
struct PackedRaySegmentsSpec {
  PackedRaySegmentsSpec(RaySegmentsSpec& spec) :
    vals(spec.vals.defined() ? spec.vals.data_ptr<float>() : nullptr),
    is_batched(spec.vals.defined() ? spec.vals.dim() > 1 : false),
    // for flattened tensor
    chunk_starts(spec.chunk_starts.defined() ? spec.chunk_starts.data_ptr<int64_t>() : nullptr),
    chunk_cnts(spec.chunk_cnts.defined() ? spec.chunk_cnts.data_ptr<int64_t>() : nullptr),
    ray_indices(spec.ray_indices.defined() ? spec.ray_indices.data_ptr<int64_t>() : nullptr),
    is_left(spec.is_left.defined() ? spec.is_left.data_ptr<bool>() : nullptr),
    is_right(spec.is_right.defined() ? spec.is_right.data_ptr<bool>() : nullptr),
    is_valid(spec.is_valid.defined() ? spec.is_valid.data_ptr<bool>() : nullptr),
    // for dimensions
    n_edges(spec.vals.defined() ? spec.vals.numel() : 0),
    n_rays(spec.chunk_cnts.defined() ? spec.chunk_cnts.size(0) : 0),  // for flattened tensor
    n_edges_per_ray(spec.vals.defined() ? spec.vals.size(-1) : 0)     // for batched tensor
  { }

  float* vals;
  bool is_batched;

  int64_t* chunk_starts;
  int64_t* chunk_cnts;
  int64_t* ray_indices;
  bool* is_left;
  bool* is_right;
  bool* is_valid;

  int64_t n_edges;
  int32_t n_rays;
  int32_t n_edges_per_ray;
};
41 |
42 |
// A single ray: origin, direction, precomputed reciprocal direction, and
// the valid parametric range [tmin, tmax].
struct SingleRaySpec {
  // TODO: check inv_dir if dir is zero.
  // NOTE(review): a zero direction component makes inv_dir +/-inf here;
  // the grid-traversal setup special-cases dir == 0 but the slab test
  // relies on IEEE inf arithmetic — confirm.
  __device__ SingleRaySpec(
      float *rays_o, float *rays_d, float tmin, float tmax) :
    origin{rays_o[0], rays_o[1], rays_o[2]},
    dir{rays_d[0], rays_d[1], rays_d[2]},
    inv_dir{1.0f/rays_d[0], 1.0f/rays_d[1], 1.0f/rays_d[2]},
    tmin{tmin},
    tmax{tmax}
  { }

  float3 origin;
  float3 dir;
  float3 inv_dir;  // component-wise 1 / dir, precomputed once per ray
  float tmin;
  float tmax;
};
60 |
// Axis-aligned bounding box given by its min/max corners.
struct AABBSpec {
  // Construct from a flat array [min_x, min_y, min_z, max_x, max_y, max_z].
  __device__ AABBSpec(float *aabb) :
    min{aabb[0], aabb[1], aabb[2]},
    max{aabb[3], aabb[4], aabb[5]}
  { }
  // Construct directly from the two corners.
  __device__ AABBSpec(float3 min, float3 max) :
    min{min.x, min.y, min.z},
    max{max.x, max.y, max.z}
  { }
  float3 min;
  float3 max;
};
73 |
74 |
75 | } // namespace device
76 | } // namespace
--------------------------------------------------------------------------------
/nerfacc/cuda/csrc/include/utils.cub.cuh:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2022 Ruilong Li, UC Berkeley.
3 | * Modified from aten/src/ATen/cuda/cub_definitions.cuh in PyTorch.
4 | */
5 |
6 | #pragma once
7 |
8 | #include // for CUDA_VERSION
9 |
// CUB ships with the CUDA toolkit from 11.0 on; without it, pin
// CUB_VERSION to 0 so every feature test below evaluates to "missing".
#if defined(CUDA_VERSION) && CUDA_VERSION >= 11000
// NOTE(review): the include target is missing on the next line (angle
// bracket content appears stripped) — presumably <cub/version.cuh>; verify.
#include
#else
#define CUB_VERSION 0
#endif

// cub support for scan by key is added to cub 1.15
// in https://github.com/NVIDIA/cub/pull/376
#if CUB_VERSION >= 101500
#define CUB_SUPPORTS_SCAN_BY_KEY() 1
#else
#define CUB_SUPPORTS_SCAN_BY_KEY() 0
#endif

// https://github.com/pytorch/pytorch/blob/233305a852e1cd7f319b15b5137074c9eac455f6/aten/src/ATen/cuda/cub.cuh#L38-L46
// Standard two-pass CUB call: first query the temp-storage size with a
// null workspace, then allocate it from the caching allocator and run.
#define CUB_WRAPPER(func, ...) do {                                       \
  size_t temp_storage_bytes = 0;                                          \
  func(nullptr, temp_storage_bytes, __VA_ARGS__);                         \
  auto& caching_allocator = *::c10::cuda::CUDACachingAllocator::get();    \
  auto temp_storage = caching_allocator.allocate(temp_storage_bytes);     \
  func(temp_storage.get(), temp_storage_bytes, __VA_ARGS__);              \
  AT_CUDA_CHECK(cudaGetLastError());                                      \
} while (false)
--------------------------------------------------------------------------------
/nerfacc/cuda/csrc/include/utils_camera.cuh:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2022 Ruilong Li, UC Berkeley.
3 | */
4 |
5 | #include "utils_cuda.cuh"
6 |
7 | #define PI 3.14159265358979323846
8 |
9 | namespace {
10 | namespace device {
11 |
// https://github.com/JamesPerlman/TurboNeRF/blob/75f1228d41b914b0a768a876d2a851f3b3213a58/src/utils/camera-kernels.cuh
//
// Residual and Jacobian of the OpenCV radial/tangential distortion model.
// Given an undistorted candidate (x, y) and the observed distorted point
// (xd, yd), writes the residual (fx, fy) and its partial derivatives
// (fx_x, fx_y, fy_x, fy_y) for one Newton step of the inverse mapping.
inline __device__ void _compute_residual_and_jacobian(
    // inputs
    float x, float y,
    float xd, float yd,
    float k1, float k2, float k3, float k4, float k5, float k6,
    float p1, float p2,
    // outputs
    float& fx, float& fy,
    float& fx_x, float& fx_y,
    float& fy_x, float& fy_y
) {
    // Rational radial factor d(r) = alpha(r) / beta(r) with r = x^2 + y^2
    // and alpha/beta cubic polynomials in r, evaluated in Horner form.
    const float rad = x * x + y * y;
    const float num = 1.0f + rad * (k1 + rad * (k2 + rad * k3));
    const float den = 1.0f + rad * (k4 + rad * (k5 + rad * k6));
    const float dist = num / den;

    // Residual of the forward projection against the observed point:
    //   fx = x*d + 2*p1*x*y + p2*(r + 2*x^2) - xd
    //   fy = y*d + 2*p2*x*y + p1*(r + 2*y^2) - yd
    // A root (fx, fy) = (0, 0) is the undistorted point we seek.
    fx = dist * x + 2 * p1 * x * y + p2 * (rad + 2 * x * x) - xd;
    fy = dist * y + 2 * p2 * x * y + p1 * (rad + 2 * y * y) - yd;

    // d(alpha)/dr and d(beta)/dr.
    const float num_r = k1 + rad * (2.0 * k2 + rad * (3.0 * k3));
    const float den_r = k4 + rad * (2.0 * k5 + rad * (3.0 * k6));

    // Chain rule: dd/dx = 2x * dd/dr, dd/dy = 2y * dd/dr.
    const float dist_r = (num_r * den - num * den_r) / (den * den);
    const float dist_x = 2.0 * x * dist_r;
    const float dist_y = 2.0 * y * dist_r;

    // Jacobian entries of (fx, fy) with respect to (x, y).
    fx_x = dist + dist_x * x + 2.0 * p1 * y + 6.0 * p2 * x;
    fx_y = dist_y * x + 2.0 * p1 * x + 2.0 * p2 * y;
    fy_x = dist_x * y + 2.0 * p2 * y + 2.0 * p1 * x;
    fy_y = dist + dist_y * y + 2.0 * p2 * x + 6.0 * p1 * y;
}
64 |
// https://github.com/JamesPerlman/TurboNeRF/blob/75f1228d41b914b0a768a876d2a851f3b3213a58/src/utils/camera-kernels.cuh
//
// Invert the OpenCV radial/tangential distortion via Newton's method:
// find the undistorted (x, y) whose forward projection equals (xd, yd).
// Terminates after `max_iterations`, on a near-singular Jacobian, or
// once the update is below `eps` in both coordinates.
inline __device__ void radial_and_tangential_undistort(
    float xd, float yd,
    float k1, float k2, float k3, float k4, float k5, float k6,
    float p1, float p2,
    const float& eps,
    const int& max_iterations,
    float& x, float& y
) {
    // The distorted point itself is the starting guess.
    x = xd;
    y = yd;

    for (int iter = 0; iter < max_iterations; ++iter) {
        float rx, ry, rx_x, rx_y, ry_x, ry_y;

        _compute_residual_and_jacobian(
            x, y,
            xd, yd,
            k1, k2, k3, k4, k5, k6,
            p1, p2,
            rx, ry,
            rx_x, rx_y, ry_x, ry_y
        );

        // Bail out if the Jacobian is numerically singular.
        const float det = rx_y * ry_x - rx_x * ry_y;
        if (fabs(det) < eps) {
            break;
        }

        // Newton step via Cramer's rule on the 2x2 linear system.
        const float step_x = (rx * ry_y - ry * rx_y) / det;
        const float step_y = (ry * rx_x - rx * ry_x) / det;

        x += step_x;
        y += step_y;

        // Converged once the step is negligible along both axes.
        if (fabs(step_x) < eps && fabs(step_y) < eps) {
            break;
        }
    }
}
111 |
// not good
// https://github.com/opencv/opencv/blob/8d0fbc6a1e9f20c822921e8076551a01e58cd632/modules/calib3d/src/undistort.dispatch.cpp#L578
//
// Fixed-point iteration (no Jacobian) for the OpenCV rational +
// thin-prism distortion model. Returns false if the rational radial
// factor turns negative, i.e. the model is invalid at this point.
inline __device__ bool iterative_opencv_lens_undistortion(
    float u, float v,
    float k1, float k2, float k3, float k4, float k5, float k6,
    float p1, float p2, float s1, float s2, float s3, float s4,
    int iters,
    // outputs
    float& x, float& y)
{
    // Start from the distorted coordinates.
    x = u;
    y = v;
    for (int iter = 0; iter < iters; iter++) {
        const float r2 = x*x + y*y;
        // Reciprocal rational radial factor, numerator/denominator in
        // Horner form.
        const float icdist =
            (1 + ((k6*r2 + k5)*r2 + k4)*r2) / (1 + ((k3*r2 + k2)*r2 + k1)*r2);
        if (icdist < 0) return false;
        // Tangential (p1, p2) and thin-prism (s1..s4) correction terms.
        const float deltaX = 2*p1*x*y + p2*(r2 + 2*x*x) + s1*r2 + s2*r2*r2;
        const float deltaY = p1*(r2 + 2*y*y) + 2*p2*x*y + s3*r2 + s4*r2*r2;
        x = (u - deltaX) * icdist;
        y = (v - deltaY) * icdist;
    }
    return true;
}
136 |
// https://github.com/opencv/opencv/blob/master/modules/calib3d/src/fisheye.cpp#L321
// Invert the OpenCV fisheye (equidistant) distortion for one normalized
// image point (u, v): solve theta_d = theta * (1 + k1*theta^2 + ... ) for
// theta by Newton iteration, then rescale (u, v) by tan(theta) / theta_d.
// Returns whether the iteration converged.
// NOTE(review): on failure (no convergence, or theta flipping sign) the
// outputs (u_out, v_out) are left unmodified — callers must check the
// return value before reading them.
inline __device__ bool iterative_opencv_lens_undistortion_fisheye(
    float u, float v,
    float k1, float k2, float k3, float k4,
    int criteria_iters,
    float criteria_eps,
    // outputs
    float& u_out, float& v_out)
{
    // image point (u, v) to world point (x, y)
    float theta_d = sqrt(u * u + v * v);

    // the current camera model is only valid up to 180 FOV
    // for larger FOV the loop below does not converge
    // clip values so we still get plausible results for super fisheye images > 180 grad
    theta_d = min(max(-PI/2., theta_d), PI/2.);

    bool converged = false;
    float theta = theta_d;

    float scale = 0.0;

    if (fabs(theta_d) > criteria_eps)
    {
        // compensate distortion iteratively using Newton method
        for (int j = 0; j < criteria_iters; j++)
        {
            double theta2 = theta*theta, theta4 = theta2*theta2, theta6 = theta4*theta2, theta8 = theta6*theta2;
            double k0_theta2 = k1 * theta2, k1_theta4 = k2 * theta4, k2_theta6 = k3 * theta6, k3_theta8 = k4 * theta8;
            /* new_theta = theta - theta_fix, theta_fix = f0(theta) / f0'(theta) */
            double theta_fix = (theta * (1 + k0_theta2 + k1_theta4 + k2_theta6 + k3_theta8) - theta_d) /
                                (1 + 3*k0_theta2 + 5*k1_theta4 + 7*k2_theta6 + 9*k3_theta8);
            theta = theta - theta_fix;

            if (fabs(theta_fix) < criteria_eps)
            {
                converged = true;
                break;
            }
        }

        scale = std::tan(theta) / theta_d;
    }
    else
    {
        // Point is (numerically) at the distortion center; nothing to undo.
        converged = true;
    }

    // theta is monotonously increasing or decreasing depending on the sign of theta
    // if theta has flipped, it might converge due to symmetry but on the opposite of the camera center
    // so we can check whether theta has changed the sign during the optimization
    bool theta_flipped = ((theta_d < 0 && theta > 0) || (theta_d > 0 && theta < 0));

    if (converged && !theta_flipped)
    {
        u_out = u * scale;
        v_out = v * scale;
    }

    return converged;
}
198 |
199 |
200 | } // namespace device
201 | } // namespace
202 |
--------------------------------------------------------------------------------
/nerfacc/cuda/csrc/include/utils_contraction.cuh:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2022 Ruilong Li, UC Berkeley.
3 | */
4 |
5 | #pragma once
6 |
7 | #include "utils_math.cuh"
8 |
9 | namespace {
10 | namespace device {
11 |
// How world-space coordinates are mapped ("contracted") into [0, 1]^3.
enum ContractionType
{
    AABB = 0,                // linear map of the ROI box to [0, 1]^3
    UN_BOUNDED_TANH = 1,     // tanh contraction of all of R^3
    UN_BOUNDED_SPHERE = 2,   // MipNeRF360-style spherical contraction
};
18 |
// Linearly map a point from the ROI box [roi_min, roi_max] to [0, 1]^3.
inline __device__ __host__ float3 roi_to_unit(
    const float3 xyz, const float3 roi_min, const float3 roi_max)
{
    const float3 extent = roi_max - roi_min;
    return (xyz - roi_min) / extent;
}
25 |
// Linearly map a point from [0, 1]^3 back into the ROI box.
inline __device__ __host__ float3 unit_to_roi(
    const float3 xyz, const float3 roi_min, const float3 roi_max)
{
    const float3 extent = roi_max - roi_min;
    return xyz * extent + roi_min;
}
32 |
// Tanh contraction: [-inf, inf]^3 -> [0, 1]^3, with the ROI landing in an
// inner cube around [0.25, 0.75]^3.
inline __device__ __host__ float3 inf_to_unit_tanh(
    const float3 xyz, float3 roi_min, const float3 roi_max)
{
    // roi -> [-0.5, 0.5]^3, squash with tanh, then rescale to [0, 1]^3.
    const float3 centered = roi_to_unit(xyz, roi_min, roi_max) - 0.5f;
    const float3 squashed = make_float3(
        tanhf(centered.x), tanhf(centered.y), tanhf(centered.z));
    return squashed * 0.5f + 0.5f;
}
44 |
// Inverse of inf_to_unit_tanh: [0, 1]^3 -> [-inf, inf]^3.
inline __device__ __host__ float3 unit_to_inf_tanh(
    const float3 xyz, float3 roi_min, const float3 roi_max)
{
    // Undo the tanh squash; clamp so atanh(+-1) = +-inf stays finite.
    float3 unsquashed = clamp(
        make_float3(
            atanhf(xyz.x * 2.0f - 1.0f),
            atanhf(xyz.y * 2.0f - 1.0f),
            atanhf(xyz.z * 2.0f - 1.0f)),
        -1e10f,
        1e10f);
    // Shift back to [0, 1]-style coordinates, then map into the ROI.
    unsquashed = unsquashed + 0.5f;
    return unit_to_roi(unsquashed, roi_min, roi_max);
}
63 |
// MipNeRF360 contraction: [-inf, inf]^3 -> sphere inside [0, 1]^3,
// with the ROI mapped into [0.25, 0.75]^3.
inline __device__ __host__ float3 inf_to_unit_sphere(
    const float3 xyz, const float3 roi_min, const float3 roi_max)
{
    // roi -> [-1, 1]^3.
    float3 p = roi_to_unit(xyz, roi_min, roi_max) * 2.0f - 1.0f;

    // Points outside the unit ball are pulled to radius 2 - 1/|p|,
    // keeping everything inside the radius-2 ball.
    const float norm = sqrt(dot(p, p));
    if (norm > 1.0f)
    {
        p = (2.0f - 1.0f / norm) * (p / norm);
    }
    // radius-2 ball -> [0.25, 0.75]-centered cube coordinates.
    return p * 0.25f + 0.5f;
}
83 |
// Inverse of inf_to_unit_sphere: sphere inside [0, 1]^3 -> [-inf, inf]^3.
inline __device__ __host__ float3 unit_sphere_to_inf(
    const float3 xyz, const float3 roi_min, const float3 roi_max)
{
    // [0.25, 0.75]^3 -> [-1, 1]^3 (radius-2 ball overall).
    float3 p = (xyz - 0.5f) * 4.0f;

    const float norm_sq = dot(p, p);
    const float norm = sqrt(norm_sq);
    if (norm > 1.0f)
    {
        // Invert |p'| = 2 - 1/|p|; the fmaxf guards against a zero
        // denominator at |p'| = 2.
        p = p / fmaxf((2.0f * norm - 1.0f * norm_sq), 1e-10f);
    }
    // [-1, 1]^3 -> [0, 1]^3, then back into the ROI box.
    p = p * 0.5f + 0.5f;
    return unit_to_roi(p, roi_min, roi_max);
}
103 |
// Map a world-space point into [0, 1]^3 according to `type`.
inline __device__ __host__ float3 apply_contraction(
    const float3 xyz, const float3 roi_min, const float3 roi_max,
    const ContractionType type)
{
    switch (type)
    {
    case AABB:
        return roi_to_unit(xyz, roi_min, roi_max);
    case UN_BOUNDED_TANH:
        return inf_to_unit_tanh(xyz, roi_min, roi_max);
    case UN_BOUNDED_SPHERE:
        return inf_to_unit_sphere(xyz, roi_min, roi_max);
    default:
        // Unreachable for valid enum values. Previously control could fall
        // off the end of this non-void function (undefined behavior);
        // fall back to the plain AABB mapping instead.
        return roi_to_unit(xyz, roi_min, roi_max);
    }
}
118 |
// Inverse of apply_contraction: map a point in [0, 1]^3 back to world space.
inline __device__ __host__ float3 apply_contraction_inv(
    const float3 xyz, const float3 roi_min, const float3 roi_max,
    const ContractionType type)
{
    switch (type)
    {
    case AABB:
        return unit_to_roi(xyz, roi_min, roi_max);
    case UN_BOUNDED_TANH:
        return unit_to_inf_tanh(xyz, roi_min, roi_max);
    case UN_BOUNDED_SPHERE:
        return unit_sphere_to_inf(xyz, roi_min, roi_max);
    default:
        // Unreachable for valid enum values. Previously control could fall
        // off the end of this non-void function (undefined behavior);
        // fall back to the plain AABB mapping instead.
        return unit_to_roi(xyz, roi_min, roi_max);
    }
}
133 |
134 | } // namespace device
135 | } // namespace
--------------------------------------------------------------------------------
/nerfacc/cuda/csrc/include/utils_cuda.cuh:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2022 Ruilong Li, UC Berkeley.
3 | */
4 |
5 | #pragma once
6 |
7 | #include
8 | #include
9 | #include
10 |
11 |
// Tensor precondition helpers: kernel entry points CHECK_INPUT their
// tensor arguments (must live on a CUDA device and be contiguous).
#define CHECK_CUDA(x) TORCH_CHECK(x.is_cuda(), #x " must be a CUDA tensor")
#define CHECK_CONTIGUOUS(x) \
  TORCH_CHECK(x.is_contiguous(), #x " must be contiguous")
#define CHECK_INPUT(x) \
  CHECK_CUDA(x);       \
  CHECK_CONTIGUOUS(x)
// Compute the global thread id and early-return threads beyond Q items.
#define CUDA_GET_THREAD_ID(tid, Q)                         \
  const int tid = blockIdx.x * blockDim.x + threadIdx.x;   \
  if (tid >= Q)                                            \
  return
// Number of blocks needed to cover Q items at CUDA_N_THREADS per block.
#define CUDA_N_BLOCKS_NEEDED(Q, CUDA_N_THREADS) ((Q - 1) / CUDA_N_THREADS + 1)
// Switch the current CUDA device to the one holding tensor _ten.
#define DEVICE_GUARD(_ten) \
  const at::cuda::OptionalCUDAGuard device_guard(device_of(_ten));
25 |
// Integer ceiling division: smallest integer >= a / b (b > 0).
// The template parameter list was missing (stripped in transit); restored
// — `scalar_t` is used throughout the signature below.
template <typename scalar_t>
inline __device__ __host__ scalar_t ceil_div(scalar_t a, scalar_t b)
{
    return (a + b - 1) / b;
}
--------------------------------------------------------------------------------
/nerfacc/cuda/csrc/include/utils_grid.cuh:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include "data_spec_packed.cuh"
4 | #include "utils_contraction.cuh"
5 | #include "utils_math.cuh"
6 |
7 | namespace {
8 | namespace device {
9 |
// Slab test: intersect `ray` with `aabb`, writing the entry/exit
// distances (clipped to the ray's own [tmin, tmax]) into tmin/tmax.
// Returns false when the ray misses the box or the box lies entirely
// behind the origin. The sign branches (rather than fminf/fmaxf) are
// kept as-is: they are sensitive to inf arising from inv_dir when a
// direction component is zero.
inline __device__ bool ray_aabb_intersect(
    SingleRaySpec ray, AABBSpec aabb,
    // outputs
    float& tmin, float& tmax)
{
  float tmin_temp{};
  float tmax_temp{};

  // X slab: near/far plane chosen by the direction sign.
  if (ray.inv_dir.x >= 0) {
    tmin = (aabb.min.x - ray.origin.x) * ray.inv_dir.x;
    tmax = (aabb.max.x - ray.origin.x) * ray.inv_dir.x;
  } else {
    tmin = (aabb.max.x - ray.origin.x) * ray.inv_dir.x;
    tmax = (aabb.min.x - ray.origin.x) * ray.inv_dir.x;
  }

  // Y slab: intersect with the running [tmin, tmax] interval.
  if (ray.inv_dir.y >= 0) {
    tmin_temp = (aabb.min.y - ray.origin.y) * ray.inv_dir.y;
    tmax_temp = (aabb.max.y - ray.origin.y) * ray.inv_dir.y;
  } else {
    tmin_temp = (aabb.max.y - ray.origin.y) * ray.inv_dir.y;
    tmax_temp = (aabb.min.y - ray.origin.y) * ray.inv_dir.y;
  }

  if (tmin > tmax_temp || tmin_temp > tmax) return false;
  if (tmin_temp > tmin) tmin = tmin_temp;
  if (tmax_temp < tmax) tmax = tmax_temp;

  // Z slab.
  if (ray.inv_dir.z >= 0) {
    tmin_temp = (aabb.min.z - ray.origin.z) * ray.inv_dir.z;
    tmax_temp = (aabb.max.z - ray.origin.z) * ray.inv_dir.z;
  } else {
    tmin_temp = (aabb.max.z - ray.origin.z) * ray.inv_dir.z;
    tmax_temp = (aabb.min.z - ray.origin.z) * ray.inv_dir.z;
  }

  if (tmin > tmax_temp || tmin_temp > tmax) return false;
  if (tmin_temp > tmin) tmin = tmin_temp;
  if (tmax_temp < tmax) tmax = tmax_temp;

  // The whole box is behind the ray origin.
  if (tmax <= 0) return false;

  // Clip to the ray's valid parametric range.
  tmin = fmaxf(tmin, ray.tmin);
  tmax = fminf(tmax, ray.tmax);
  return true;
}
56 |
57 |
// Precompute the voxel-stepping (Amanatides & Woo DDA) state for one ray
// crossing a regular grid that spans `aabb` with `resolution` cells:
//   delta         - per-axis t increment between voxel boundaries
//   tdist         - t of the next boundary crossing along each axis
//   step_index    - per-axis step (+1 / -1 / 0)
//   current_index - entry voxel index
//   final_index   - exit voxel index
// Fix: the fourth output parameter had been mangled into "¤t_index"
// (the HTML entity "&curren;" swallowed the start of "&current_index");
// the body assigns `current_index`, so the name is restored accordingly.
inline __device__ void setup_traversal(
    SingleRaySpec ray, float tmin, float tmax, float eps,
    AABBSpec aabb, int3 resolution,
    // outputs
    float3 &delta, float3 &tdist,
    int3 &step_index, int3 &current_index, int3 &final_index)
{
  const float3 res = make_float3(resolution);
  const float3 voxel_size = (aabb.max - aabb.min) / res;
  // Nudge both endpoints inward by eps so they fall inside the grid.
  const float3 ray_start = ray.origin + ray.dir * (tmin + eps);
  const float3 ray_end = ray.origin + ray.dir * (tmax - eps);

  // get voxel index of start and end within grid
  // TODO: check float error here!
  current_index = make_int3(
      apply_contraction(ray_start, aabb.min, aabb.max, ContractionType::AABB)
      * res
  );
  current_index = clamp(current_index, make_int3(0, 0, 0), resolution - 1);

  final_index = make_int3(
      apply_contraction(ray_end, aabb.min, aabb.max, ContractionType::AABB)
      * res
  );
  final_index = clamp(final_index, make_int3(0, 0, 0), resolution - 1);

  // t of the first voxel-boundary crossing along each axis; the boundary
  // ahead of the ray is the next index for positive directions.
  const int3 index_delta = make_int3(
      ray.dir.x > 0 ? 1 : 0, ray.dir.y > 0 ? 1 : 0, ray.dir.z > 0 ? 1 : 0
  );
  const int3 start_index = current_index + index_delta;
  const float3 tmax_xyz = ((aabb.min +
      ((make_float3(start_index) * voxel_size) - ray_start)) * ray.inv_dir) + tmin;

  // Axes the ray never crosses get tmax so they are never selected.
  tdist = make_float3(
      (ray.dir.x == 0.0f) ? tmax : tmax_xyz.x,
      (ray.dir.y == 0.0f) ? tmax : tmax_xyz.y,
      (ray.dir.z == 0.0f) ? tmax : tmax_xyz.z
  );

  const float3 step_float = make_float3(
      (ray.dir.x == 0.0f) ? 0.0f : (ray.dir.x > 0.0f ? 1.0f : -1.0f),
      (ray.dir.y == 0.0f) ? 0.0f : (ray.dir.y > 0.0f ? 1.0f : -1.0f),
      (ray.dir.z == 0.0f) ? 0.0f : (ray.dir.z > 0.0f ? 1.0f : -1.0f)
  );
  step_index = make_int3(step_float);

  // t-distance between consecutive boundaries along each axis (always
  // positive thanks to the sign of step_float).
  const float3 delta_temp = voxel_size * ray.inv_dir * step_float;
  delta = make_float3(
      (ray.dir.x == 0.0f) ? tmax : delta_temp.x,
      (ray.dir.y == 0.0f) ? tmax : delta_temp.y,
      (ray.dir.z == 0.0f) ? tmax : delta_temp.z
  );
}
115 |
// Advance the DDA state by one voxel along the axis whose next boundary
// crossing is nearest. Returns false once the new index reaches
// `overflow_index` (traversal finished), true otherwise.
inline __device__ bool single_traversal(
    float3& tdist, int3& current_index,
    const int3 overflow_index, const int3 step_index, const float3 delta) {
  if ((tdist.x < tdist.y) && (tdist.x < tdist.z)) {
    // X-axis step.
    current_index.x += step_index.x;
    tdist.x += delta.x;
    return current_index.x != overflow_index.x;
  }
  if (tdist.y < tdist.z) {
    // Y-axis step.
    current_index.y += step_index.y;
    tdist.y += delta.y;
    return current_index.y != overflow_index.y;
  }
  // Z-axis step (ties fall through to here, as before).
  current_index.z += step_index.z;
  tdist.z += delta.z;
  return current_index.z != overflow_index.z;
}
143 |
144 |
145 | } // namespace device
146 | } // namespace
--------------------------------------------------------------------------------
/nerfacc/cuda/csrc/nerfacc.cpp:
--------------------------------------------------------------------------------
1 | // This file contains only Python bindings
2 | #include "include/data_spec.hpp"
3 |
4 | #include
5 |
6 |
7 | // scan
8 | torch::Tensor inclusive_sum(
9 | torch::Tensor chunk_starts,
10 | torch::Tensor chunk_cnts,
11 | torch::Tensor inputs,
12 | bool normalize,
13 | bool backward);
14 | torch::Tensor exclusive_sum(
15 | torch::Tensor chunk_starts,
16 | torch::Tensor chunk_cnts,
17 | torch::Tensor inputs,
18 | bool normalize,
19 | bool backward);
20 | torch::Tensor inclusive_prod_forward(
21 | torch::Tensor chunk_starts,
22 | torch::Tensor chunk_cnts,
23 | torch::Tensor inputs);
24 | torch::Tensor inclusive_prod_backward(
25 | torch::Tensor chunk_starts,
26 | torch::Tensor chunk_cnts,
27 | torch::Tensor inputs,
28 | torch::Tensor outputs,
29 | torch::Tensor grad_outputs);
30 | torch::Tensor exclusive_prod_forward(
31 | torch::Tensor chunk_starts,
32 | torch::Tensor chunk_cnts,
33 | torch::Tensor inputs);
34 | torch::Tensor exclusive_prod_backward(
35 | torch::Tensor chunk_starts,
36 | torch::Tensor chunk_cnts,
37 | torch::Tensor inputs,
38 | torch::Tensor outputs,
39 | torch::Tensor grad_outputs);
40 |
41 | bool is_cub_available();
42 | torch::Tensor inclusive_sum_cub(
43 | torch::Tensor ray_indices,
44 | torch::Tensor inputs,
45 | bool backward);
46 | torch::Tensor exclusive_sum_cub(
47 | torch::Tensor indices,
48 | torch::Tensor inputs,
49 | bool backward);
50 | torch::Tensor inclusive_prod_cub_forward(
51 | torch::Tensor indices,
52 | torch::Tensor inputs);
53 | torch::Tensor inclusive_prod_cub_backward(
54 | torch::Tensor indices,
55 | torch::Tensor inputs,
56 | torch::Tensor outputs,
57 | torch::Tensor grad_outputs);
58 | torch::Tensor exclusive_prod_cub_forward(
59 | torch::Tensor indices,
60 | torch::Tensor inputs);
61 | torch::Tensor exclusive_prod_cub_backward(
62 | torch::Tensor indices,
63 | torch::Tensor inputs,
64 | torch::Tensor outputs,
65 | torch::Tensor grad_outputs);
66 |
67 | // grid
68 | std::vector ray_aabb_intersect(
69 | const torch::Tensor rays_o, // [n_rays, 3]
70 | const torch::Tensor rays_d, // [n_rays, 3]
71 | const torch::Tensor aabbs, // [n_aabbs, 6]
72 | const float near_plane,
73 | const float far_plane,
74 | const float miss_value);
75 | std::tuple traverse_grids(
76 | // rays
77 | const torch::Tensor rays_o, // [n_rays, 3]
78 | const torch::Tensor rays_d, // [n_rays, 3]
79 | const torch::Tensor rays_mask, // [n_rays]
80 | // grids
81 | const torch::Tensor binaries, // [n_grids, resx, resy, resz]
82 | const torch::Tensor aabbs, // [n_grids, 6]
83 | // intersections
84 | const torch::Tensor t_sorted, // [n_rays, n_grids * 2]
85 | const torch::Tensor t_indices, // [n_rays, n_grids * 2]
86 | const torch::Tensor hits, // [n_rays, n_grids]
87 | // options
88 | const torch::Tensor near_planes,
89 | const torch::Tensor far_planes,
90 | const float step_size,
91 | const float cone_angle,
92 | const bool compute_intervals,
93 | const bool compute_samples,
94 | const bool compute_terminate_planes,
95 | const int32_t traverse_steps_limit, // <= 0 means no limit
96 | const bool over_allocate); // over allocate the memory for intervals and samples
97 |
98 | // pdf
99 | std::vector importance_sampling(
100 | RaySegmentsSpec ray_segments,
101 | torch::Tensor cdfs,
102 | torch::Tensor n_intervels_per_ray,
103 | bool stratified);
104 | std::vector importance_sampling(
105 | RaySegmentsSpec ray_segments,
106 | torch::Tensor cdfs,
107 | int64_t n_intervels_per_ray,
108 | bool stratified);
109 | std::vector searchsorted(
110 | RaySegmentsSpec query,
111 | RaySegmentsSpec key);
112 |
113 | // cameras
114 | torch::Tensor opencv_lens_undistortion(
115 | const torch::Tensor& uv, // [..., 2]
116 | const torch::Tensor& params, // [..., 6]
117 | const float eps,
118 | const int max_iterations);
119 | torch::Tensor opencv_lens_undistortion_fisheye(
120 | const torch::Tensor& uv, // [..., 2]
121 | const torch::Tensor& params, // [..., 4]
122 | const float criteria_eps,
123 | const int criteria_iters);
124 |
125 |
126 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
127 | #define _REG_FUNC(funname) m.def(#funname, &funname)
128 | _REG_FUNC(inclusive_sum);
129 | _REG_FUNC(exclusive_sum);
130 | _REG_FUNC(inclusive_prod_forward);
131 | _REG_FUNC(inclusive_prod_backward);
132 | _REG_FUNC(exclusive_prod_forward);
133 | _REG_FUNC(exclusive_prod_backward);
134 |
135 | _REG_FUNC(is_cub_available);
136 | _REG_FUNC(inclusive_sum_cub);
137 | _REG_FUNC(exclusive_sum_cub);
138 | _REG_FUNC(inclusive_prod_cub_forward);
139 | _REG_FUNC(inclusive_prod_cub_backward);
140 | _REG_FUNC(exclusive_prod_cub_forward);
141 | _REG_FUNC(exclusive_prod_cub_backward);
142 |
143 | _REG_FUNC(ray_aabb_intersect);
144 | _REG_FUNC(traverse_grids);
145 | _REG_FUNC(searchsorted);
146 |
147 | _REG_FUNC(opencv_lens_undistortion);
148 | _REG_FUNC(opencv_lens_undistortion_fisheye); // TODO: check this function.
149 | #undef _REG_FUNC
150 |
151 | m.def("importance_sampling", py::overload_cast(&importance_sampling));
152 | m.def("importance_sampling", py::overload_cast(&importance_sampling));
153 |
154 | py::class_(m, "RaySegmentsSpec")
155 | .def(py::init<>())
156 | .def_readwrite("vals", &RaySegmentsSpec::vals)
157 | .def_readwrite("is_left", &RaySegmentsSpec::is_left)
158 | .def_readwrite("is_right", &RaySegmentsSpec::is_right)
159 | .def_readwrite("is_valid", &RaySegmentsSpec::is_valid)
160 | .def_readwrite("chunk_starts", &RaySegmentsSpec::chunk_starts)
161 | .def_readwrite("chunk_cnts", &RaySegmentsSpec::chunk_cnts)
162 | .def_readwrite("ray_indices", &RaySegmentsSpec::ray_indices);
163 | }
--------------------------------------------------------------------------------
/nerfacc/data_specs.py:
--------------------------------------------------------------------------------
1 | """
2 | Copyright (c) 2022 Ruilong Li, UC Berkeley.
3 | """
4 | from dataclasses import dataclass
5 | from typing import Optional
6 |
7 | import torch
8 |
9 | from . import cuda as _C
10 |
11 |
@dataclass
class RaySamples:
    """Ray samples that supports batched and flattened data.

    Note:
        When `vals` is flattened, either `packed_info` or `ray_indices` must
        be provided.

    Args:
        vals: Batched data with shape (n_rays, n_samples) or flattened data
            with shape (all_samples,)
        packed_info: Optional. A tensor of shape (n_rays, 2) that specifies
            the start and count of each chunk in flattened `vals`, with in
            total n_rays chunks. Only needed when `vals` is flattened.
        ray_indices: Optional. A tensor of shape (all_samples,) that specifies
            the ray index of each sample. Only needed when `vals` is flattened.
        is_valid: Optional. A boolean tensor of shape (all_samples,) marking
            which samples are valid.

    Examples:

    .. code-block:: python

        >>> # Batched data
        >>> ray_samples = RaySamples(torch.rand(10, 100))
        >>> # Flattened data
        >>> ray_samples = RaySamples(
        >>>     torch.rand(1000),
        >>>     packed_info=torch.tensor([[0, 100], [100, 200], [300, 700]]),
        >>> )

    """

    vals: torch.Tensor
    packed_info: Optional[torch.Tensor] = None
    ray_indices: Optional[torch.Tensor] = None
    is_valid: Optional[torch.Tensor] = None

    def _to_cpp(self):
        """Generate a C++ ``RaySegmentsSpec`` mirroring this object."""
        spec = _C.RaySegmentsSpec()
        spec.vals = self.vals.contiguous()
        if self.packed_info is not None:
            # Bug fix: this used to test the non-existent attribute
            # `self.chunk_cnts` before setting `spec.chunk_cnts`, raising
            # AttributeError. Both columns come from `packed_info`.
            spec.chunk_starts = self.packed_info[:, 0].contiguous()
            spec.chunk_cnts = self.packed_info[:, 1].contiguous()
        if self.ray_indices is not None:
            spec.ray_indices = self.ray_indices.contiguous()
        return spec

    @classmethod
    def _from_cpp(cls, spec):
        """Build a :class:`RaySamples` from a C++ ``RaySegmentsSpec``."""
        if spec.chunk_starts is not None and spec.chunk_cnts is not None:
            packed_info = torch.stack([spec.chunk_starts, spec.chunk_cnts], -1)
        else:
            packed_info = None
        return cls(
            vals=spec.vals,
            packed_info=packed_info,
            ray_indices=spec.ray_indices,
            is_valid=spec.is_valid,
        )

    @property
    def device(self) -> torch.device:
        """Device of the underlying `vals` tensor."""
        return self.vals.device
89 |
@dataclass
class RayIntervals:
    """Ray intervals that supports batched and flattened data.

    Each interval is defined by two edges (left and right). The attribute `vals`
    stores the edges of all intervals along the rays. The attributes `is_left`
    and `is_right` are for indicating whether each edge is a left or right edge.
    This class unifies the representation of both continuous and non-continuous ray
    intervals.

    Note:
        When `vals` is flattened, either `packed_info` or `ray_indices` must
        be provided. Also both `is_left` and `is_right` must be provided.

    Args:
        vals: Batched data with shape (n_rays, n_edges) or flattened data
            with shape (all_edges,)
        packed_info: Optional. A tensor of shape (n_rays, 2) that specifies
            the start and count of each chunk in flattened `vals`, with in
            total n_rays chunks. Only needed when `vals` is flattened.
        ray_indices: Optional. A tensor of shape (all_edges,) that specifies
            the ray index of each edge. Only needed when `vals` is flattened.
        is_left: Optional. A boolean tensor of shape (all_edges,) that specifies
            whether each edge is a left edge. Only needed when `vals` is flattened.
        is_right: Optional. A boolean tensor of shape (all_edges,) that specifies
            whether each edge is a right edge. Only needed when `vals` is flattened.

    Examples:

    .. code-block:: python

        >>> # Batched data
        >>> ray_intervals = RayIntervals(torch.rand(10, 100))
        >>> # Flattened data
        >>> ray_intervals = RayIntervals(
        >>>     torch.rand(6),
        >>>     packed_info=torch.tensor([[0, 2], [2, 0], [2, 4]]),
        >>>     is_left=torch.tensor([True, False, True, True, True, False]),
        >>>     is_right=torch.tensor([False, True, False, True, True, True]),
        >>> )

    """

    vals: torch.Tensor
    packed_info: Optional[torch.Tensor] = None
    ray_indices: Optional[torch.Tensor] = None
    is_left: Optional[torch.Tensor] = None
    is_right: Optional[torch.Tensor] = None

    def _to_cpp(self):
        """
        Generate a `_C.RaySegmentsSpec` object to pass to C++.
        """
        spec = _C.RaySegmentsSpec()
        spec.vals = self.vals.contiguous()
        if self.packed_info is not None:
            # Both columns come from the same tensor, so a single check
            # suffices (the same condition used to be tested twice).
            spec.chunk_starts = self.packed_info[:, 0].contiguous()
            spec.chunk_cnts = self.packed_info[:, 1].contiguous()
        if self.ray_indices is not None:
            spec.ray_indices = self.ray_indices.contiguous()
        if self.is_left is not None:
            spec.is_left = self.is_left.contiguous()
        if self.is_right is not None:
            spec.is_right = self.is_right.contiguous()
        return spec

    @classmethod
    def _from_cpp(cls, spec):
        """
        Generate a `RayIntervals` from a C++ `RaySegmentsSpec` object.
        """
        if spec.chunk_starts is not None and spec.chunk_cnts is not None:
            packed_info = torch.stack([spec.chunk_starts, spec.chunk_cnts], -1)
        else:
            packed_info = None
        return cls(
            vals=spec.vals,
            packed_info=packed_info,
            ray_indices=spec.ray_indices,
            is_left=spec.is_left,
            is_right=spec.is_right,
        )

    @property
    def device(self) -> torch.device:
        """Device of the underlying `vals` tensor."""
        return self.vals.device
181 |
--------------------------------------------------------------------------------
/nerfacc/estimators/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nerfstudio-project/nerfacc/57ccfa14feb94975836ea6913149a86737220f2b/nerfacc/estimators/__init__.py
--------------------------------------------------------------------------------
/nerfacc/estimators/base.py:
--------------------------------------------------------------------------------
1 | from typing import Any
2 |
3 | import torch
4 | import torch.nn as nn
5 |
6 |
class AbstractEstimator(nn.Module):
    """An abstract Transmittance Estimator class for Sampling."""

    def __init__(self) -> None:
        super().__init__()
        # A non-persistent empty buffer: it follows the module across
        # `.to()` / `.cuda()` calls (so we can report the device) but is
        # never written into the state dict.
        dummy = torch.empty(0)
        self.register_buffer("_dummy", dummy, persistent=False)

    @property
    def device(self) -> torch.device:
        """Device this estimator currently lives on."""
        return self._dummy.device

    def sampling(self, *args, **kwargs) -> Any:
        """Sample along rays. Must be implemented by subclasses."""
        raise NotImplementedError

    def update_every_n_steps(self, *args, **kwargs) -> None:
        """Periodic state-update hook. Must be implemented by subclasses."""
        raise NotImplementedError
23 |
--------------------------------------------------------------------------------
/nerfacc/losses.py:
--------------------------------------------------------------------------------
1 | from torch import Tensor
2 |
3 | from .scan import exclusive_sum
4 | from .volrend import accumulate_along_rays
5 |
6 |
def distortion(
    weights: Tensor,
    t_starts: Tensor,
    t_ends: Tensor,
    ray_indices: Tensor,
    n_rays: int,
) -> Tensor:
    """Distortion Regularization proposed in Mip-NeRF 360.

    The loss has two parts: a self-term penalizing the spread of weight
    within each interval, and a cross-term penalizing weight spread across
    interval midpoints along the same ray.

    Args:
        weights: The flattened weights of the samples. Shape (n_samples,)
        t_starts: The start points of the samples. Shape (n_samples,)
        t_ends: The end points of the samples. Shape (n_samples,)
        ray_indices: The ray indices of the samples. LongTensor with shape (n_samples,)
        n_rays: The total number of rays.

    Returns:
        The per-ray distortion loss with the shape (n_rays, 1).
    """
    assert (
        weights.shape == t_starts.shape == t_ends.shape == ray_indices.shape
    ), (
        f"the shape of the inputs are not the same: "
        f"weights {weights.shape}, t_starts {t_starts.shape}, "
        f"t_ends {t_ends.shape}, ray_indices {ray_indices.shape}"
    )
    midpoints = 0.5 * (t_starts + t_ends)
    interval_lens = t_ends - t_starts

    # Self-term: weight spread inside each interval.
    loss_uni = (1 / 3) * (interval_lens * weights.pow(2))

    # Cross-term: pairwise midpoint distances, computed with exclusive
    # prefix sums per ray instead of an O(n^2) pairwise expansion.
    w_prefix = exclusive_sum(weights, indices=ray_indices)
    wm_prefix = exclusive_sum(weights * midpoints, indices=ray_indices)
    loss_bi = 2 * (weights * midpoints * w_prefix - weights * wm_prefix)

    # Reduce per-sample terms to one value per ray.
    return accumulate_along_rays(loss_uni + loss_bi, None, ray_indices, n_rays)
42 |
--------------------------------------------------------------------------------
/nerfacc/pack.py:
--------------------------------------------------------------------------------
1 | """
2 | Copyright (c) 2022 Ruilong Li, UC Berkeley.
3 | """
4 | from typing import Optional
5 |
6 | import torch
7 | from torch import Tensor
8 |
9 |
@torch.no_grad()
def pack_info(ray_indices: Tensor, n_rays: Optional[int] = None) -> Tensor:
    """Pack `ray_indices` to `packed_info`. Useful for converting per sample data to per ray data.

    Note:
        this function is not differentiable to any inputs.

    Args:
        ray_indices: Ray indices of the samples. LongTensor with shape (n_sample).
        n_rays: Number of rays. If None, it is inferred from `ray_indices`
            (which therefore must be non-empty). Default is None.

    Returns:
        A LongTensor of shape (n_rays, 2) that specifies the start and count
        of each chunk in the flattened input tensor, with in total n_rays chunks.

    Example:

    .. code-block:: python

        >>> ray_indices = torch.tensor([0, 0, 1, 1, 1, 2, 2, 2, 2], device="cuda")
        >>> packed_info = pack_info(ray_indices, n_rays=3)
        >>> packed_info
        tensor([[0, 2], [2, 3], [5, 4]], device='cuda:0')

    """
    assert (
        ray_indices.dim() == 1
    ), "ray_indices must be a 1D tensor with shape (n_samples)."
    # Generalized: the original raised NotImplementedError for non-CUDA
    # inputs, but every op below (zeros / index_add_ / cumsum / stack) is
    # device-agnostic, so CPU tensors are supported as well. CUDA behavior
    # is unchanged.
    device = ray_indices.device
    dtype = ray_indices.dtype
    if n_rays is None:
        n_rays = ray_indices.max().item() + 1
    # Histogram of samples per ray ...
    chunk_cnts = torch.zeros((n_rays,), device=device, dtype=dtype)
    chunk_cnts.index_add_(0, ray_indices, torch.ones_like(ray_indices))
    # ... and its exclusive prefix sum gives each chunk's start offset.
    chunk_starts = chunk_cnts.cumsum(dim=0, dtype=dtype) - chunk_cnts
    packed_info = torch.stack([chunk_starts, chunk_cnts], dim=-1)
    return packed_info
50 |
--------------------------------------------------------------------------------
/nerfacc/pdf.py:
--------------------------------------------------------------------------------
1 | """
2 | Copyright (c) 2022 Ruilong Li, UC Berkeley.
3 | """
4 | from typing import Tuple, Union
5 |
6 | import torch
7 | from torch import Tensor
8 |
9 | from . import cuda as _C
10 | from .data_specs import RayIntervals, RaySamples
11 |
12 |
def searchsorted(
    sorted_sequence: Union[RayIntervals, RaySamples],
    values: Union[RayIntervals, RaySamples],
) -> Tuple[Tensor, Tensor]:
    """Searchsorted that supports flattened tensor.

    For every element of `values.vals`, locate the bracketing pair of ids in
    `sorted_sequence.vals` (per ray), i.e.:

    `sorted_sequence.vals.gather(-1, ids_left) <= values.vals < sorted_sequence.vals.gather(-1, ids_right)`

    Note:
        Out-of-range values are handled as if they were clipped to the range
        of `sorted_sequence`, so the returned ids are always valid indices.

    Args:
        sorted_sequence: A :class:`RayIntervals` or :class:`RaySamples` object,
            whose `vals` is assumed ascendingly sorted within each ray.
        values: A :class:`RayIntervals` or :class:`RaySamples` object.

    Returns:
        A tuple of LongTensor:

        - **ids_left**: A LongTensor with the same shape as `values.vals`.
        - **ids_right**: A LongTensor with the same shape as `values.vals`.
    """
    # The CUDA kernel takes the query first and the sorted sequence second.
    values_spec = values._to_cpp()
    sequence_spec = sorted_sequence._to_cpp()
    return _C.searchsorted(values_spec, sequence_spec)
63 |
64 |
def importance_sampling(
    intervals: RayIntervals,
    cdfs: Tensor,
    n_intervals_per_ray: Union[Tensor, int],
    stratified: bool = False,
) -> Tuple[RayIntervals, RaySamples]:
    """Importance sampling that supports flattened tensor.

    Given a set of intervals and the corresponding CDFs at the interval edges,
    this function performs inverse transform sampling to create a new set of
    intervals and samples. Stratified sampling is also supported.

    Args:
        intervals: A :class:`RayIntervals` object that specifies the edges of
            the intervals along the rays.
        cdfs: The CDFs at the interval edges. It has the same shape as
            `intervals.vals`.
        n_intervals_per_ray: Resample each ray to have this many intervals.
            If it is a tensor, it must be of shape (n_rays,). If it is an int,
            it is broadcasted to all rays.
        stratified: If True, perform stratified sampling.

    Returns:
        A tuple of {:class:`RayIntervals`, :class:`RaySamples`}:

        - **intervals**: With an int `n_intervals_per_ray`, `intervals.vals` is
          batched with shape (n_rays, n_intervals_per_ray + 1). With a tensor,
          each ray may get a different number of intervals, so `intervals.vals`
          is flattened with shape (all_edges,) and `packed_info`, `ray_indices`,
          `is_left` and `is_right` are populated.

        - **samples**: With an int `n_intervals_per_ray`, `samples.vals` is
          batched with shape (n_rays, n_intervals_per_ray). With a tensor it is
          flattened with shape (all_samples,) and `packed_info` and
          `ray_indices` are populated.
    """
    # The CUDA kernel requires contiguous per-ray counts.
    if isinstance(n_intervals_per_ray, Tensor):
        n_intervals_per_ray = n_intervals_per_ray.contiguous()
    out_intervals, out_samples = _C.importance_sampling(
        intervals._to_cpp(),
        cdfs.contiguous(),
        n_intervals_per_ray,
        stratified,
    )
    return (
        RayIntervals._from_cpp(out_intervals),
        RaySamples._from_cpp(out_samples),
    )
132 |
133 |
134 | def _sample_from_weighted(
135 | bins: Tensor,
136 | weights: Tensor,
137 | num_samples: int,
138 | stratified: bool = False,
139 | vmin: float = -torch.inf,
140 | vmax: float = torch.inf,
141 | ) -> Tuple[Tensor, Tensor]:
142 | import torch.nn.functional as F
143 |
144 | """
145 | Args:
146 | bins: (..., B + 1).
147 | weights: (..., B).
148 |
149 | Returns:
150 | samples: (..., S + 1).
151 | """
152 | B = weights.shape[-1]
153 | S = num_samples
154 | assert bins.shape[-1] == B + 1
155 |
156 | dtype, device = bins.dtype, bins.device
157 | eps = torch.finfo(weights.dtype).eps
158 |
159 | # (..., B).
160 | pdf = F.normalize(weights, p=1, dim=-1)
161 | # (..., B + 1).
162 | cdf = torch.cat(
163 | [
164 | torch.zeros_like(pdf[..., :1]),
165 | torch.cumsum(pdf[..., :-1], dim=-1),
166 | torch.ones_like(pdf[..., :1]),
167 | ],
168 | dim=-1,
169 | )
170 |
171 | # (..., S). Sample positions between [0, 1).
172 | if not stratified:
173 | pad = 1 / (2 * S)
174 | # Get the center of each pdf bins.
175 | u = torch.linspace(pad, 1 - pad - eps, S, dtype=dtype, device=device)
176 | u = u.broadcast_to(bins.shape[:-1] + (S,))
177 | else:
178 | # `u` is in [0, 1) --- it can be zero, but it can never be 1.
179 | u_max = eps + (1 - eps) / S
180 | max_jitter = (1 - u_max) / (S - 1) - eps
181 | # Only perform one jittering per ray (`single_jitter` in the original
182 | # implementation.)
183 | u = (
184 | torch.linspace(0, 1 - u_max, S, dtype=dtype, device=device)
185 | + torch.rand(
186 | *bins.shape[:-1],
187 | 1,
188 | dtype=dtype,
189 | device=device,
190 | )
191 | * max_jitter
192 | )
193 |
194 | # (..., S).
195 | ceil = torch.searchsorted(cdf.contiguous(), u.contiguous(), side="right")
196 | floor = ceil - 1
197 | # (..., S * 2).
198 | inds = torch.cat([floor, ceil], dim=-1)
199 |
200 | # (..., S).
201 | cdf0, cdf1 = cdf.gather(-1, inds).split(S, dim=-1)
202 | b0, b1 = bins.gather(-1, inds).split(S, dim=-1)
203 |
204 | # (..., S). Linear interpolation in 1D.
205 | t = (u - cdf0) / torch.clamp(cdf1 - cdf0, min=eps)
206 | # Sample centers.
207 | centers = b0 + t * (b1 - b0)
208 |
209 | samples = (centers[..., 1:] + centers[..., :-1]) / 2
210 | samples = torch.cat(
211 | [
212 | (2 * centers[..., :1] - samples[..., :1]).clamp_min(vmin),
213 | samples,
214 | (2 * centers[..., -1:] - samples[..., -1:]).clamp_max(vmax),
215 | ],
216 | dim=-1,
217 | )
218 |
219 | return samples, centers
220 |
--------------------------------------------------------------------------------
/nerfacc/version.py:
--------------------------------------------------------------------------------
1 | """
2 | Copyright (c) 2022 Ruilong Li, UC Berkeley.
3 | """
4 |
5 | __version__ = "0.5.3"
6 |
--------------------------------------------------------------------------------
/scripts/run_aws_listing.py:
--------------------------------------------------------------------------------
# Build and upload simple HTML wheel-index pages to an S3 bucket so that
# `pip install --find-links` style URLs can discover the nerfacc wheels.
import argparse
from collections import defaultdict

from boto3 import resource

parser = argparse.ArgumentParser()
parser.add_argument("--bucket", type=str, required=True)
parser.add_argument("--region", type=str, required=True)
args = parser.parse_args()

ROOT_URL = f"https://{args.bucket}.s3.{args.region}.amazonaws.com/whl"
# NOTE(review): these two template literals look garbled — the HTML tags
# (doctype/body/anchor markup) appear to have been stripped, and `href`
# spans a raw newline. Compare against the upstream script and restore the
# original markup before running.
html = "\n\n\n{}\n\n"
href = ' {}
'
# Extra arguments applied to every uploaded object.
html_args = {
    "ContentType": "text/html",
    "CacheControl": "max-age=300",
    "ACL": "public-read",
}

# NOTE(review): bucket name is hard-coded although --bucket is a required
# CLI argument above — presumably this should be `args.bucket`; confirm.
bucket = resource("s3").Bucket(name="nerfacc-bucket")

# Group wheel object keys by the torch-version folder they live in.
wheels_dict = defaultdict(list)
for obj in bucket.objects.filter(Prefix="whl"):
    if obj.key[-3:] != "whl":
        continue
    torch_version, wheel = obj.key.split("/")[-2:]
    wheel = f"{torch_version}/{wheel}"
    wheels_dict[torch_version].append(wheel)

# Top-level index: one link per torch version ('+' must be URL-escaped).
index_html = html.format(
    "\n".join(
        [
            href.format(
                f"{torch_version}.html".replace("+", "%2B"), torch_version
            )
            for torch_version in wheels_dict
        ]
    )
)

with open("index.html", "w") as f:
    f.write(index_html)
bucket.Object("whl/index.html").upload_file("index.html", html_args)

# Per-torch-version pages: one link per wheel file.
for torch_version, wheel_names in wheels_dict.items():
    torch_version_html = html.format(
        "\n".join(
            [
                href.format(
                    f"{ROOT_URL}/{wheel_name}".replace("+", "%2B"),
                    wheel_name.split("/")[-1],
                )
                for wheel_name in wheel_names
            ]
        )
    )

    with open(f"{torch_version}.html", "w") as f:
        f.write(torch_version_html)
    bucket.Object(f"whl/{torch_version}.html").upload_file(
        f"{torch_version}.html", html_args
    )
63 |
--------------------------------------------------------------------------------
/scripts/run_dev_checks.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """Simple yaml debugger"""
3 | import subprocess
4 |
5 | import yaml
6 | from rich.console import Console
7 | from rich.style import Style
8 |
9 | console = Console(width=120)
10 |
11 | LOCAL_TESTS = [
12 | "Run license checks",
13 | "Run isort",
14 | "Run Black",
15 | "Python Pylint",
16 | "Test with pytest",
17 | ]
18 |
19 |
def run_command(command: str) -> bool:
    """Execute a shell command and report failure on the console.

    Args:
        command: The shell command line to execute.

    Returns:
        True if the command exited with status 0, False otherwise.
    """
    exit_status = subprocess.call(command, shell=True)
    success = exit_status == 0
    if not success:
        console.print(f"[bold red]Error: `{command}` failed.")
    return success
31 |
32 |
def run_github_actions_file(filename: str):
    """Run the locally-runnable steps of a github actions workflow file.

    Args:
        filename: Which yml github actions file to run.
    """
    with open(filename, "rb") as f:
        workflow = yaml.safe_load(f)
    build_steps = workflow["jobs"]["build"]["steps"]

    all_passed = True

    for step in build_steps:
        if "name" in step and step["name"] in LOCAL_TESTS:
            # Flatten the multi-line script into a single shell command and
            # strip check-only flags so formatters actually rewrite files.
            command = step["run"].replace("\n", ";").replace("\\", "")
            command = command.replace("--check", "")

            console.line()
            console.rule(f"[bold green]Running: {command}")
            # Short-circuit: once something failed, later steps are skipped.
            all_passed = all_passed and run_command(command)
        else:
            skipped = step["name"] if "name" in step else step["uses"]
            console.print(f"Skipping {skipped}")

    # Code testing via pytest.
    console.line()
    console.rule("[bold green]Running pytest")
    all_passed = all_passed and run_command("pytest")

    # Documentation must build warning-free.
    console.line()
    console.rule("[bold green]Building Documentation")
    all_passed = all_passed and run_command(
        "cd docs/; make clean; make html SPHINXOPTS='-W;'"
    )

    if not all_passed:
        console.line()
        console.rule(characters="=", style=Style(color="red"))
        console.print(
            "[bold red]:skull: :skull: :skull: ERRORS FOUND :skull: :skull: :skull:",
            justify="center",
        )
        console.rule(characters="=", style=Style(color="red"))
    else:
        console.line()
        console.rule(characters="=")
        console.print(
            "[bold green]:TADA: :TADA: :TADA: ALL CHECKS PASSED :TADA: :TADA: :TADA:",
            justify="center",
        )
        console.rule(characters="=")
86 |
87 |
88 | if __name__ == "__main__":
89 | run_github_actions_file(filename=".github/workflows/code_checks.yml")
90 |
--------------------------------------------------------------------------------
/scripts/run_profiler.py:
--------------------------------------------------------------------------------
1 | from typing import Callable
2 |
3 | import torch
4 | import tqdm
5 |
6 | import nerfacc
7 |
8 | # timing
9 | # https://github.com/pytorch/pytorch/commit/d2784c233bfc57a1d836d961694bcc8ec4ed45e4
10 |
11 |
class Profiler:
    """Profile a callable's CPU/CUDA time and peak CUDA memory.

    The callable is run `warmup` times first (excluded from measurement),
    then `repeat` times under the torch profiler.
    """

    def __init__(self, warmup=10, repeat=1000):
        self.warmup = warmup
        self.repeat = repeat

    def __call__(self, func: Callable):
        # Warm-up pass so one-time costs don't pollute the measurement.
        for _ in range(self.warmup):
            func()
        torch.cuda.synchronize()

        # Measured pass under the profiler.
        with torch.profiler.profile(
            activities=[
                torch.profiler.ProfilerActivity.CPU,
                torch.profiler.ProfilerActivity.CUDA,
            ],
            profile_memory=True,
        ) as prof:
            for _ in range(self.repeat):
                func()
            torch.cuda.synchronize()

        # Aggregate per-event stats into per-call averages.
        events = prof.key_averages()
        cpu_time_per_call = (
            sum(evt.self_cpu_time_total for evt in events) / self.repeat
        )
        cuda_time_per_call = (
            sum(evt.self_cuda_time_total for evt in events) / self.repeat
        )
        peak_cuda_memory = max(
            evt.self_cuda_memory_usage for evt in events
        )
        return (
            cpu_time_per_call,  # in us
            cuda_time_per_call,  # in us
            peak_cuda_memory,  # in bytes
        )
52 |
53 |
def main():
    """Benchmark nerfacc volume-rendering kernels on random rays.

    NOTE(review): this script calls private nerfacc APIs
    (`nerfacc._ray_marching`, `nerfacc._vol_rendering._RenderingDensity`)
    that may not exist in current nerfacc releases — verify against the
    installed version before running. Requires a CUDA device.
    """
    device = "cuda:0"
    torch.manual_seed(42)
    profiler = Profiler(warmup=10, repeat=100)

    # # contract
    # print("* contract")
    # x = torch.rand([1024, 3], device=device)
    # roi = torch.tensor([0, 0, 0, 1, 1, 1], dtype=torch.float32, device=device)
    # fn = lambda: nerfacc.contract(
    #     x, roi=roi, type=nerfacc.ContractionType.UN_BOUNDED_TANH
    # )
    # cpu_t, cuda_t, cuda_bytes = profiler(fn)
    # print(f"{cpu_t:.2f} us, {cuda_t:.2f} us, {cuda_bytes / 1024 / 1024:.2f} MB")

    # rendering
    print("* rendering")
    batch_size = 81920
    # Random origins in the unit cube; unit-norm random directions.
    rays_o = torch.rand((batch_size, 3), device=device)
    rays_d = torch.randn((batch_size, 3), device=device)
    rays_d = rays_d / rays_d.norm(dim=-1, keepdim=True)

    ray_indices, t_starts, t_ends = nerfacc._ray_marching(
        rays_o,
        rays_d,
        near_plane=0.1,
        far_plane=1.0,
        render_step_size=1e-1,
    )
    # Random densities with grad enabled so backward() is part of the timing.
    sigmas = torch.randn_like(t_starts, requires_grad=True)
    # NOTE(review): argument order here may not match the current
    # `render_weight_from_density` signature — confirm.
    fn = (
        lambda: nerfacc.render_weight_from_density(
            ray_indices, t_starts, t_ends, sigmas
        )
        .sum()
        .backward()
    )
    fn()
    torch.cuda.synchronize()
    # Extra timed loop with a progress bar, independent of the profiler run.
    for _ in tqdm.tqdm(range(100)):
        fn()
    torch.cuda.synchronize()

    cpu_t, cuda_t, cuda_bytes = profiler(fn)
    print(f"{cpu_t:.2f} us, {cuda_t:.2f} us, {cuda_bytes / 1024 / 1024:.2f} MB")

    # Same benchmark through the packed-info code path.
    packed_info = nerfacc.pack_info(ray_indices, n_rays=batch_size)
    fn = (
        lambda: nerfacc._vol_rendering._RenderingDensity.apply(
            packed_info, t_starts, t_ends, sigmas, 0
        )
        .sum()
        .backward()
    )
    fn()
    torch.cuda.synchronize()
    for _ in tqdm.tqdm(range(100)):
        fn()
    torch.cuda.synchronize()
    cpu_t, cuda_t, cuda_bytes = profiler(fn)
    print(f"{cpu_t:.2f} us, {cuda_t:.2f} us, {cuda_bytes / 1024 / 1024:.2f} MB")
115 |
116 |
117 | if __name__ == "__main__":
118 | main()
119 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [isort]
2 | multi_line_output = 3
3 | line_length = 80
4 | include_trailing_comma = true
5 | skip=./examples/pycolmap,./benchmarks
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import glob
2 | import os
3 | import os.path as osp
4 | import platform
5 | import sys
6 |
7 | from setuptools import find_packages, setup
8 |
9 | __version__ = None
10 | exec(open("nerfacc/version.py", "r").read())
11 |
12 | URL = "https://github.com/nerfstudio-project/nerfacc"
13 |
14 | BUILD_NO_CUDA = os.getenv("BUILD_NO_CUDA", "0") == "1"
15 | WITH_SYMBOLS = os.getenv("WITH_SYMBOLS", "0") == "1"
16 |
17 |
def get_ext():
    """Return the BuildExtension cmdclass used to compile the CUDA sources."""
    from torch.utils.cpp_extension import BuildExtension

    # Keep stable artifact names (no Python ABI suffix) and use the
    # default distutils driver rather than ninja.
    return BuildExtension.with_options(
        use_ninja=False, no_python_abi_suffix=True
    )
24 |
25 |
def get_extensions():
    """Collect and configure the CUDA/HIP extension module.

    Returns:
        A single-element list with the `nerfacc.csrc` CUDAExtension,
        configured for the current platform (Windows/macOS/Linux) and
        backend (CUDA or ROCm).
    """
    import torch
    from torch.__config__ import parallel_info
    from torch.utils.cpp_extension import CUDAExtension

    extensions_dir = osp.join("nerfacc", "cuda", "csrc")
    sources = glob.glob(osp.join(extensions_dir, "*.cu")) + glob.glob(
        osp.join(extensions_dir, "*.cpp")
    )
    # remove generated 'hip' files, in case of rebuilds
    sources = [path for path in sources if "hip" not in path]

    undef_macros = []
    define_macros = []

    if sys.platform == "win32":
        define_macros += [("nerfacc_EXPORTS", None)]

    extra_compile_args = {"cxx": ["-O3"]}
    if not os.name == "nt":  # Not on Windows:
        extra_compile_args["cxx"] += ["-Wno-sign-compare"]
    # Strip symbols unless a debuggable build was requested.
    extra_link_args = [] if WITH_SYMBOLS else ["-s"]

    # Enable OpenMP parallelism when PyTorch itself was built with it.
    info = parallel_info()
    if (
        "backend: OpenMP" in info
        and "OpenMP not found" not in info
        and sys.platform != "darwin"
    ):
        extra_compile_args["cxx"] += ["-DAT_PARALLEL_OPENMP"]
        if sys.platform == "win32":
            extra_compile_args["cxx"] += ["/openmp"]
        else:
            extra_compile_args["cxx"] += ["-fopenmp"]
    else:
        print("Compiling without OpenMP...")

    # Compile for mac arm64
    if sys.platform == "darwin" and platform.machine() == "arm64":
        extra_compile_args["cxx"] += ["-arch", "arm64"]
        extra_link_args += ["-arch", "arm64"]

    nvcc_flags = os.getenv("NVCC_FLAGS", "")
    nvcc_flags = [] if nvcc_flags == "" else nvcc_flags.split(" ")
    nvcc_flags += ["-O3"]
    if torch.version.hip:
        # USE_ROCM was added to later versions of PyTorch.
        # Define here to support older PyTorch versions as well:
        define_macros += [("USE_ROCM", None)]
        undef_macros += ["__HIP_NO_HALF_CONVERSIONS__"]
    else:
        nvcc_flags += ["--expt-relaxed-constexpr"]
    extra_compile_args["nvcc"] = nvcc_flags

    extension = CUDAExtension(
        # Fixed: dropped the pointless f-prefix (literal has no placeholders).
        "nerfacc.csrc",
        sources,
        include_dirs=[osp.join(extensions_dir, "include")],
        define_macros=define_macros,
        undef_macros=undef_macros,
        extra_compile_args=extra_compile_args,
        extra_link_args=extra_link_args,
    )

    return [extension]
91 |
92 |
# work-around hipify abs paths
include_package_data = True
# if torch.cuda.is_available() and torch.version.hip:
#     include_package_data = False

setup(
    name="nerfacc",
    version=__version__,
    description="A General NeRF Acceleration Toolbox",
    author="Ruilong",
    author_email="ruilongli94@gmail.com",
    url=URL,
    download_url=f"{URL}/archive/{__version__}.tar.gz",
    keywords=[],
    python_requires=">=3.7",
    # Runtime dependencies are intentionally minimal.
    install_requires=[
        "rich>=12",
        "torch",
        "typing_extensions; python_version<'3.8'",
    ],
    extras_require={
        # dev dependencies. Install them by `pip install nerfacc[dev]`
        "dev": [
            "black[jupyter]==22.3.0",
            "isort==5.10.1",
            "pylint==2.13.4",
            "pytest==7.1.2",
            "pytest-xdist==2.5.0",
            "typeguard>=2.13.3",
            "pyyaml==6.0",
            "build",
            "twine",
            "ninja",
        ],
    },
    # BUILD_NO_CUDA=1 produces a pure-Python wheel with no compiled extension.
    ext_modules=get_extensions() if not BUILD_NO_CUDA else [],
    cmdclass={"build_ext": get_ext()} if not BUILD_NO_CUDA else {},
    packages=find_packages(),
    include_package_data=include_package_data,
)
133 |
--------------------------------------------------------------------------------
/tests/test_camera.py:
--------------------------------------------------------------------------------
1 | from typing import Tuple
2 |
3 | import pytest
4 | import torch
5 | import tqdm
6 | from torch import Tensor
7 |
8 | device = "cuda:0"
9 |
10 |
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
@torch.no_grad()
def test_opencv_lens_undistortion():
    """Round-trip check: re-distorting the undistorted points recovers the input.

    Bugfix: `torch.cuda.is_available` was not called — the bare function
    object is always truthy, so the skip condition was always False and the
    test ran (and crashed) on CUDA-less machines.
    """
    from nerfacc.cameras import (
        _opencv_lens_distortion,
        _opencv_lens_distortion_fisheye,
        _opencv_lens_undistortion,
        opencv_lens_undistortion,
        opencv_lens_undistortion_fisheye,
    )

    torch.manual_seed(42)

    x = torch.rand((3, 1000, 2), device=device)

    # 8-parameter OpenCV distortion model.
    params = torch.rand((8), device=device) * 0.01
    x_undistort = opencv_lens_undistortion(x, params, 1e-5, 10)
    _x_undistort = _opencv_lens_undistortion(x, params, 1e-5, 10)
    assert torch.allclose(x_undistort, _x_undistort, atol=1e-5)
    x_distort = _opencv_lens_distortion(x_undistort, params)
    assert torch.allclose(x, x_distort, atol=1e-5), (x - x_distort).abs().max()
    # print(x[0, 0], x_distort[0, 0], x_undistort[0, 0])

    # 4-parameter fisheye model.
    params = torch.rand((4), device=device) * 0.01
    x_undistort = opencv_lens_undistortion_fisheye(x, params, 1e-5, 10)
    x_distort = _opencv_lens_distortion_fisheye(x_undistort, params)
    assert torch.allclose(x, x_distort, atol=1e-5), (x - x_distort).abs().max()
    # print(x[0, 0], x_distort[0, 0], x_undistort[0, 0])
39 |
40 |
41 | if __name__ == "__main__":
42 | test_opencv_lens_undistortion()
43 |
--------------------------------------------------------------------------------
/tests/test_pack.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import torch
3 |
4 | device = "cuda:0"
5 |
6 |
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_pack_info():
    """pack_info should reproduce the expected (start, count) chunks.

    Bugfix: `torch.cuda.is_available` must be called — the bare function
    object is always truthy, so the skip marker never triggered.
    """
    from nerfacc.pack import pack_info

    # Expected packed_info for rays 0..2 (ray 1 has no samples).
    _packed_info = torch.tensor(
        [[0, 1], [1, 0], [1, 4]], dtype=torch.int32, device=device
    )
    ray_indices = torch.tensor(
        [0, 2, 2, 2, 2], dtype=torch.int64, device=device
    )
    packed_info = pack_info(ray_indices, n_rays=_packed_info.shape[0])
    assert (packed_info == _packed_info).all()
19 |
20 |
21 | if __name__ == "__main__":
22 | test_pack_info()
23 |
--------------------------------------------------------------------------------
/tests/test_pdf.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import torch
3 |
4 | device = "cuda:0"
5 |
6 |
def _create_intervals(n_rays, n_samples, flat=False):
    """Build a random RayIntervals fixture, either batched or flattened."""
    from nerfacc.data_specs import RayIntervals

    # Keep the RNG call order identical (edges first, then masks) so the
    # fixture is reproducible under the fixed seed.
    torch.manual_seed(42)
    edges = torch.rand((n_rays, n_samples + 1), device=device)
    edges = torch.sort(edges, -1)[0]

    # Randomly keep ~half of the intervals; derive edge-side flags.
    sample_masks = torch.rand((n_rays, n_samples), device=device) > 0.5
    bool_pad = torch.zeros((n_rays, 1), device=device, dtype=torch.bool)
    is_lefts = torch.cat([sample_masks, bool_pad], dim=-1)
    is_rights = torch.cat([bool_pad, sample_masks], dim=-1)

    if not flat:
        return RayIntervals(vals=edges)

    # Flattened variant: keep only edges that belong to a kept interval and
    # pack them with per-ray (start, count) metadata.
    interval_masks = is_lefts | is_rights
    edges = edges[interval_masks]
    is_lefts = is_lefts[interval_masks]
    is_rights = is_rights[interval_masks]
    chunk_cnts = interval_masks.long().sum(-1)
    chunk_starts = torch.cumsum(chunk_cnts, 0) - chunk_cnts
    packed_info = torch.stack([chunk_starts, chunk_cnts], -1)

    return RayIntervals(
        edges, packed_info, is_left=is_lefts, is_right=is_rights
    )
43 |
44 |
# NOTE: fixed missing call parentheses — the bare `torch.cuda.is_available`
# function object is always truthy, so the skip never triggered.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_searchsorted():
    """Compare nerfacc's searchsorted against torch.searchsorted.

    Both the right-side indices and the key values gathered with them must
    match the reference implementation.
    """
    from nerfacc.data_specs import RayIntervals
    from nerfacc.pdf import searchsorted

    torch.manual_seed(42)
    query: RayIntervals = _create_intervals(10, 100, flat=False)
    key: RayIntervals = _create_intervals(10, 100, flat=False)

    ids_left, ids_right = searchsorted(key, query)
    y = key.vals.gather(-1, ids_right)

    # Reference: torch.searchsorted with right=True, clamped into range.
    _ids_right = torch.searchsorted(key.vals, query.vals, right=True)
    _ids_right = torch.clamp(_ids_right, 0, key.vals.shape[-1] - 1)
    _y = key.vals.gather(-1, _ids_right)

    # Indices are integral, so compare them exactly.
    assert torch.equal(ids_right, _ids_right)
    assert torch.allclose(y, _y)
63 |
64 |
# NOTE: fixed missing call parentheses on torch.cuda.is_available.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_importance_sampling():
    """Compare batched importance_sampling with a per-ray reference.

    ``importance_sampling`` runs on all rays at once; its outputs are then
    checked ray-by-ray against ``_sample_from_weighted``.
    """
    from nerfacc.data_specs import RayIntervals
    from nerfacc.pdf import _sample_from_weighted, importance_sampling

    torch.manual_seed(42)
    intervals: RayIntervals = _create_intervals(5, 100, flat=False)
    # Random monotonic CDFs over the interval edges.
    cdfs = torch.rand_like(intervals.vals)
    cdfs = torch.sort(cdfs, -1)[0]
    n_intervals_per_ray = 100
    stratified = False  # deterministic sampling so the two paths agree

    _intervals, _samples = importance_sampling(
        intervals,
        cdfs,
        n_intervals_per_ray,
        stratified,
    )

    for i in range(intervals.vals.shape[0]):
        # The reference implementation handles one ray at a time and takes
        # per-interval weights (CDF differences) instead of CDFs.
        _vals, _mids = _sample_from_weighted(
            intervals.vals[i : i + 1],
            cdfs[i : i + 1, 1:] - cdfs[i : i + 1, :-1],
            n_intervals_per_ray,
            stratified,
            intervals.vals[i].min(),
            intervals.vals[i].max(),
        )
        assert torch.allclose(_intervals.vals[i : i + 1], _vals, atol=1e-4)
        assert torch.allclose(_samples.vals[i : i + 1], _mids, atol=1e-4)
95 |
96 |
# NOTE: fixed missing call parentheses on torch.cuda.is_available.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_pdf_loss():
    """Check _pdf_loss against the reference _lossfun_outer.

    The proposal intervals are produced by ``importance_sampling`` and both
    losses are fed the same (intervals, CDF) pairs; ``_lossfun_outer`` takes
    per-interval weights (CDF differences) instead of CDFs.
    """
    from nerfacc.data_specs import RayIntervals
    from nerfacc.estimators.prop_net import _lossfun_outer, _pdf_loss
    from nerfacc.pdf import _sample_from_weighted, importance_sampling

    torch.manual_seed(42)
    intervals: RayIntervals = _create_intervals(5, 100, flat=False)
    cdfs = torch.rand_like(intervals.vals)
    cdfs = torch.sort(cdfs, -1)[0]
    n_intervals_per_ray = 10
    stratified = False

    # Resample a coarser set of intervals from the CDFs.
    _intervals, _samples = importance_sampling(
        intervals,
        cdfs,
        n_intervals_per_ray,
        stratified,
    )
    _cdfs = torch.rand_like(_intervals.vals)
    _cdfs = torch.sort(_cdfs, -1)[0]

    loss = _pdf_loss(intervals, cdfs, _intervals, _cdfs)

    loss2 = _lossfun_outer(
        intervals.vals,
        cdfs[:, 1:] - cdfs[:, :-1],
        _intervals.vals,
        _cdfs[:, 1:] - _cdfs[:, :-1],
    )
    assert torch.allclose(loss, loss2, atol=1e-4)
128 |
129 |
# Allow running this test module directly without pytest.
if __name__ == "__main__":
    test_importance_sampling()
    test_searchsorted()
    test_pdf_loss()
134 |
--------------------------------------------------------------------------------
/tests/test_rendering.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import torch
3 |
4 | device = "cuda:0"
5 |
6 |
# NOTE: fixed missing call parentheses — the bare `torch.cuda.is_available`
# function object is always truthy, so the skip never triggered.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_render_visibility():
    """Check visibility with early termination and alpha thresholding."""
    from nerfacc.volrend import render_visibility_from_alpha

    ray_indices = torch.tensor(
        [0, 2, 2, 2, 2], dtype=torch.int64, device=device
    )  # (all_samples,)
    alphas = torch.tensor(
        [0.4, 0.3, 0.8, 0.8, 0.5], dtype=torch.float32, device=device
    )  # (all_samples,)

    # transmittance: [1.0, 1.0, 0.7, 0.14, 0.028]
    # The last sample falls below early_stop_eps=0.03 -> invisible.
    vis = render_visibility_from_alpha(
        alphas, ray_indices=ray_indices, early_stop_eps=0.03, alpha_thre=0.0
    )
    vis_tgt = torch.tensor(
        [True, True, True, True, False], dtype=torch.bool, device=device
    )
    assert torch.allclose(vis, vis_tgt)

    # With alpha_thre=0.35 the 0.3 sample is skipped, changing the
    # transmittance to [1.0, 1.0, 1.0, 0.2, 0.04].
    vis = render_visibility_from_alpha(
        alphas, ray_indices=ray_indices, early_stop_eps=0.05, alpha_thre=0.35
    )
    vis_tgt = torch.tensor(
        [True, False, True, True, False], dtype=torch.bool, device=device
    )
    assert torch.allclose(vis, vis_tgt)
35 |
36 |
# NOTE: fixed missing call parentheses on torch.cuda.is_available.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_render_weight_from_alpha():
    """Check weight = transmittance * alpha against hand-computed values."""
    from nerfacc.volrend import render_weight_from_alpha

    ray_indices = torch.tensor(
        [0, 2, 2, 2, 2], dtype=torch.int64, device=device
    )  # (all_samples,)
    alphas = torch.tensor(
        [0.4, 0.3, 0.8, 0.8, 0.5], dtype=torch.float32, device=device
    )  # (all_samples,)

    # transmittance: [1.0, 1.0, 0.7, 0.14, 0.028]
    weights, _ = render_weight_from_alpha(
        alphas, ray_indices=ray_indices, n_rays=3
    )
    # Each weight is transmittance-so-far times the sample's alpha.
    weights_tgt = torch.tensor(
        [1.0 * 0.4, 1.0 * 0.3, 0.7 * 0.8, 0.14 * 0.8, 0.028 * 0.5],
        dtype=torch.float32,
        device=device,
    )
    assert torch.allclose(weights, weights_tgt)
58 |
59 |
# NOTE: fixed missing call parentheses on torch.cuda.is_available.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_render_weight_from_density():
    """Check that the density path matches the alpha path.

    alpha = 1 - exp(-sigma * (t_end - t_start)), so both entry points must
    produce identical weights for the same samples.
    """
    from nerfacc.volrend import (
        render_weight_from_alpha,
        render_weight_from_density,
    )

    ray_indices = torch.tensor(
        [0, 2, 2, 2, 2], dtype=torch.int64, device=device
    )  # (all_samples,)
    sigmas = torch.rand(
        (ray_indices.shape[0],), device=device
    )  # (all_samples,)
    t_starts = torch.rand_like(sigmas)
    t_ends = torch.rand_like(sigmas) + 1.0
    alphas = 1.0 - torch.exp(-sigmas * (t_ends - t_starts))

    weights, _, _ = render_weight_from_density(
        t_starts, t_ends, sigmas, ray_indices=ray_indices, n_rays=3
    )
    weights_tgt, _ = render_weight_from_alpha(
        alphas, ray_indices=ray_indices, n_rays=3
    )
    assert torch.allclose(weights, weights_tgt)
84 |
85 |
# NOTE: fixed missing call parentheses on torch.cuda.is_available.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_accumulate_along_rays():
    """Check weighted accumulation of per-sample values into per-ray sums.

    Ray 0 gets one sample, ray 1 none (must be all zeros), ray 2 four.
    """
    from nerfacc.volrend import accumulate_along_rays

    ray_indices = torch.tensor(
        [0, 2, 2, 2, 2], dtype=torch.int64, device=device
    )  # (all_samples,)
    weights = torch.tensor(
        [0.4, 0.3, 0.8, 0.8, 0.5], dtype=torch.float32, device=device
    )  # (all_samples,)
    values = torch.rand((5, 2), device=device)  # (all_samples, 2)

    ray_values = accumulate_along_rays(
        weights, values=values, ray_indices=ray_indices, n_rays=3
    )
    assert ray_values.shape == (3, 2)
    assert torch.allclose(ray_values[0, :], weights[0, None] * values[0, :])
    assert (ray_values[1, :] == 0).all()
    assert torch.allclose(
        ray_values[2, :], (weights[1:, None] * values[1:]).sum(dim=0)
    )
107 |
108 |
# NOTE: fixed missing call parentheses on torch.cuda.is_available.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_grads():
    """Check weights and sigma-gradients against precomputed references.

    Exercises transmittance-from-density, fused weight-from-density, and
    weight-from-alpha, each in both the ``ray_indices`` and ``packed_info``
    sample layouts. All six variants must reproduce the same weights and
    the same d(sum(weights))/d(sigmas).
    """
    from nerfacc.volrend import (
        render_transmittance_from_density,
        render_weight_from_alpha,
        render_weight_from_density,
    )

    # The same 5 samples expressed in both supported layouts.
    ray_indices = torch.tensor(
        [0, 2, 2, 2, 2], dtype=torch.int64, device=device
    )  # (all_samples,)
    packed_info = torch.tensor(
        [[0, 1], [1, 0], [1, 4]], dtype=torch.long, device=device
    )
    sigmas = torch.tensor([0.4, 0.8, 0.1, 0.8, 0.1], device=device)
    sigmas.requires_grad = True
    t_starts = torch.rand_like(sigmas)
    t_ends = t_starts + 1.0  # every interval has length 1.0

    # References precomputed for the sigmas / unit interval lengths above.
    weights_ref = torch.tensor(
        [0.3297, 0.5507, 0.0428, 0.2239, 0.0174], device=device
    )
    sigmas_grad_ref = torch.tensor(
        [0.6703, 0.1653, 0.1653, 0.1653, 0.1653], device=device
    )

    def _check(weights):
        # Backprop through `weights`, then compare values and gradients.
        weights.sum().backward()
        sigmas_grad = sigmas.grad.clone()
        sigmas.grad.zero_()
        assert torch.allclose(weights_ref, weights, atol=1e-4)
        assert torch.allclose(sigmas_grad_ref, sigmas_grad, atol=1e-4)

    # Naive impl.: transmittance from density, ray_indices layout.
    trans, _ = render_transmittance_from_density(
        t_starts, t_ends, sigmas, ray_indices=ray_indices, n_rays=3
    )
    _check(trans * (1.0 - torch.exp(-sigmas * (t_ends - t_starts))))

    # Naive impl.: transmittance from density, packed_info layout.
    # (The original comment said "trans from alpha", but this call is the
    # packed_info variant of render_transmittance_from_density.)
    trans, _ = render_transmittance_from_density(
        t_starts, t_ends, sigmas, packed_info=packed_info, n_rays=3
    )
    _check(trans * (1.0 - torch.exp(-sigmas * (t_ends - t_starts))))

    # Fused weight-from-density, both layouts.
    weights, _, _ = render_weight_from_density(
        t_starts, t_ends, sigmas, ray_indices=ray_indices, n_rays=3
    )
    _check(weights)

    weights, _, _ = render_weight_from_density(
        t_starts, t_ends, sigmas, packed_info=packed_info, n_rays=3
    )
    _check(weights)

    # Weight-from-alpha, both layouts.
    alphas = 1.0 - torch.exp(-sigmas * (t_ends - t_starts))
    weights, _ = render_weight_from_alpha(
        alphas, ray_indices=ray_indices, n_rays=3
    )
    _check(weights)

    alphas = 1.0 - torch.exp(-sigmas * (t_ends - t_starts))
    weights, _ = render_weight_from_alpha(
        alphas, packed_info=packed_info, n_rays=3
    )
    _check(weights)
194 |
195 |
# NOTE: fixed missing call parentheses on torch.cuda.is_available.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_rendering():
    """Smoke test: the full rendering() pipeline runs without error."""
    from nerfacc.volrend import rendering

    def rgb_sigma_fn(t_starts, t_ends, ray_indices):
        # Dummy radiance field: rgb channels and density both derived
        # from t_starts so the pipeline has deterministic inputs.
        return torch.stack([t_starts] * 3, dim=-1), t_starts

    ray_indices = torch.tensor(
        [0, 2, 2, 2, 2], dtype=torch.int64, device=device
    )  # (all_samples,)
    sigmas = torch.rand(
        (ray_indices.shape[0],), device=device
    )  # (all_samples,)
    t_starts = torch.rand_like(sigmas)
    t_ends = torch.rand_like(sigmas) + 1.0

    # Returns (rgb, opacity, depth, extras); only execution is checked here.
    _, _, _, _ = rendering(
        t_starts,
        t_ends,
        ray_indices=ray_indices,
        n_rays=3,
        rgb_sigma_fn=rgb_sigma_fn,
    )
219 |
220 |
# Allow running this test module directly without pytest.
if __name__ == "__main__":
    test_render_visibility()
    test_render_weight_from_alpha()
    test_render_weight_from_density()
    test_accumulate_along_rays()
    test_grads()
    test_rendering()
228 |
--------------------------------------------------------------------------------
/tests/test_scan.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import torch
3 |
4 | device = "cuda:0"
5 |
6 |
# NOTE: `torch.cuda.is_available` must be *called*; the bare function object
# is always truthy, so the skip condition never triggered.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_inclusive_sum():
    """Check that batched, packed and indexed inclusive_sum agree.

    The same (5, 1000) data is scanned three ways: as a dense 2D batch, as a
    flattened tensor with per-chunk ``packed_info``, and as a flattened
    tensor with per-element ``indices``. Outputs and gradients must match.
    """
    from nerfacc.scan import inclusive_sum

    torch.manual_seed(42)

    data = torch.rand((5, 1000), device=device, requires_grad=True)

    # 1) Dense 2D path.
    outputs1 = inclusive_sum(data)
    outputs1 = outputs1.flatten()
    outputs1.sum().backward()
    grad1 = data.grad.clone()
    data.grad.zero_()

    # 2) Flattened path with per-chunk [start, count] info.
    chunk_starts = torch.arange(
        0, data.numel(), data.shape[1], device=device, dtype=torch.long
    )
    chunk_cnts = torch.full(
        (data.shape[0],), data.shape[1], dtype=torch.long, device=device
    )
    packed_info = torch.stack([chunk_starts, chunk_cnts], dim=-1)
    flatten_data = data.flatten()
    outputs2 = inclusive_sum(flatten_data, packed_info=packed_info)
    outputs2.sum().backward()
    grad2 = data.grad.clone()
    data.grad.zero_()

    # 3) Flattened path with a per-element chunk index.
    indices = torch.arange(data.shape[0], device=device, dtype=torch.long)
    indices = indices.repeat_interleave(data.shape[1])
    indices = indices.flatten()
    outputs3 = inclusive_sum(flatten_data, indices=indices)
    outputs3.sum().backward()
    grad3 = data.grad.clone()
    data.grad.zero_()

    assert torch.allclose(outputs1, outputs2)
    assert torch.allclose(grad1, grad2)

    assert torch.allclose(outputs1, outputs3)
    assert torch.allclose(grad1, grad3)
46 |
47 |
# NOTE: fixed missing call parentheses on torch.cuda.is_available.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_exclusive_sum():
    """Check that batched, packed and indexed exclusive_sum agree.

    Same three code paths as the inclusive test; the packed/indexed paths
    are allowed a small absolute tolerance against the dense path.
    """
    from nerfacc.scan import exclusive_sum

    torch.manual_seed(42)

    data = torch.rand((5, 1000), device=device, requires_grad=True)

    # 1) Dense 2D path.
    outputs1 = exclusive_sum(data)
    outputs1 = outputs1.flatten()
    outputs1.sum().backward()
    grad1 = data.grad.clone()
    data.grad.zero_()

    # 2) Flattened path with per-chunk [start, count] info.
    chunk_starts = torch.arange(
        0, data.numel(), data.shape[1], device=device, dtype=torch.long
    )
    chunk_cnts = torch.full(
        (data.shape[0],), data.shape[1], dtype=torch.long, device=device
    )
    packed_info = torch.stack([chunk_starts, chunk_cnts], dim=-1)
    flatten_data = data.flatten()
    outputs2 = exclusive_sum(flatten_data, packed_info=packed_info)
    outputs2.sum().backward()
    grad2 = data.grad.clone()
    data.grad.zero_()

    # 3) Flattened path with a per-element chunk index.
    indices = torch.arange(data.shape[0], device=device, dtype=torch.long)
    indices = indices.repeat_interleave(data.shape[1])
    indices = indices.flatten()
    outputs3 = exclusive_sum(flatten_data, indices=indices)
    outputs3.sum().backward()
    grad3 = data.grad.clone()
    data.grad.zero_()

    # TODO: the packed/indexed paths accumulate up to ~2e-4 of float error
    # vs. the dense path; investigate the kernel's summation order.
    assert torch.allclose(outputs1, outputs2, atol=3e-4)
    assert torch.allclose(grad1, grad2)

    assert torch.allclose(outputs1, outputs3, atol=3e-4)
    assert torch.allclose(grad1, grad3)
89 |
90 |
# NOTE: fixed missing call parentheses on torch.cuda.is_available.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_inclusive_prod():
    """Check that batched, packed and indexed inclusive_prod agree.

    Same three code paths as test_inclusive_sum, for the product scan.
    """
    from nerfacc.scan import inclusive_prod

    torch.manual_seed(42)

    data = torch.rand((5, 1000), device=device, requires_grad=True)

    # 1) Dense 2D path.
    outputs1 = inclusive_prod(data)
    outputs1 = outputs1.flatten()
    outputs1.sum().backward()
    grad1 = data.grad.clone()
    data.grad.zero_()

    # 2) Flattened path with per-chunk [start, count] info.
    chunk_starts = torch.arange(
        0, data.numel(), data.shape[1], device=device, dtype=torch.long
    )
    chunk_cnts = torch.full(
        (data.shape[0],), data.shape[1], dtype=torch.long, device=device
    )
    packed_info = torch.stack([chunk_starts, chunk_cnts], dim=-1)
    flatten_data = data.flatten()
    outputs2 = inclusive_prod(flatten_data, packed_info=packed_info)
    outputs2.sum().backward()
    grad2 = data.grad.clone()
    data.grad.zero_()

    # 3) Flattened path with a per-element chunk index.
    indices = torch.arange(data.shape[0], device=device, dtype=torch.long)
    indices = indices.repeat_interleave(data.shape[1])
    indices = indices.flatten()
    outputs3 = inclusive_prod(flatten_data, indices=indices)
    outputs3.sum().backward()
    grad3 = data.grad.clone()
    data.grad.zero_()

    assert torch.allclose(outputs1, outputs2)
    assert torch.allclose(grad1, grad2)

    assert torch.allclose(outputs1, outputs3)
    assert torch.allclose(grad1, grad3)
130 |
131 |
# NOTE: fixed missing call parentheses on torch.cuda.is_available.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_exclusive_prod():
    """Check that batched, packed and indexed exclusive_prod agree.

    Same three code paths as test_exclusive_sum, for the product scan.
    """
    from nerfacc.scan import exclusive_prod

    torch.manual_seed(42)

    data = torch.rand((5, 1000), device=device, requires_grad=True)

    # 1) Dense 2D path.
    outputs1 = exclusive_prod(data)
    outputs1 = outputs1.flatten()
    outputs1.sum().backward()
    grad1 = data.grad.clone()
    data.grad.zero_()

    # 2) Flattened path with per-chunk [start, count] info.
    chunk_starts = torch.arange(
        0, data.numel(), data.shape[1], device=device, dtype=torch.long
    )
    chunk_cnts = torch.full(
        (data.shape[0],), data.shape[1], dtype=torch.long, device=device
    )
    packed_info = torch.stack([chunk_starts, chunk_cnts], dim=-1)
    flatten_data = data.flatten()
    outputs2 = exclusive_prod(flatten_data, packed_info=packed_info)
    outputs2.sum().backward()
    grad2 = data.grad.clone()
    data.grad.zero_()

    # 3) Flattened path with a per-element chunk index.
    indices = torch.arange(data.shape[0], device=device, dtype=torch.long)
    indices = indices.repeat_interleave(data.shape[1])
    indices = indices.flatten()
    outputs3 = exclusive_prod(flatten_data, indices=indices)
    outputs3.sum().backward()
    grad3 = data.grad.clone()
    data.grad.zero_()

    # TODO: check exclusive prod for numeric error, as with exclusive_sum.
    # (The original comment said "exclusive sum" here by copy-paste.)
    assert torch.allclose(outputs1, outputs2)
    assert torch.allclose(grad1, grad2)

    assert torch.allclose(outputs1, outputs3)
    assert torch.allclose(grad1, grad3)
173 |
174 |
def profile():
    """Benchmark the packed_info vs. indices code paths of inclusive_sum.

    Not a test: runs forward + backward 2000 times per layout on a large
    (8192, 8192) tensor so the two paths can be timed via tqdm's rate.
    """
    import tqdm

    from nerfacc.scan import inclusive_sum

    torch.manual_seed(42)

    data = torch.rand((8192, 8192), device=device, requires_grad=True)
    n_chunks, chunk_size = data.shape
    flat = data.flatten()

    # Layout 1: per-chunk [start, count] rows.
    starts = torch.arange(
        0, data.numel(), chunk_size, device=device, dtype=torch.long
    )
    counts = torch.full(
        (n_chunks,), chunk_size, dtype=torch.long, device=device
    )
    packed_info = torch.stack([starts, counts], dim=-1)
    torch.cuda.synchronize()  # don't count setup time in the loop rate
    for _ in tqdm.trange(2000):
        out = inclusive_sum(flat, packed_info=packed_info)
        out.sum().backward()

    # Layout 2: one chunk index per element.
    chunk_ids = torch.arange(n_chunks, device=device, dtype=torch.long)
    chunk_ids = chunk_ids.repeat_interleave(chunk_size).flatten()
    torch.cuda.synchronize()
    for _ in tqdm.trange(2000):
        out = inclusive_sum(flat, indices=chunk_ids)
        out.sum().backward()
204 |
205 |
# Allow running this test module directly without pytest; also runs the
# profile() benchmark, which pytest would not collect.
if __name__ == "__main__":
    test_inclusive_sum()
    test_exclusive_sum()
    test_inclusive_prod()
    test_exclusive_prod()
    profile()
212 |
--------------------------------------------------------------------------------
/tests/test_vdb.py:
--------------------------------------------------------------------------------
1 | import warnings
2 |
3 | import pytest
4 | import torch
5 |
6 | device = "cuda:0"
7 |
8 |
# NOTE: fixed missing call parentheses on torch.cuda.is_available.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_traverse_vdbs():
    """Check traverse_vdbs against traverse_grids on the same occupancy."""
    try:
        import fvdb
    except ImportError:
        warnings.warn("fVDB is not installed. Skip the test.")
        return

    from nerfacc.estimators.vdb import traverse_vdbs
    from nerfacc.grid import _enlarge_aabb, traverse_grids

    torch.manual_seed(42)
    n_rays = 100
    n_aabbs = 1
    reso = 32
    cone_angle = 1e-3

    rays_o = torch.randn((n_rays, 3), device=device)
    rays_d = torch.randn((n_rays, 3), device=device)
    rays_d = rays_d / rays_d.norm(dim=-1, keepdim=True)

    base_aabb = torch.tensor([-1.0, -1.0, -1.0, 1.0, 1.0, 1.0], device=device)
    aabbs = torch.stack(
        [_enlarge_aabb(base_aabb, 2**i) for i in range(n_aabbs)]
    )

    # Reference: traverse a 1-level cascaded dense grid.
    binaries = torch.rand((n_aabbs, reso, reso, reso), device=device) > 0.5

    intervals, samples, _ = traverse_grids(
        rays_o, rays_d, binaries, aabbs, cone_angle=cone_angle
    )
    # BUGFIX: keep the reference results under distinct names. Previously
    # these variables were overwritten by the traverse_vdbs call below, so
    # every assert compared a tensor against itself and could never fail.
    ray_indices_ref = samples.ray_indices
    t_starts_ref = intervals.vals[intervals.is_left]
    t_ends_ref = intervals.vals[intervals.is_right]

    # Traverse a single fvdb grid built from the same occupancy.
    grid = fvdb.GridBatch(device=device)
    voxel_sizes = (aabbs[:, 3:] - aabbs[:, :3]) / reso
    origins = aabbs[:, :3] + voxel_sizes / 2
    ijks = torch.stack(torch.where(binaries[0]), dim=-1)
    grid.set_from_ijk(ijks, voxel_sizes=voxel_sizes, origins=origins)

    t_starts, t_ends, ray_indices = traverse_vdbs(
        rays_o, rays_d, grid, cone_angle=cone_angle
    )

    assert torch.allclose(t_starts, t_starts_ref)
    assert torch.allclose(t_ends, t_ends_ref)
    assert torch.all(ray_indices == ray_indices_ref)
59 |
60 |
# NOTE: fixed missing call parentheses on torch.cuda.is_available.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device")
def test_base_vdb_estimator():
    """Check VDBEstimator against OccGridEstimator.

    Both estimators are updated from the same occupancy function and must
    store identical occupancy values and produce identical ray samples.
    """
    try:
        import fvdb
    except ImportError:
        warnings.warn("fVDB is not installed. Skip the test.")
        return

    import tqdm

    from nerfacc.estimators.occ_grid import OccGridEstimator
    from nerfacc.estimators.vdb import VDBEstimator
    from nerfacc.grid import _query

    torch.manual_seed(42)

    # BUGFIX: this was committed as True, which disables every assertion in
    # this test and runs 1000-iteration benchmark loops instead. Flip to
    # True only for local, manual benchmarking.
    profile = False
    n_aabbs = 1
    reso = 32
    base_aabb = torch.tensor([-1.0, -1.0, -1.0, 1.0, 1.0, 1.0], device=device)
    occ_thre = 0.1

    # Create the target occ grid.
    occs = torch.rand((n_aabbs, reso, reso, reso), device=device)

    def occ_eval_fn(x):
        # Occupancy lookup into the target grid at positions x.
        return _query(x, occs, base_aabb)[0]

    # Create the OccGridEstimator and check it reproduces `occs` exactly.
    estimator1 = OccGridEstimator(base_aabb, reso, n_aabbs).to(device)
    for _ in tqdm.trange(1000) if profile else range(1):
        estimator1._update(step=0, occ_eval_fn=occ_eval_fn, occ_thre=occ_thre)
    occs1 = estimator1.occs.reshape_as(occs)
    err = (occs - occs1).abs().max()
    if not profile:
        assert err == 0

    # Create the VDBEstimator over an equivalent dense sparse-grid.
    voxel_sizes = (base_aabb[3:] - base_aabb[:3]) / reso
    origins = base_aabb[:3] + voxel_sizes / 2
    grid = fvdb.sparse_grid_from_dense(
        1, (reso, reso, reso), voxel_sizes=voxel_sizes, origins=origins
    )
    estimator2 = VDBEstimator(grid).to(device)
    for _ in tqdm.trange(1000) if profile else range(1):
        estimator2._update(step=0, occ_eval_fn=occ_eval_fn, occ_thre=occ_thre)

    # Compare the VDB occupancies voxel-by-voxel via ijk -> index mapping.
    ijks = estimator1.grid_coords
    index = estimator2.grid.ijk_to_index(ijks).jdata
    occs2 = estimator2.occs[index].reshape_as(occs)
    err = (occs - occs2).abs().max()
    if not profile:
        assert err == 0

    # Ray marching: both estimators must emit identical samples.
    n_rays = 100
    n_aabbs = 1
    reso = 32
    cone_angle = 1e-3

    rays_o = torch.randn((n_rays, 3), device=device)
    rays_d = torch.randn((n_rays, 3), device=device)
    rays_d = rays_d / rays_d.norm(dim=-1, keepdim=True)

    for _ in tqdm.trange(1000) if profile else range(1):
        ray_indices1, t_starts1, t_ends1 = estimator1.sampling(
            rays_o, rays_d, render_step_size=0.1, cone_angle=cone_angle
        )
    for _ in tqdm.trange(1000) if profile else range(1):
        ray_indices2, t_starts2, t_ends2 = estimator2.sampling(
            rays_o, rays_d, render_step_size=0.1, cone_angle=cone_angle
        )
    assert torch.all(ray_indices1 == ray_indices2)
    assert torch.allclose(t_starts1, t_starts2)
    assert torch.allclose(t_ends1, t_ends2)
136 |
137 |
# Allow running this test module directly without pytest.
if __name__ == "__main__":
    test_traverse_vdbs()
    test_base_vdb_estimator()
141 |
--------------------------------------------------------------------------------