├── .github
    └── workflows
    │   └── autobuild.yml
├── .gitignore
├── CMakeLists.txt
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── img
    ├── output_examples.jpg
    └── viewvox.JPG
├── msvc
    └── vs2022
    │   ├── cuda_voxelizer.sln
    │   ├── cuda_voxelizer.vcxproj
    │   ├── cuda_voxelizer.vcxproj.filters
    │   └── custom_includes.props
├── src
    ├── cpu_voxelizer.cpp
    ├── cpu_voxelizer.h
    ├── libs
    │   ├── cuda
    │   │   ├── helper_cuda.h
    │   │   ├── helper_math.h
    │   │   └── helper_string.h
    │   └── magicavoxel_file_writer
    │   │   ├── LICENSE
    │   │   ├── VoxWriter.cpp
    │   │   └── VoxWriter.h
    ├── main.cpp
    ├── morton_LUTs.h
    ├── timer.h
    ├── todo.txt
    ├── util.h
    ├── util_cuda.cpp
    ├── util_cuda.h
    ├── util_io.cpp
    ├── util_io.h
    ├── voxelize.cu
    ├── voxelize.cuh
    └── voxelize_solid.cu
└── test_models
    ├── bunny.OBJ
    └── credit.txt


/.github/workflows/autobuild.yml:
--------------------------------------------------------------------------------
  1 | name: build  # workflow name
  2 | 
  3 | on:  # events that start this workflow
  4 |   push:  # commits pushed to one of the branches listed below
  5 |     branches:
  6 |       - main
  7 |       - dev
  8 |   pull_request:  # pull requests targeting one of the branches listed below
  9 |     branches:
 10 |       - main
 11 |       - dev
 12 | jobs:
 13 |   linux-build:
 14 |     runs-on: ubuntu-latest  # host VM only; every step executes inside the container below
 15 |     container: nvidia/cuda:12.2.0-devel-ubuntu20.04
 16 |     
 17 |     env: 
 18 |       CUDAARCHS: '60'  # environment variable CMake reads to pick the CUDA architectures
 19 |       TRIMESH_VERSION: '2022.03.04'  # trimesh2 branch/tag cloned by the "Build Trimesh2" step
 20 |       CMAKE_VERSION: '3.20.4'  # CMake release downloaded by the "Install CMake" step
 21 | 
 22 |     steps:
 23 |     - name: Checkout
 24 |       uses: actions/checkout@v4
 25 | 
 26 |     - name: Install OpenMP and other libraries
 27 |       run: | 
 28 |         apt update
 29 |         apt install -y --no-install-recommends apt-utils
 30 |         apt install -y libgomp1 git mesa-common-dev libglu1-mesa-dev libxi-dev wget ninja-build
 31 |     
 32 |     - name: Install CMake
 33 |       run: |
 34 |         wget -q -O ./cmake-install.sh https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}-Linux-x86_64.sh 
 35 |         chmod u+x ./cmake-install.sh
 36 |         mkdir "$HOME"/cmake
 37 |         ./cmake-install.sh --skip-license --prefix="$HOME"/cmake
 38 |         rm ./cmake-install.sh
 39 |     
 40 |     - name: Build Trimesh2
 41 |       run: |
 42 |         git clone --single-branch --depth 1 -b ${{ env.TRIMESH_VERSION }} https://github.com/Forceflow/trimesh2.git ../trimesh2
 43 |         cd ../trimesh2
 44 |         make all -j $(nproc)
 45 |         make clean
 46 | 
 47 |     - name: Configure cuda_voxelizer
 48 |       run: |
 49 |         PATH=$PATH:"$HOME"/cmake/bin
 50 |         cmake -GNinja \
 51 |         -DTrimesh2_INCLUDE_DIR="../trimesh2/include" \
 52 |         -DTrimesh2_LINK_DIR="../trimesh2/lib.Linux64" \
 53 |         -S . -B ./build
 54 | 
 55 |     - name: Build cuda_voxelizer
 56 |       run: |
 57 |         PATH=$PATH:"$HOME"/cmake/bin
 58 |         cmake --build ./build --parallel $(nproc)
 59 | 
 60 |     - name: Test
 61 |       run: ./build/cuda_voxelizer -f ./test_models/bunny.OBJ -s 64 -cpu
 62 |   
 63 |   windows-build:
 64 |     runs-on: windows-2019  # NOTE(review): kept because the steps below use the vs2019 / vs142 trimesh2 build; confirm this hosted image is still offered
 65 |     env:
 66 |       CUDA_MAJOR_VERSION: '12.2'  # joined with CUDA_PATCH_VERSION for the cuda-toolkit step
 67 |       CUDA_PATCH_VERSION: '0'
 68 |       TRIMESH_VERSION: '2022.03.04'  # trimesh2 branch/tag cloned by the "Build Trimesh2" step
 69 |       CUDAARCHS: '60'  # environment variable CMake reads to pick the CUDA architectures
 70 |     
 71 |     steps:
 72 |     - name: Checkout
 73 |       uses: actions/checkout@v4
 74 | 
 75 |     - name: Cache Trimesh2
 76 |       id: trimesh2-cache
 77 |       uses: actions/cache@v4
 78 |       with:
 79 |         path: ${{ runner.workspace }}\trimesh2-build
 80 |         key: ${{ runner.os }}-build-trimesh2-cache-1
 81 | 
 82 |     
 83 |     # Versions of the CUDA Toolkit older than 11.4.0 do not have the thrust option for installation in silent mode
 84 |     - uses: Jimver/cuda-toolkit@v0.2.11
 85 |       id: cuda-toolkit
 86 |       with:
 87 |         method: 'network'
 88 |         cuda: '${{ env.CUDA_MAJOR_VERSION }}.${{ env.CUDA_PATCH_VERSION }}'
 89 |         sub-packages: '["nvcc", "cudart", "visual_studio_integration"]'
 90 | 
 91 |     - name: Build Trimesh2
 92 |       if: steps.trimesh2-cache.outputs.cache-hit != 'true'
 93 |       run: |
 94 |         Install-Module VSSetup -Scope CurrentUser -Force
 95 |         git clone -b ${{ env.TRIMESH_VERSION }} --single-branch --depth 1 https://github.com/Forceflow/trimesh2.git ..\trimesh2
 96 |         cd ..\trimesh2
 97 |         & (Join-Path (Get-VSSetupInstance).InstallationPath -ChildPath MSBuild\Current\Bin\msbuild.exe) .\msvc\vs2019\trimesh2.sln -nologo -m:2 /t:libsrc /p:Configuration=Release /p:Platform=x64
 98 |         mkdir ..\trimesh2-build
 99 |         Move-Item .\include ..\trimesh2-build
100 |         Move-Item .\lib.Win64.vs142 ..\trimesh2-build
101 |         cd -
102 |         rm -Recurse -Force ..\trimesh2
103 | 
104 |     - name: Configure cuda_voxelizer
105 |       run: |
106 |         $trimeshDir = "..\trimesh2-build"
107 |         cmake -A x64 `
108 |         -DCMAKE_TOOLCHAIN_FILE:FILEPATH="C:\vcpkg\scripts\buildsystems\vcpkg.cmake" `
109 |         -DTrimesh2_INCLUDE_DIR:PATH="$trimeshDir\include" `
110 |         -DTrimesh2_LINK_DIR:PATH="$trimeshDir\lib.Win64.vs142" `
111 |         -DCMAKE_BUILD_TYPE=Release `
112 |         -S . -B .\build
113 |     
114 |     - name: Build cuda_voxelizer
115 |       run: cmake --build .\build --parallel 2 --target ALL_BUILD --config Release
116 |     
117 |     - name: Test cuda_voxelizer
118 |       run: .\build\Release\cuda_voxelizer.exe -f .\test_models\bunny.OBJ -s 64 -cpu
119 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | ## Ignore Visual Studio temporary files, build results, and
  2 | ## files generated by popular Visual Studio add-ons.
  3 | 
  4 | # User-specific files
  5 | *.suo
  6 | *.user
  7 | *.sln.docstates
  8 | 
  9 | # Thirdparty libraries
 10 | thirdparty
 11 | 
 12 | # Build results
 13 | [Dd]ebug/
 14 | [Dd]ebugPublic/
 15 | [Rr]elease/
 16 | x64/
 17 | build/
 18 | bld/
 19 | [Bb]in/
 20 | [Oo]bj/
 21 | 
 22 | # MSTest test Results
 23 | [Tt]est[Rr]esult*/
 24 | [Bb]uild[Ll]og.*
 25 | 
 26 | #NUNIT
 27 | *.VisualState.xml
 28 | TestResult.xml
 29 | 
 30 | # Build Results of an ATL Project
 31 | [Dd]ebugPS/
 32 | [Rr]eleasePS/
 33 | dlldata.c
 34 | 
 35 | *_i.c
 36 | *_p.c
 37 | *_i.h
 38 | *.ilk
 39 | *.meta
 40 | *.pch
 41 | *.pdb
 42 | *.pgc
 43 | *.pgd
 44 | *.rsp
 45 | *.sbr
 46 | *.tlb
 47 | *.tli
 48 | *.tlh
 49 | *.tmp
 50 | *.tmp_proj
 51 | *.log
 52 | *.vspscc
 53 | *.vssscc
 54 | .builds
 55 | *.pidb
 56 | *.svclog
 57 | *.scc
 58 | 
 59 | # Chutzpah Test files
 60 | _Chutzpah*
 61 | 
 62 | # Visual C++ cache files
 63 | ipch/
 64 | *.aps
 65 | *.ncb
 66 | *.opensdf
 67 | *.sdf
 68 | *.cachefile
 69 | 
 70 | # Visual Studio profiler
 71 | *.psess
 72 | *.vsp
 73 | *.vspx
 74 | 
 75 | # TFS 2012 Local Workspace
 76 | $tf/
 77 | 
 78 | # Guidance Automation Toolkit
 79 | *.gpState
 80 | 
 81 | # ReSharper is a .NET coding add-in
 82 | _ReSharper*/
 83 | *.[Rr]e[Ss]harper
 84 | *.DotSettings.user
 85 | 
 86 | # JustCode is a .NET coding add-in
 87 | .JustCode
 88 | 
 89 | # TeamCity is a build add-in
 90 | _TeamCity*
 91 | 
 92 | # DotCover is a Code Coverage Tool
 93 | *.dotCover
 94 | 
 95 | # NCrunch
 96 | *.ncrunch*
 97 | _NCrunch_*
 98 | .*crunch*.local.xml
 99 | 
100 | # MightyMoose
101 | *.mm.*
102 | AutoTest.Net/
103 | 
104 | # Web workbench (sass)
105 | .sass-cache/
106 | 
107 | # Installshield output folder
108 | [Ee]xpress/
109 | 
110 | # DocProject is a documentation generator add-in
111 | DocProject/buildhelp/
112 | DocProject/Help/*.HxT
113 | DocProject/Help/*.HxC
114 | DocProject/Help/*.hhc
115 | DocProject/Help/*.hhk
116 | DocProject/Help/*.hhp
117 | DocProject/Help/Html2
118 | DocProject/Help/html
119 | 
120 | # Click-Once directory
121 | publish/
122 | 
123 | # Publish Web Output
124 | *.[Pp]ublish.xml
125 | *.azurePubxml
126 | 
127 | # NuGet Packages Directory
128 | packages/
129 | ## TODO: If the tool you use requires repositories.config uncomment the next line
130 | #!packages/repositories.config
131 | 
132 | # Enable "build/" folder in the NuGet Packages folder since NuGet packages use it for MSBuild targets
133 | # This line needs to be after the ignore of the build folder (and the packages folder if the line above has been uncommented)
134 | !packages/build/
135 | 
136 | # Windows Azure Build Output
137 | csx/
138 | *.build.csdef
139 | 
140 | # Windows Store app package directory
141 | AppPackages/
142 | 
143 | # Others
144 | sql/
145 | *.Cache
146 | ClientBin/
147 | [Ss]tyle[Cc]op.*
148 | ~$*
149 | *~
150 | *.dbmdl
151 | *.dbproj.schemaview
152 | *.pfx
153 | *.publishsettings
154 | node_modules/
155 | 
156 | # RIA/Silverlight projects
157 | Generated_Code/
158 | 
159 | # Backup & report files from converting an old project file to a newer
160 | # Visual Studio version. Backup files are not needed, because we have git ;-)
161 | _UpgradeReport_Files/
162 | Backup*/
163 | UpgradeLog*.XML
164 | UpgradeLog*.htm
165 | 
166 | # SQL Server files
167 | *.mdf
168 | *.ldf
169 | 
170 | # Business Intelligence projects
171 | *.rdl.data
172 | *.bim.layout
173 | *.bim_*.settings
174 | 
175 | # Microsoft Fakes
176 | FakesAssemblies/
177 | *.opendb
178 | *.db
179 | *.deps
180 | 
181 | #VS folders
182 | .vs/
183 | 
184 | #Generated voxel models
185 | *.binvox
186 | *.bin


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 3.20 FATAL_ERROR)  # the CUDAARCHS environment variable is honoured from CMake 3.20 on
 2 | 
 3 | project(CudaVoxelize LANGUAGES CXX CUDA)  # enables both the C++ and the CUDA compiler
 4 | 
 5 | find_package(OpenMP REQUIRED)  # provides the OpenMP::OpenMP_CXX target linked below
 6 | find_package(CUDAToolkit REQUIRED)  # provides the CUDA::cudart target linked below
 7 | 
 8 | set(CUDA_VOXELIZER_EXECUTABLE cuda_voxelizer)  # name of the executable target
 9 | 
10 | set(Trimesh2_INCLUDE_DIR CACHE PATH "Path to Trimesh2 includes")  # user-supplied, e.g. via -DTrimesh2_INCLUDE_DIR=...
11 | 
12 | if(NOT Trimesh2_INCLUDE_DIR)  # stop early when the include directory was never provided
13 |   message(FATAL_ERROR "You need to set variable Trimesh2_INCLUDE_DIR")
14 | endif()
15 | 
16 | find_file(Trimesh2_TriMesh_h NAMES TriMesh.h PATHS ${Trimesh2_INCLUDE_DIR})  # sanity check: the header must be locatable
17 | 
18 | if(NOT Trimesh2_TriMesh_h)
19 |   message(FATAL_ERROR "Can't find TriMesh.h in ${Trimesh2_INCLUDE_DIR}")
20 | endif()
21 | mark_as_advanced(Trimesh2_TriMesh_h)  # hide the helper cache entry from the default GUI view
22 | 
23 | set(Trimesh2_LINK_DIR CACHE PATH "Path to Trimesh2 library dir.")  # user-supplied, e.g. via -DTrimesh2_LINK_DIR=...
24 | 
25 | if(NOT Trimesh2_LINK_DIR)  # stop early when the library directory was never provided
26 |   message(FATAL_ERROR "You need to set variable Trimesh2_LINK_DIR")
27 | endif()
28 | 
29 | if(NOT EXISTS "${Trimesh2_LINK_DIR}")  # catch typos in the supplied path before searching it
30 |   message(FATAL_ERROR "Trimesh2 library dir does not exist")
31 | endif()
32 | 
33 | find_library(Trimesh2_LIBRARY trimesh ${Trimesh2_LINK_DIR})  # result is cached in Trimesh2_LIBRARY
34 | 
35 | if(NOT Trimesh2_LIBRARY)  # FATAL_ERROR (not SEND_ERROR) so the "Found ..." lines below never report a NOTFOUND value
36 |   message(FATAL_ERROR "Can't find libtrimesh.a in ${Trimesh2_LINK_DIR}")
37 | endif()
38 | mark_as_advanced(Trimesh2_LIBRARY)  # hide the helper cache entry from the default GUI view
39 | 
40 | message(STATUS "Found Trimesh2 include: ${Trimesh2_TriMesh_h}")
41 | message(STATUS "Found Trimesh2 lib: ${Trimesh2_LIBRARY}")
42 | 
43 | set(CUDA_VOXELIZER_SRCS  # host-side C++ translation units
44 |   ./src/main.cpp
45 |   ./src/util_cuda.cpp
46 |   ./src/util_io.cpp
47 |   ./src/cpu_voxelizer.cpp
48 |   ./src/libs/magicavoxel_file_writer/VoxWriter.cpp
49 | )
50 | set(CUDA_VOXELIZER_SRCS_CU  # CUDA translation units
51 |   ./src/voxelize.cu
52 |   ./src/voxelize_solid.cu
53 | )
54 | 
55 | add_executable(
56 |   ${CUDA_VOXELIZER_EXECUTABLE}
57 |   ${CUDA_VOXELIZER_SRCS}
58 |   ${CUDA_VOXELIZER_SRCS_CU})
59 | 
60 | target_compile_features(${CUDA_VOXELIZER_EXECUTABLE} PRIVATE cxx_std_17)  # require at least C++17
61 | target_include_directories(${CUDA_VOXELIZER_EXECUTABLE} PRIVATE ${Trimesh2_INCLUDE_DIR})
62 | target_link_libraries(${CUDA_VOXELIZER_EXECUTABLE} PRIVATE ${Trimesh2_LIBRARY} OpenMP::OpenMP_CXX CUDA::cudart)
63 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | * Please make PRs to the `dev` branch
2 | * Update the CMake and MSVC projects to include any extra files you add
3 | * Avoid pulling in extra dependencies (but I'll allow if needed)
4 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 Jeroen Baert
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | ![Build Status](https://github.com/Forceflow/cuda_voxelizer/actions/workflows/autobuild.yml/badge.svg) ![license](https://img.shields.io/github/license/Forceflow/cuda_voxelizer.svg)<br>
  2 | [![ko-fi](https://ko-fi.com/img/githubbutton_sm.svg)](https://ko-fi.com/Z8Z7GFNW3) 
  3 | 
  4 | # cuda_voxelizer v0.6
  5 | 
  6 | A command-line tool to convert polygon meshes to (annotated) voxel grids.
  7 |  * Supported input formats: .ply, .off, .obj, .3DS, .SM and RAY
  8 |  * Supported output formats: .vox, .binvox, .obj cubes and point cloud, morton ordered grid
  9 |  * Requires a CUDA-compatible video card. Compute Capability 2.0 or higher (Nvidia Fermi or better).
 10 |    * Since v0.4.4, the voxelizer reverts to a (slower) CPU voxelization method when no CUDA device is found
 11 |    
 12 | **Important:** _In v0.6 I replaced all GLM math types with builtin CUDA types, removing an external dependency. This is a big change. I've tested the release as well as I can, but if you encounter any weirdness, it's advised to check if you can reproduce the problem with an older version. Thanks!_
 13 | 
 14 | ## Usage
 15 | Program options:
 16 |  * `-f <path to model file>`: **(required)** A path to a polygon-based 3D model file. 
 17 |  * `-s <voxel grid length>`: **(default: 256)** The length of the cubical voxel grid. The process will construct the tightest possible cubical bounding box around the input model.
 18 |  * `-o <output format>`: The output format for voxelized models, default: *vox*. Output files are saved in the same folder as the input file, in the format `<original file name>_<grid_size>.extension`.
 19 |    * `vox`: **(default)** A [vox](https://github.com/ephtracy/voxel-model/blob/master/MagicaVoxel-file-format-vox.txt) file, which is the native format of and can be viewed with the excellent [MagicaVoxel](https://ephtracy.github.io/).
 20 |    * `binvox`: A [binvox](http://www.patrickmin.com/binvox/binvox.html) file. Can be viewed using [viewvox](http://www.patrickmin.com/viewvox/).
 21 |    * `obj`: A mesh containing actual cubes (made up of triangle faces) for each voxel.
 22 |    * `obj_points`: A mesh containing a point cloud, with a vertex for each voxel. Can be viewed using any compatible viewer that can just display vertices, like [Blender](https://www.blender.org/) or [Meshlab](https://www.meshlab.net/).
 23 |    * `morton`: a binary file containing a Morton-ordered grid. This is an internal format I use for other tools.
 24 |  * `-cpu`: Force multi-threaded voxelization on the CPU instead of GPU. Can be used when a CUDA device is not detected/compatible, or for very small models where GPU call overhead is not worth it.
 25 |  * `-solid` : (Experimental) Use solid voxelization instead of voxelizing the mesh faces. Needs a watertight input mesh.
 26 | 
 27 | ## Examples
 28 | `cuda_voxelizer -f bunny.ply -s 256` generates a 256 x 256 x 256 vox-based voxel model which will be stored in `bunny_256.vox`. 
 29 | 
 30 | `cuda_voxelizer -f torus.ply -s 64 -o obj -solid` generates a solid (filled) 64 x 64 x 64 .obj voxel model which will be stored in `torus_64.obj`. 
 31 | 
 32 | ![output_examples](https://raw.githubusercontent.com/Forceflow/cuda_voxelizer/main/img/output_examples.jpg)
 33 | 
 34 | ## Building
 35 | The build process is aimed at 64-bit executables. It's possible to build for 32-bit as well, but I'm not actively testing/supporting this.
 36 | You can build using CMake or using the provided Visual Studio project. Since 2022, cuda_voxelizer builds via [Github Actions](https://github.com/Forceflow/cuda_voxelizer/actions) as well, check the [.yml config file](https://github.com/Forceflow/cuda_voxelizer/blob/main/.github/workflows/autobuild.yml) for more info.
 37 | 
 38 | ### Dependencies
 39 | The project has the following build dependencies:
 40 |  * [Nvidia Cuda 8.0 Toolkit (or higher)](https://developer.nvidia.com/cuda-toolkit) for CUDA
 41 |  * [Trimesh2](https://github.com/Forceflow/trimesh2) for model importing. Latest version recommended.
 42 |  * [OpenMP](https://www.openmp.org/) for multi-threading.
 43 | 
 44 | ### Build using CMake (Windows, Linux)
 45 | After installing dependencies, do `mkdir build` and `cd build`, followed by:
 46 | 
 47 | For Windows with Visual Studio:
 48 | ```powershell
 49 | $env:CUDAARCHS="your_cuda_compute_capability"
 50 | cmake -A x64 -DTrimesh2_INCLUDE_DIR:PATH="path_to_trimesh2_include" -DTrimesh2_LINK_DIR:PATH="path_to_trimesh2_library_dir" .. 
 51 | ```
 52 | 
 53 | For Linux:
 54 | ```bash
 55 | CUDAARCHS="your_cuda_compute_capability" cmake -DTrimesh2_INCLUDE_DIR:PATH="path_to_trimesh2_include" -DTrimesh2_LINK_DIR:PATH="path_to_trimesh2_library_dir" .. 
 56 | ```
 57 | Where `your_cuda_compute_capability` is a string specifying your CUDA architecture ([more info here](https://docs.nvidia.com/cuda/archive/10.2/cuda-compiler-driver-nvcc/index.html#options-for-steering-gpu-code-generation-gpu-architecture) and [here CMake](https://cmake.org/cmake/help/v3.20/envvar/CUDAARCHS.html#envvar:CUDAARCHS)). For example: `CUDAARCHS="50;61"` or `CUDAARCHS="60"`.
 58 | 
 59 | Finally, run
 60 | ```
 61 | cmake --build . --parallel number_of_cores
 62 | ```
 63 | 
 64 | ### Build using Visual Studio project (Windows)
 65 | A project solution for Visual Studio 2022 is provided in the `msvc` folder. It is configured for CUDA 12.2, but you can edit the project file to make it work with other CUDA versions. You can edit the `custom_includes.props` file to configure the library locations, and specify a place where the resulting binaries should be placed.
 66 | 
 67 | ```
 68 |     <TRIMESH_DIR>C:\libs\trimesh2\</TRIMESH_DIR>
 69 |     <GLM_DIR>C:\libs\glm\</GLM_DIR>
 70 |     <BINARY_OUTPUT_DIR>D:\dev\Binaries\</BINARY_OUTPUT_DIR>
 71 | ```
 72 | ## Details
 73 | `cuda_voxelizer` implements an optimized version of the method described in M. Schwarz and HP Seidel's 2010 paper [*Fast Parallel Surface and Solid Voxelization on GPUs*](http://research.michael-schwarz.com/publ/2010/vox/). The morton-encoded table was based on my 2013 HPG paper [*Out-Of-Core construction of Sparse Voxel Octrees*](http://graphics.cs.kuleuven.be/publications/BLD14OCCSVO/) and the work in [*libmorton*](https://github.com/Forceflow/libmorton).
 74 | 
 75 | `cuda_voxelizer` is built with a focus on performance. Usage of the routine as a per-frame voxelization step for real-time applications is viable. These are the voxelization timings for the [Stanford Bunny Model](https://graphics.stanford.edu/data/3Dscanrep/) (1.55 MB, 70k triangles). 
 76 |  * This is the voxelization time for a non-solid voxelization. No I/O - from disk or to GPU - is included in this timing.
 77 |  * CPU voxelization time is heavily dependent on how many cores your CPU has - OpenMP allocates 1 thread per core.
 78 | 
 79 | | Grid size | GPU (GTX 1050 TI) | CPU (Intel i7 8750H, 12 threads) |
 80 | |-----------|--------|--------|
 81 | | 64³     | 0.2 ms | 39.8 ms |
 82 | | 128³     | 0.3 ms | 63.6 ms |
 83 | | 256³     | 0.6 ms | 118.2 ms |
 84 | | 512³     | 1.8 ms | 308.8 ms |
 85 | | 1024³    | 8.6 ms | 1047.5 ms |
 86 | | 2048³    | 44.6 ms | 4147.4 ms |
 87 | 
 88 | ## Thanks
 89 |  * The [MagicaVoxel](https://ephtracy.github.io/) I/O was implemented using [MagicaVoxel File Writer](https://github.com/aiekick/MagicaVoxel_File_Writer) by [aiekick](https://github.com/aiekick).
 90 | * Thanks to [conceptclear](https://github.com/conceptclear) for implementing solid voxelization.
 91 | 
 92 | ## See also
 93 | 
 94 |  * The [.binvox file format](https://www.patrickmin.com/binvox/binvox.html) was created by Michael Kazhdan. 
 95 |    * [Patrick Min](https://www.patrickmin.com/binvox/) wrote some interesting tools to work with it:
 96 |      * [viewvox](https://www.patrickmin.com/viewvox/): Visualization of voxel grids (a copy of this tool is included in cuda_voxelizer releases)
 97 |      * [thinvox](https://www.patrickmin.com/thinvox/): Thinning of voxel grids
 98 |    * [binvox-rw-py](https://github.com/dimatura/binvox-rw-py) is a Python module to interact with .binvox files
 99 |  * [Zarbuz](https://github.com/zarbuz)'s [FileToVox](https://github.com/Zarbuz/FileToVox) looks interesting as well
100 |  * If you want a good customizable CPU-based voxelizer, I can recommend [VoxSurf](https://github.com/sylefeb/VoxSurf).
101 |  * Another hackable voxel viewer is Sean Barrett's excellent [stb_voxel_render.h](https://github.com/nothings/stb/blob/master/stb_voxel_render.h).
102 |  * Nvidia also has a voxel library called [GVDB](https://developer.nvidia.com/gvdb), that does a lot more than just voxelizing.
103 | 
104 | ## Todo / Possible future work
105 | This is on my list of "nice things to add".
106 | 
107 |  * Better output filename control
108 |  * Noncubic grid support
109 |  * Memory limits test
110 |  * Implement partitioning for larger models
111 |  * Do a pre-pass to categorize triangles
112 |  * Implement capture of normals / color / texture data
113 |  
114 | ## Citation
115 | If you use cuda_voxelizer in your published paper or other software, please reference it, for example as follows:
116 | <pre>
117 | @Misc{cudavoxelizer17,
118 | author = "Jeroen Baert",
119 | title = "Cuda Voxelizer: A GPU-accelerated Mesh Voxelizer",
120 | howpublished = "\url{https://github.com/Forceflow/cuda_voxelizer}",
121 | year = "2017"}
122 | </pre>
123 | If you end up using cuda_voxelizer in something cool, drop me an e-mail: **mail (at) jeroen-baert.be**
124 | 
125 | ## Donate
126 | cuda_voxelizer is developed in my free time. If you want to support the project, you can do so through:
127 | * [Kofi](https://ko-fi.com/jbaert)
128 | * BTC: 3GX3b7BZK2nhsneBG8eTqEchgCQ8FDfwZq 
129 | * ETH: 0x7C9e97D2bBC2dFDd93EF56C77f626e802BA56860
130 | 


--------------------------------------------------------------------------------
/img/output_examples.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Forceflow/cuda_voxelizer/ff93fe65a9144c1dc9f11d22e786ad698387767b/img/output_examples.jpg


--------------------------------------------------------------------------------
/img/viewvox.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Forceflow/cuda_voxelizer/ff93fe65a9144c1dc9f11d22e786ad698387767b/img/viewvox.JPG


--------------------------------------------------------------------------------
/msvc/vs2022/cuda_voxelizer.sln:
--------------------------------------------------------------------------------
 1 | ﻿
 2 | Microsoft Visual Studio Solution File, Format Version 12.00
 3 | # Visual Studio 15
 4 | VisualStudioVersion = 15.0.28307.271
 5 | MinimumVisualStudioVersion = 10.0.40219.1
 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cuda_voxelizer", "cuda_voxelizer.vcxproj", "{D4330816-735D-4CC7-AE2A-04A0E998099E}"
 7 | EndProject
 8 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{C52A2702-E60C-4590-9C55-C8C66CCA5BAB}"
 9 | EndProject
10 | Global
11 | 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
12 | 		Debug|x64 = Debug|x64
13 | 		Release|x64 = Release|x64
14 | 	EndGlobalSection
15 | 	GlobalSection(ProjectConfigurationPlatforms) = postSolution
16 | 		{D4330816-735D-4CC7-AE2A-04A0E998099E}.Debug|x64.ActiveCfg = Debug|x64
17 | 		{D4330816-735D-4CC7-AE2A-04A0E998099E}.Debug|x64.Build.0 = Debug|x64
18 | 		{D4330816-735D-4CC7-AE2A-04A0E998099E}.Release|x64.ActiveCfg = Release|x64
19 | 		{D4330816-735D-4CC7-AE2A-04A0E998099E}.Release|x64.Build.0 = Release|x64
20 | 	EndGlobalSection
21 | 	GlobalSection(SolutionProperties) = preSolution
22 | 		HideSolutionNode = FALSE
23 | 	EndGlobalSection
24 | 	GlobalSection(ExtensibilityGlobals) = postSolution
25 | 		SolutionGuid = {D7628502-09E5-4B15-AB62-365471E954D4}
26 | 	EndGlobalSection
27 | EndGlobal
28 | 


--------------------------------------------------------------------------------
/msvc/vs2022/cuda_voxelizer.vcxproj:
--------------------------------------------------------------------------------
  1 | ﻿<?xml version="1.0" encoding="utf-8"?>
  2 | <Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  3 |   <ItemGroup Label="ProjectConfigurations">
  4 |     <ProjectConfiguration Include="Debug|x64">
  5 |       <Configuration>Debug</Configuration>
  6 |       <Platform>x64</Platform>
  7 |     </ProjectConfiguration>
  8 |     <ProjectConfiguration Include="Release|x64">
  9 |       <Configuration>Release</Configuration>
 10 |       <Platform>x64</Platform>
 11 |     </ProjectConfiguration>
 12 |   </ItemGroup>
 13 |   <PropertyGroup Label="Globals">
 14 |     <ProjectGuid>{D4330816-735D-4CC7-AE2A-04A0E998099E}</ProjectGuid>
 15 |     <RootNamespace>cuda_voxelizer</RootNamespace>
 16 |     <WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
 17 |   </PropertyGroup>
 18 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
 19 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
 20 |     <ConfigurationType>Application</ConfigurationType>
 21 |     <UseDebugLibraries>true</UseDebugLibraries>
 22 |     <CharacterSet>MultiByte</CharacterSet>
 23 |     <PlatformToolset>v143</PlatformToolset>
 24 |   </PropertyGroup>
 25 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
 26 |     <ConfigurationType>Application</ConfigurationType>
 27 |     <UseDebugLibraries>false</UseDebugLibraries>
 28 |     <WholeProgramOptimization>true</WholeProgramOptimization>
 29 |     <CharacterSet>MultiByte</CharacterSet>
 30 |     <PlatformToolset>v143</PlatformToolset>
 31 |   </PropertyGroup>
 32 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
 33 |   <ImportGroup Label="ExtensionSettings">
 34 |     <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 12.2.props" />
 35 |   </ImportGroup>
 36 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
 37 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 38 |     <Import Project="custom_includes.props" />
 39 |   </ImportGroup>
 40 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
 41 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 42 |     <Import Project="custom_includes.props" />
 43 |   </ImportGroup>
 44 |   <PropertyGroup Label="UserMacros" />
 45 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
 46 |     <LinkIncremental>true</LinkIncremental>
 47 |     <IncludePath>C:\libs\trimesh2\include;C:\libs\glm;$(IncludePath)</IncludePath>
 48 |     <LibraryPath>C:\libs\trimesh2\lib.Win64;$(LibraryPath)</LibraryPath>
 49 |     <CustomBuildAfterTargets>xcopy /y "$(SolutionDir)$(Platform)\$(Configuration)\$(ProjectName).exe" "$(BINARY_OUTPUT_DIR)$(ProjectName).exe"</CustomBuildAfterTargets>
 50 |     <TargetName>$(ProjectName)_debug</TargetName>
 51 |   </PropertyGroup>
 52 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
 53 |     <IncludePath>C:\libs\trimesh2\include;C:\libs\glm;$(IncludePath)</IncludePath>
 54 |     <LibraryPath>C:\libs\trimesh2\lib.Win64;$(LibraryPath)</LibraryPath>
 55 |     <CustomBuildAfterTargets>xcopy /y "$(SolutionDir)$(Platform)\$(Configuration)\$(ProjectName).exe" "$(BINARY_OUTPUT_DIR)$(ProjectName).exe"</CustomBuildAfterTargets>
 56 |   </PropertyGroup>
 57 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
 58 |     <ClCompile>
 59 |       <WarningLevel>Level3</WarningLevel>
 60 |       <Optimization>Disabled</Optimization>
 61 |       <PreprocessorDefinitions>WIN32;WIN64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
 62 |       <OpenMPSupport>true</OpenMPSupport>
 63 |     </ClCompile>
 64 |     <Link>
 65 |       <GenerateDebugInformation>true</GenerateDebugInformation>
 66 |       <SubSystem>Console</SubSystem>
 67 |       <AdditionalDependencies>trimeshd.lib;cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
 68 |     </Link>
 69 |     <PostBuildEvent>
 70 |       <Command>copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
 71 | copy "$(CudaToolkitBinDir)\cudart*.dll" "$(BINARY_OUTPUT_DIR)"
 72 | copy /y "$(SolutionDir)$(Platform)\$(Configuration)\$(TargetName).exe" "$(BINARY_OUTPUT_DIR)$(TargetName).exe"</Command>
 73 |     </PostBuildEvent>
 74 |     <CudaCompile>
 75 |       <PtxAsOptionV>false</PtxAsOptionV>
 76 |       <AdditionalOptions>--source-in-ptx %(AdditionalOptions)</AdditionalOptions>
 77 |       <CodeGeneration>compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80</CodeGeneration>
 78 |     </CudaCompile>
 79 |   </ItemDefinitionGroup>
 80 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
 81 |     <ClCompile>
 82 |       <WarningLevel>Level3</WarningLevel>
 83 |       <Optimization>MaxSpeed</Optimization>
 84 |       <FunctionLevelLinking>true</FunctionLevelLinking>
 85 |       <IntrinsicFunctions>true</IntrinsicFunctions>
 86 |       <PreprocessorDefinitions>WIN32;WIN64;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
 87 |       <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
 88 |       <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
 89 |       <OpenMPSupport>true</OpenMPSupport>
 90 |       <WholeProgramOptimization>false</WholeProgramOptimization>
 91 |       <FloatingPointModel>Strict</FloatingPointModel>
 92 |     </ClCompile>
 93 |     <Link>
 94 |       <EnableCOMDATFolding>true</EnableCOMDATFolding>
 95 |       <OptimizeReferences>true</OptimizeReferences>
 96 |       <SubSystem>Console</SubSystem>
 97 |       <AdditionalDependencies>trimesh.lib;cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
 98 |     </Link>
 99 |     <PostBuildEvent>
100 |       <Command>copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
101 | copy "$(CudaToolkitBinDir)\cudart*.dll" "$(BINARY_OUTPUT_DIR)"
102 | copy /y "$(SolutionDir)$(Platform)\$(Configuration)\$(TargetName).exe" "$(BINARY_OUTPUT_DIR)$(TargetName).exe"</Command>
103 |     </PostBuildEvent>
104 |     <CudaCompile>
105 |       <FastMath>true</FastMath>
106 |       <TargetMachinePlatform>64</TargetMachinePlatform>
107 |       <CodeGeneration>compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80</CodeGeneration>
108 |     </CudaCompile>
109 |   </ItemDefinitionGroup>
110 |   <ItemGroup>
111 |     <CudaCompile Include="..\..\src/voxelize.cu" />
112 |     <CudaCompile Include="..\..\src\voxelize_solid.cu" />
113 |   </ItemGroup>
114 |   <ItemGroup>
115 |     <ClCompile Include="..\..\src\cpu_voxelizer.cpp" />
116 |     <ClCompile Include="..\..\src\libs\magicavoxel_file_writer\VoxWriter.cpp" />
117 |     <ClCompile Include="..\..\src\util_io.cpp" />
118 |     <ClCompile Include="..\..\src\util_cuda.cpp" />
119 |     <ClCompile Include="..\..\src\main.cpp" />
120 |   </ItemGroup>
121 |   <ItemGroup>
122 |     <ClInclude Include="..\..\src\cpu_voxelizer.h" />
123 |     <ClInclude Include="..\..\src\libs\cuda\helper_cuda.h" />
124 |     <ClInclude Include="..\..\src\libs\cuda\helper_math.h" />
125 |     <ClInclude Include="..\..\src\libs\cuda\helper_string.h" />
126 |     <ClInclude Include="..\..\src\libs\magicavoxel_file_writer\VoxWriter.h" />
127 |     <ClInclude Include="..\..\src\util_io.h" />
128 |     <ClInclude Include="..\..\src\util.h" />
129 |     <ClInclude Include="..\..\src\util_cuda.h" />
130 |     <ClInclude Include="..\..\src\morton_LUTs.h" />
131 |     <ClInclude Include="..\..\src\timer.h" />
132 |     <ClInclude Include="..\..\src\voxelize.cuh" />
133 |   </ItemGroup>
134 |   <ItemGroup>
135 |     <Text Include="..\..\src\todo.txt" />
136 |   </ItemGroup>
137 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
138 |   <ImportGroup Label="ExtensionTargets">
139 |     <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 12.2.targets" />
140 |   </ImportGroup>
141 | </Project>


--------------------------------------------------------------------------------
/msvc/vs2022/cuda_voxelizer.vcxproj.filters:
--------------------------------------------------------------------------------
 1 | ﻿<?xml version="1.0" encoding="utf-8"?>
 2 | <Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
 3 |   <ItemGroup>
 4 |     <CudaCompile Include="..\..\src/voxelize.cu" />
 5 |     <CudaCompile Include="..\..\src\voxelize_solid.cu" />
 6 |   </ItemGroup>
 7 |   <ItemGroup>
 8 |     <ClCompile Include="..\..\src\main.cpp" />
 9 |     <ClCompile Include="..\..\src\util_cuda.cpp">
10 |       <Filter>util</Filter>
11 |     </ClCompile>
12 |     <ClCompile Include="..\..\src\util_io.cpp">
13 |       <Filter>util</Filter>
14 |     </ClCompile>
15 |     <ClCompile Include="..\..\src\cpu_voxelizer.cpp" />
16 |     <ClCompile Include="..\..\src\libs\magicavoxel_file_writer\VoxWriter.cpp">
17 |       <Filter>libs\magicavoxel_file_writer</Filter>
18 |     </ClCompile>
19 |   </ItemGroup>
20 |   <ItemGroup>
21 |     <ClInclude Include="..\..\src\voxelize.cuh" />
22 |     <ClInclude Include="..\..\src\morton_LUTs.h">
23 |       <Filter>util</Filter>
24 |     </ClInclude>
25 |     <ClInclude Include="..\..\src\timer.h" />
26 |     <ClInclude Include="..\..\src\util_cuda.h">
27 |       <Filter>util</Filter>
28 |     </ClInclude>
29 |     <ClInclude Include="..\..\src\util_io.h">
30 |       <Filter>util</Filter>
31 |     </ClInclude>
32 |     <ClInclude Include="..\..\src\util.h">
33 |       <Filter>util</Filter>
34 |     </ClInclude>
35 |     <ClInclude Include="..\..\src\libs\cuda\helper_cuda.h">
36 |       <Filter>libs\cuda</Filter>
37 |     </ClInclude>
38 |     <ClInclude Include="..\..\src\libs\cuda\helper_string.h">
39 |       <Filter>libs\cuda</Filter>
40 |     </ClInclude>
41 |     <ClInclude Include="..\..\src\cpu_voxelizer.h" />
42 |     <ClInclude Include="..\..\src\libs\magicavoxel_file_writer\VoxWriter.h">
43 |       <Filter>libs\magicavoxel_file_writer</Filter>
44 |     </ClInclude>
45 |     <ClInclude Include="..\..\src\libs\cuda\helper_math.h">
46 |       <Filter>libs\cuda</Filter>
47 |     </ClInclude>
48 |   </ItemGroup>
49 |   <ItemGroup>
50 |     <Text Include="..\..\src\todo.txt" />
51 |   </ItemGroup>
52 |   <ItemGroup>
53 |     <Filter Include="util">
54 |       <UniqueIdentifier>{a0232da8-2097-49f4-9412-0e4223c7ba4d}</UniqueIdentifier>
55 |     </Filter>
56 |     <Filter Include="libs">
57 |       <UniqueIdentifier>{f8ccb03d-e5cc-438b-96d6-5f9b5fb54160}</UniqueIdentifier>
58 |     </Filter>
59 |     <Filter Include="libs\cuda">
60 |       <UniqueIdentifier>{ea2a8fd1-3d76-496e-9ad4-123e8f208140}</UniqueIdentifier>
61 |     </Filter>
62 |     <Filter Include="libs\magicavoxel_file_writer">
63 |       <UniqueIdentifier>{e8008c56-21a7-481c-9d07-a2e13e61a713}</UniqueIdentifier>
64 |     </Filter>
65 |   </ItemGroup>
66 | </Project>


--------------------------------------------------------------------------------
/msvc/vs2022/custom_includes.props:
--------------------------------------------------------------------------------
 1 | ﻿<?xml version="1.0" encoding="utf-8"?>
 2 | <Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
 3 |   <ImportGroup Label="PropertySheets" />
 4 |   <PropertyGroup Label="UserMacros"> <!-- Absolute, machine-specific paths: adjust all three to the local setup -->
 5 |     <TRIMESH_DIR>D:\dev\libs\trimesh2\</TRIMESH_DIR> <!-- root used below for the trimesh include folder and the lib.Win... library folder -->
 6 |     <GLM_DIR>D:\dev\libs\glm\</GLM_DIR> <!-- added to IncludePath below -->
 7 |     <BINARY_OUTPUT_DIR>D:\dev\Binaries\</BINARY_OUTPUT_DIR> <!-- exported as a build macro below -->
 8 |   </PropertyGroup>
 9 |   <PropertyGroup>
10 |     <IncludePath>$(GLM_DIR);$(TRIMESH_DIR)\include\;$(IncludePath)</IncludePath>
11 |     <_PropertySheetDisplayName>custom_includes</_PropertySheetDisplayName>
12 |     <LibraryPath>$(TRIMESH_DIR)\lib.Win$(PlatformArchitecture).vs$(PlatformToolsetVersion);$(LibraryPath)</LibraryPath>
13 |   </PropertyGroup>
14 |   <PropertyGroup />
15 |   <ItemDefinitionGroup />
16 |   <ItemGroup>
17 |     <BuildMacro Include="BINARY_OUTPUT_DIR">
18 |       <Value>$(BINARY_OUTPUT_DIR)</Value>
19 |       <EnvironmentVariable>true</EnvironmentVariable>
20 |     </BuildMacro>
21 |     <BuildMacro Include="TRIMESH_DIR">
22 |       <Value>$(TRIMESH_DIR)</Value>
23 |       <EnvironmentVariable>true</EnvironmentVariable>
24 |     </BuildMacro>
25 |     <BuildMacro Include="GLM_DIR">
26 |       <Value>$(GLM_DIR)</Value>
27 |       <EnvironmentVariable>true</EnvironmentVariable>
28 |     </BuildMacro>
29 |   </ItemGroup>
30 | </Project>


--------------------------------------------------------------------------------
/src/cpu_voxelizer.cpp:
--------------------------------------------------------------------------------
  1 | #include "cpu_voxelizer.h"
  2 | #define float_error 0.000001
  3 | 
  4 | namespace cpu_voxelizer {
  5 | 
  6 | 	// Set the bit for voxel `index` in the bit-packed voxel table (32 voxels per unsigned int)
  7 | 	void setBit(unsigned int* voxel_table, size_t index) {
  8 | 		const size_t int_location = index / size_t(32); // word that holds this voxel
  9 | 		const uint32_t bit_pos = static_cast<uint32_t>(size_t(31) - (index % size_t(32))); // we count bit positions RtL, but array indices LtR
 10 | 		const uint32_t mask = uint32_t(1) << bit_pos; // unsigned literal: shifting a signed 1 into bit 31 overflows int
 11 | 		// A single-word OR only needs an atomic update; an unnamed critical section
 12 | 		// would be one process-wide lock that every thread has to queue behind.
 13 | 		#pragma omp atomic
 14 | 		voxel_table[int_location] |= mask;
 15 | 	}
 16 | 
 17 | 	// Encode (x, y, z) into a 64-bit morton code with the host LUTs, one byte of each coordinate per step
 18 | 	uint64_t mortonEncode_LUT(unsigned int x, unsigned int y, unsigned int z) {
 19 | 		uint64_t answer = 0;
 20 | 		answer = host_morton256_z[(z >> 16) & 0xFF] | // bits 16..23 of each coordinate
 21 | 			host_morton256_y[(y >> 16) & 0xFF] |
 22 | 			host_morton256_x[(x >> 16) & 0xFF];
 23 | 		answer = answer << 24 | // 24 per step (48 in total for the group above); a larger shift here would push that group past bit 63
 24 | 			host_morton256_z[(z >> 8) & 0xFF] | // bits 8..15 of each coordinate
 25 | 			host_morton256_y[(y >> 8) & 0xFF] |
 26 | 			host_morton256_x[(x >> 8) & 0xFF];
 27 | 		answer = answer << 24 | // assumes every LUT entry fits in 24 bits - TODO confirm against morton_LUTs.h
 28 | 			host_morton256_z[(z) & 0xFF] | // bits 0..7 of each coordinate
 29 | 			host_morton256_y[(y) & 0xFF] |
 30 | 			host_morton256_x[(x) & 0xFF];
 31 | 		return answer;
 32 | 	}
 33 | 
 34 | 	// Surface voxelization on the CPU: sets the bit of every voxel that passes the triangle/box overlap tests below. info supplies bbox.min, unit, gridsize and n_triangles; voxel_table is the bit-packed output; morton_order selects morton instead of linear indexing. NOTE: translates themesh->vertices in place by -info.bbox.min.
 35 | 	void cpu_voxelize_mesh(voxinfo info, trimesh::TriMesh* themesh, unsigned int* voxel_table, bool morton_order) {
 36 | 		Timer cpu_voxelization_timer; cpu_voxelization_timer.start();
 37 | 
 38 | 		// PREPASS
 39 | 		// Move all vertices to origin (can be done in parallel)
 40 | 		trimesh::vec3 move_min = float3_to_trimesh<trimesh::vec3>(info.bbox.min);
 41 | #pragma omp parallel for
 42 | 		for (int64_t i = 0; i < (int64_t) themesh->vertices.size(); i++) {
 43 | 			if (i == 0) { printf("[Info] Using %d threads \n", omp_get_num_threads()); }
 44 | 			themesh->vertices[i] = themesh->vertices[i] - move_min;
 45 | 		}
 46 | 
 47 | #ifdef _DEBUG
 48 | 		size_t debug_n_triangles = 0, debug_n_voxels_tested = 0, debug_n_voxels_marked = 0;
 49 | 		// The debug counters are incremented by every thread, so they are combined with a reduction to avoid a data race
 50 | #pragma omp parallel for reduction(+:debug_n_triangles, debug_n_voxels_tested, debug_n_voxels_marked)
 51 | #else
 52 | #pragma omp parallel for
 53 | #endif
 54 | 		for (int64_t i = 0; i < (int64_t) info.n_triangles; i++) {
 55 | 			// Common variables used in the voxelization process
 56 | 			float3 delta_p = make_float3(info.unit.x, info.unit.y, info.unit.z);
 57 | 			float3 c = make_float3(0.0f, 0.0f, 0.0f); // critical point
 58 | 			int3 grid_max = make_int3(info.gridsize.x - 1, info.gridsize.y - 1, info.gridsize.z - 1); // grid max (grid runs from 0 to gridsize-1)
 59 | #ifdef _DEBUG
 60 | 			debug_n_triangles++;
 61 | #endif
 62 | 			// COMPUTE COMMON TRIANGLE PROPERTIES
 63 | 			float3 v0 = trimesh_to_float3<trimesh::point>(themesh->vertices[themesh->faces[i][0]]);
 64 | 			float3 v1 = trimesh_to_float3<trimesh::point>(themesh->vertices[themesh->faces[i][1]]);
 65 | 			float3 v2 = trimesh_to_float3<trimesh::point>(themesh->vertices[themesh->faces[i][2]]);
 66 | 
 67 | 			// Edge vectors
 68 | 			float3 e0 = v1-v0;
 69 | 			float3 e1 = v2-v1;
 70 | 			float3 e2 = v0-v2;
 71 | 			// Normal vector pointing up from the triangle
 72 | 			float3 n = normalize(cross(e0, e1));
 73 | 
 74 | 			// COMPUTE TRIANGLE BBOX IN GRID
 75 | 			// Triangle bounding box in world coordinates is min(v0,v1,v2) and max(v0,v1,v2)
 76 | 			AABox<float3> t_bbox_world(fminf(v0, fminf(v1, v2)), fmaxf(v0, fmaxf(v1, v2)));
 77 | 			// Triangle bounding box in voxel grid coordinates is the world bounding box divided by the grid unit vector
 78 | 			AABox<int3> t_bbox_grid;
 79 | 			t_bbox_grid.min = clamp(float3_to_int3(t_bbox_world.min / info.unit), make_int3(0, 0, 0), grid_max);
 80 | 			t_bbox_grid.max = clamp(float3_to_int3(t_bbox_world.max / info.unit), make_int3(0, 0, 0), grid_max);
 81 | 
 82 | 			// PREPARE PLANE TEST PROPERTIES
 83 | 			if (n.x > 0.0f) { c.x = info.unit.x; }
 84 | 			if (n.y > 0.0f) { c.y = info.unit.y; }
 85 | 			if (n.z > 0.0f) { c.z = info.unit.z; }
 86 | 			float d1 = dot(n, (c - v0));
 87 | 			float d2 = dot(n, ((delta_p - c) - v0));
 88 | 
 89 | 			// PREPARE PROJECTION TEST PROPERTIES
 90 | 			// XY plane
 91 | 			float2 n_xy_e0 = make_float2(-1.0f * e0.y, e0.x);
 92 | 			float2 n_xy_e1 = make_float2(-1.0f * e1.y, e1.x);
 93 | 			float2 n_xy_e2 = make_float2(-1.0f * e2.y, e2.x);
 94 | 			if (n.z < 0.0f) {
 95 | 				n_xy_e0 = -n_xy_e0;
 96 | 				n_xy_e1 = -n_xy_e1;
 97 | 				n_xy_e2 = -n_xy_e2;
 98 | 			}
 99 | 			float d_xy_e0 = (-1.0f * dot(n_xy_e0, make_float2(v0.x, v0.y))) + max(0.0f, info.unit.x * n_xy_e0.x) + max(0.0f, info.unit.y * n_xy_e0.y);
100 | 			float d_xy_e1 = (-1.0f * dot(n_xy_e1, make_float2(v1.x, v1.y))) + max(0.0f, info.unit.x * n_xy_e1.x) + max(0.0f, info.unit.y * n_xy_e1.y);
101 | 			float d_xy_e2 = (-1.0f * dot(n_xy_e2, make_float2(v2.x, v2.y))) + max(0.0f, info.unit.x * n_xy_e2.x) + max(0.0f, info.unit.y * n_xy_e2.y);
102 | 			// YZ plane
103 | 			float2 n_yz_e0 = make_float2(-1.0f * e0.z, e0.y);
104 | 			float2 n_yz_e1 = make_float2(-1.0f * e1.z, e1.y);
105 | 			float2 n_yz_e2 = make_float2(-1.0f * e2.z, e2.y);
106 | 			if (n.x < 0.0f) {
107 | 				n_yz_e0 = -n_yz_e0;
108 | 				n_yz_e1 = -n_yz_e1;
109 | 				n_yz_e2 = -n_yz_e2;
110 | 			}
111 | 			float d_yz_e0 = (-1.0f * dot(n_yz_e0, make_float2(v0.y, v0.z))) + max(0.0f, info.unit.y * n_yz_e0.x) + max(0.0f, info.unit.z * n_yz_e0.y);
112 | 			float d_yz_e1 = (-1.0f * dot(n_yz_e1, make_float2(v1.y, v1.z))) + max(0.0f, info.unit.y * n_yz_e1.x) + max(0.0f, info.unit.z * n_yz_e1.y);
113 | 			float d_yz_e2 = (-1.0f * dot(n_yz_e2, make_float2(v2.y, v2.z))) + max(0.0f, info.unit.y * n_yz_e2.x) + max(0.0f, info.unit.z * n_yz_e2.y);
114 | 			// ZX plane
115 | 			float2 n_zx_e0 = make_float2(-1.0f * e0.x, e0.z);
116 | 			float2 n_zx_e1 = make_float2(-1.0f * e1.x, e1.z);
117 | 			float2 n_zx_e2 = make_float2(-1.0f * e2.x, e2.z);
118 | 			if (n.y < 0.0f) {
119 | 				n_zx_e0 = -n_zx_e0;
120 | 				n_zx_e1 = -n_zx_e1;
121 | 				n_zx_e2 = -n_zx_e2;
122 | 			}
123 | 			float d_xz_e0 = (-1.0f * dot(n_zx_e0, make_float2(v0.z, v0.x))) + max(0.0f, info.unit.x * n_zx_e0.x) + max(0.0f, info.unit.z * n_zx_e0.y);
124 | 			float d_xz_e1 = (-1.0f * dot(n_zx_e1, make_float2(v1.z, v1.x))) + max(0.0f, info.unit.x * n_zx_e1.x) + max(0.0f, info.unit.z * n_zx_e1.y);
125 | 			float d_xz_e2 = (-1.0f * dot(n_zx_e2, make_float2(v2.z, v2.x))) + max(0.0f, info.unit.x * n_zx_e2.x) + max(0.0f, info.unit.z * n_zx_e2.y);
126 | 
127 | 			// test possible grid boxes for overlap
128 | 			for (int z = t_bbox_grid.min.z; z <= t_bbox_grid.max.z; z++) {
129 | 				for (int y = t_bbox_grid.min.y; y <= t_bbox_grid.max.y; y++) {
130 | 					for (int x = t_bbox_grid.min.x; x <= t_bbox_grid.max.x; x++) {
131 | 						// size_t location = x + (y*info.gridsize) + (z*info.gridsize*info.gridsize);
132 | 						// if (checkBit(voxel_table, location)){ continue; }
133 | #ifdef _DEBUG
134 | 						debug_n_voxels_tested++;
135 | #endif
136 | 
137 | 						// TRIANGLE PLANE THROUGH BOX TEST
138 | 						float3 p = make_float3(x * info.unit.x, y * info.unit.y, z * info.unit.z);
139 | 						float nDOTp = dot(n, p);
140 | 						if (((nDOTp + d1) * (nDOTp + d2)) > 0.0f) { continue; }
141 | 
142 | 						// PROJECTION TESTS
143 | 						// XY
144 | 						float2 p_xy = make_float2(p.x, p.y);
145 | 						if ((dot(n_xy_e0, p_xy) + d_xy_e0) < 0.0f) { continue; }
146 | 						if ((dot(n_xy_e1, p_xy) + d_xy_e1) < 0.0f) { continue; }
147 | 						if ((dot(n_xy_e2, p_xy) + d_xy_e2) < 0.0f) { continue; }
148 | 
149 | 						// YZ
150 | 						float2 p_yz = make_float2(p.y, p.z);
151 | 						if ((dot(n_yz_e0, p_yz) + d_yz_e0) < 0.0f) { continue; }
152 | 						if ((dot(n_yz_e1, p_yz) + d_yz_e1) < 0.0f) { continue; }
153 | 						if ((dot(n_yz_e2, p_yz) + d_yz_e2) < 0.0f) { continue; }
154 | 
155 | 						// XZ
156 | 						float2 p_zx = make_float2(p.z, p.x);
157 | 						if ((dot(n_zx_e0, p_zx) + d_xz_e0) < 0.0f) { continue; }
158 | 						if ((dot(n_zx_e1, p_zx) + d_xz_e1) < 0.0f) { continue; }
159 | 						if ((dot(n_zx_e2, p_zx) + d_xz_e2) < 0.0f) { continue; }
160 | #ifdef _DEBUG
161 | 						debug_n_voxels_marked += 1;
162 | #endif
163 | 						if (morton_order) {
164 | 							size_t location = mortonEncode_LUT(x, y, z);
165 | 							setBit(voxel_table, location);
166 | 						}
167 | 						else { // NOTE(review): the strides below are gridsize.y and gridsize.y*gridsize.z, which only equal the x-fastest strides when the grid is cubic - confirm against the code that reads the table
168 | 							size_t location = static_cast<size_t>(x) + (static_cast<size_t>(y)* static_cast<size_t>(info.gridsize.y)) + (static_cast<size_t>(z)* static_cast<size_t>(info.gridsize.y)* static_cast<size_t>(info.gridsize.z));
169 | 							//std:: cout << "Voxel found at " << x << " " << y << " " << z << std::endl;
170 | 							setBit(voxel_table, location);
171 | 						}
172 | 						continue;
173 | 					}
174 | 				}
175 | 			}
176 | 		}
177 | 		cpu_voxelization_timer.stop(); std::fprintf(stdout, "[Perf] CPU voxelization time: %.1f ms \n", cpu_voxelization_timer.elapsed_time_milliseconds);
178 | #ifdef _DEBUG
179 | 		printf("[Debug] Processed %zu triangles on the CPU \n", debug_n_triangles); // %zu is the size_t conversion
180 | 		printf("[Debug] Tested %zu voxels for overlap on CPU \n", debug_n_voxels_tested);
181 | 		printf("[Debug] Marked %zu voxels as filled (includes duplicates!) on CPU \n", debug_n_voxels_marked);
182 | #endif
183 | 	}
184 | 
185 | 	// Flip (XOR) the bit for voxel `index` in the bit-packed voxel table (32 voxels per unsigned int)
186 | 	void setBitXor(unsigned int* voxel_table, size_t index) {
187 | 		const size_t int_location = index / size_t(32); // word that holds this voxel
188 | 		const unsigned int bit_pos = static_cast<unsigned int>(size_t(31) - (index % size_t(32))); // we count bit positions RtL, but array indices LtR
189 | 		const unsigned int mask = 1u << bit_pos; // unsigned literal: shifting a signed 1 into bit 31 overflows int
190 | 		// A single-word XOR only needs an atomic update; an unnamed critical section
191 | 		// would be one process-wide lock that every thread has to queue behind.
192 | 		#pragma omp atomic
193 | 		voxel_table[int_location] ^= mask;
194 | 	}
195 | 
196 | 	bool TopLeftEdge(float2 v0, float2 v1) { // true when v1 has the smaller y, or the y values are equal and v1 has the smaller x
197 | 		return ((v0.y > v1.y) || (v0.y == v1.y && v1.x < v0.x));
198 | 	}
199 | 
200 | 	// Returns true when the 2D triangle (v0, v1, v2) is counterclockwise, i.e. its edge cross product is strictly positive
201 | 	bool checkCCW(float2 v0, float2 v1, float2 v2) {
202 | 		// Vectors from v0 to the two other vertices
203 | 		const float2 to_v1 = v1 - v0;
204 | 		const float2 to_v2 = v2 - v0;
205 | 		// z component of their cross product
206 | 		const float cross_z = to_v1.x * to_v2.y - to_v2.x * to_v1.y;
207 | 		// A zero result (degenerate triangle) counts as not counterclockwise
208 | 		return cross_z > 0;
209 | 	}
210 | 
211 | 	// Solve the plane equation n . (p - v0) = 0 for p.x, where point.x / point.y are used as the y / z coordinates of p. Divides by n.x, so callers must rule out n.x == 0.
212 | 	float get_x_coordinate(float3 n, float3 v0, float2 point) {
213 | 		return (-(n.y * (point.x - v0.y) + n.z * (point.y - v0.z)) / n.x + v0.x);
214 | 	}
215 | 
216 | 	// Classify a 2D point against the triangle (v0, v1, v2).
217 | 	int check_point_triangle(float2 v0, float2 v1, float2 v2, float2 point) {
218 | 		// Result: 1, 2 or 3 when the point lies (within float_error) on edge v0-v1, v1-v2 or v2-v0,
219 | 		// tested in that order so the first matching edge wins; 0 when the point is inside;
220 | 		// -1 in every other case.
221 | 		const float2 to_v0 = point - v0;
222 | 		const float2 to_v1 = point - v1;
223 | 		const float2 to_v2 = point - v2;
224 | 
225 | 		// 2D cross products of the vectors towards consecutive vertices
226 | 		const float cross01 = to_v0.x * to_v1.y - to_v0.y * to_v1.x;
227 | 		const float cross12 = to_v1.x * to_v2.y - to_v1.y * to_v2.x;
228 | 		const float cross20 = to_v2.x * to_v0.y - to_v2.y * to_v0.x;
229 | 
230 | 		// On an edge: (near) zero cross product and the point sits between both end points on each axis
231 | 		if (std::fabs(cross01) < float_error && to_v0.x * to_v1.x <= 0 && to_v0.y * to_v1.y <= 0) { return 1; }
232 | 		if (std::fabs(cross12) < float_error && to_v1.x * to_v2.x <= 0 && to_v1.y * to_v2.y <= 0) { return 2; }
233 | 		if (std::fabs(cross20) < float_error && to_v2.x * to_v0.x <= 0 && to_v2.y * to_v0.y <= 0) { return 3; }
234 | 
235 | 		// Inside only when all three cross products share the same sign
236 | 		const bool same_sign = (cross01 * cross12 > 0) && (cross01 * cross20 > 0);
237 | 		return same_sign ? 0 : -1;
238 | 	}
239 | 
240 | 	// Solid voxelization on the CPU: for every triangle whose normal has a usable x component, XOR-flips the voxels from x = 0 up to the triangle surface for each (y, z) voxel centre covered by the triangle's yz projection. voxel_table is the bit-packed output; morton_order selects morton instead of linear indexing. NOTE: translates themesh->vertices in place by -info.bbox.min.
241 | 	void cpu_voxelize_mesh_solid(voxinfo info, trimesh::TriMesh* themesh, unsigned int* voxel_table, bool morton_order) {
242 | 		Timer cpu_voxelization_timer; cpu_voxelization_timer.start();
243 | 
244 | 		// PREPASS
245 | 		// Move all vertices to origin (can be done in parallel)
246 | 		trimesh::vec3 move_min = float3_to_trimesh<trimesh::vec3>(info.bbox.min);
247 | #pragma omp parallel for
248 | 		for (int64_t i = 0; i < (int64_t) themesh->vertices.size(); i++) {
249 | 			if (i == 0) { printf("[Info] Using %d threads \n", omp_get_num_threads()); }
250 | 			themesh->vertices[i] = themesh->vertices[i] - move_min;
251 | 		}
252 | 
253 | #pragma omp parallel for
254 | 		for (int64_t i = 0; i < (int64_t) info.n_triangles; i++) {
255 | 			// Triangle vertices
256 | 			float3 v0 = trimesh_to_float3<trimesh::point>(themesh->vertices[themesh->faces[i][0]]);
257 | 			float3 v1 = trimesh_to_float3<trimesh::point>(themesh->vertices[themesh->faces[i][1]]);
258 | 			float3 v2 = trimesh_to_float3<trimesh::point>(themesh->vertices[themesh->faces[i][2]]);
259 | 			// Edge vectors
260 | 			float3 e0 = v1 - v0;
261 | 			float3 e1 = v2 - v1;
262 | 			// (the third edge, v0 - v2, is not needed in this function)
263 | 			// Normal vector pointing up from the triangle
264 | 			float3 n = normalize(cross(e0, e1));
265 | 			if (!(std::fabs(n.x) >= float_error)) { continue; } // negated form also skips a NaN normal (presumably what a zero-area triangle yields - confirm in helper_math.h); n.x is a divisor below
266 | 
267 | 			// Calculate the projection of three point into yoz plane
268 | 			float2 v0_yz = make_float2(v0.y, v0.z);
269 | 			float2 v1_yz = make_float2(v1.y, v1.z);
270 | 			float2 v2_yz = make_float2(v2.y, v2.z);
271 | 
272 | 			// Set the triangle counterclockwise
273 | 			if (!checkCCW(v0_yz, v1_yz, v2_yz))
274 | 			{
275 | 				float2 v3 = v1_yz;
276 | 				v1_yz = v2_yz;
277 | 				v2_yz = v3;
278 | 			}
279 | 
280 | 			// COMPUTE TRIANGLE BBOX IN GRID
281 | 			// Triangle bounding box in world coordinates is min(v0,v1,v2) and max(v0,v1,v2)
282 | 			float2 bbox_max = fmaxf(v0_yz, fmaxf(v1_yz, v2_yz));
283 | 			float2 bbox_min = fminf(v0_yz, fminf(v1_yz, v2_yz));
284 | 
285 | 			float2 bbox_max_grid = make_float2(floor(bbox_max.x / info.unit.y - 0.5f), floor(bbox_max.y / info.unit.z - 0.5f));
286 | 			float2 bbox_min_grid = make_float2(ceil(bbox_min.x / info.unit.y - 0.5f), ceil(bbox_min.y / info.unit.z - 0.5f));
287 | 
288 | 			for (int y = static_cast<int>(bbox_min_grid.x); y <= bbox_max_grid.x; y++)
289 | 			{
290 | 				for (int z = static_cast<int>(bbox_min_grid.y); z <= bbox_max_grid.y; z++)
291 | 				{
292 | 					float2 point = make_float2((y + 0.5f) * info.unit.y, (z + 0.5f) * info.unit.z); // voxel centre in the yz plane
293 | 					int checknum = check_point_triangle(v0_yz, v1_yz, v2_yz, point);
294 | 					if ((checknum == 1 && TopLeftEdge(v0_yz, v1_yz)) || (checknum == 2 && TopLeftEdge(v1_yz, v2_yz)) || (checknum == 3 && TopLeftEdge(v2_yz, v0_yz)) || (checknum == 0))
295 | 					{
296 | 						const int xmax = int(get_x_coordinate(n, v0, point) / info.unit.x - 0.5); // kept signed: a negative value must skip the loop instead of wrapping to a huge unsigned bound
297 | 						for (int x = 0; x <= xmax; x++)
298 | 						{
299 | 							if (morton_order) {
300 | 								size_t location = mortonEncode_LUT(x, y, z);
301 | 								setBitXor(voxel_table, location);
302 | 							}
303 | 							else {
304 | 								size_t location = static_cast<size_t>(x) + (static_cast<size_t>(y) * static_cast<size_t>(info.gridsize.y)) + (static_cast<size_t>(z) * static_cast<size_t>(info.gridsize.y) * static_cast<size_t>(info.gridsize.z));
305 | 								setBitXor(voxel_table, location);
306 | 							}
307 | 							continue;
308 | 						}
309 | 					}
310 | 				}
311 | 			}
312 | 		}
313 | 		cpu_voxelization_timer.stop(); fprintf(stdout, "[Perf] CPU voxelization time: %.1f ms \n", cpu_voxelization_timer.elapsed_time_milliseconds);
314 | 	}
315 | }


--------------------------------------------------------------------------------
/src/cpu_voxelizer.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <TriMesh.h>
 4 | #include <cstdio>
 5 | #include <cmath> 
 6 | #include <omp.h>
 7 | #include "libs/cuda/helper_math.h"
 8 | #include "util.h"
 9 | #include "timer.h"
10 | #include "morton_LUTs.h"
11 | 
12 | namespace cpu_voxelizer { // CPU (OpenMP) voxelizers; both write into a bit-packed voxel_table and use morton indexing when morton_order is true
13 | 	void cpu_voxelize_mesh(voxinfo info, trimesh::TriMesh* themesh, unsigned int* voxel_table, bool morton_order); // surface: sets the bits of voxels that pass the triangle/box overlap tests
14 | 	void cpu_voxelize_mesh_solid(voxinfo info, trimesh::TriMesh* themesh, unsigned int* voxel_table, bool morton_order); // solid: XOR-flips voxel bits along x, per triangle
15 | }


--------------------------------------------------------------------------------
/src/libs/cuda/helper_cuda.h:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright 1993-2017 NVIDIA Corporation.  All rights reserved.
  3 |  *
  4 |  * Please refer to the NVIDIA end user license agreement (EULA) associated
  5 |  * with this source code for terms and conditions that govern your use of
  6 |  * this software. Any use, reproduction, disclosure, or distribution of
  7 |  * this software and related documentation outside the terms of the EULA
  8 |  * is strictly prohibited.
  9 |  *
 10 |  */
 11 | 
 12 | ////////////////////////////////////////////////////////////////////////////////
 13 | // These are CUDA Helper functions for initialization and error checking
 14 | 
 15 | #ifndef COMMON_HELPER_CUDA_H_
 16 | #define COMMON_HELPER_CUDA_H_
 17 | 
 18 | #pragma once
 19 | 
 20 | #include <stdint.h>
 21 | #include <stdio.h>
 22 | #include <stdlib.h>
 23 | #include <string.h>
 24 | 
 25 | #include "helper_string.h"
 26 | 
 27 | #ifndef EXIT_WAIVED
 28 | #define EXIT_WAIVED 2
 29 | #endif
 30 | 
 31 | // Note: your SDK sample is required to include the proper header
 32 | // files. Please refer to the CUDA examples for the needed CUDA
 33 | // headers, which may change depending on which CUDA functions are used.
 34 | 
 35 | // CUDA Runtime API errors: returns whatever cudaGetErrorName() reports for the code
 36 | #ifdef __DRIVER_TYPES_H__
 37 | static const char *_cudaGetErrorEnum(cudaError_t error) {
 38 |   return cudaGetErrorName(error);
 39 | }
 40 | #endif
 41 | 
 42 | #ifdef CUDA_DRIVER_API
 43 | // CUDA Driver API errors: the name filled in by cuGetErrorName(), or "<unknown>" when the pointer is still NULL afterwards
 44 | static const char *_cudaGetErrorEnum(CUresult error) {
 45 |   static char unknown[] = "<unknown>";
 46 |   const char *ret = NULL;
 47 |   cuGetErrorName(error, &ret);
 48 |   return ret ? ret : unknown;
 49 | }
 50 | #endif
 51 | 
 52 | #ifdef CUBLAS_API_H_
 53 | // cuBLAS API errors: maps a status code to its enumerator name; values not listed yield "<unknown>"
 54 | static const char *_cudaGetErrorEnum(cublasStatus_t error) {
 55 |   switch (error) {
 56 |     case CUBLAS_STATUS_SUCCESS:
 57 |       return "CUBLAS_STATUS_SUCCESS";
 58 | 
 59 |     case CUBLAS_STATUS_NOT_INITIALIZED:
 60 |       return "CUBLAS_STATUS_NOT_INITIALIZED";
 61 | 
 62 |     case CUBLAS_STATUS_ALLOC_FAILED:
 63 |       return "CUBLAS_STATUS_ALLOC_FAILED";
 64 | 
 65 |     case CUBLAS_STATUS_INVALID_VALUE:
 66 |       return "CUBLAS_STATUS_INVALID_VALUE";
 67 | 
 68 |     case CUBLAS_STATUS_ARCH_MISMATCH:
 69 |       return "CUBLAS_STATUS_ARCH_MISMATCH";
 70 | 
 71 |     case CUBLAS_STATUS_MAPPING_ERROR:
 72 |       return "CUBLAS_STATUS_MAPPING_ERROR";
 73 | 
 74 |     case CUBLAS_STATUS_EXECUTION_FAILED:
 75 |       return "CUBLAS_STATUS_EXECUTION_FAILED";
 76 | 
 77 |     case CUBLAS_STATUS_INTERNAL_ERROR:
 78 |       return "CUBLAS_STATUS_INTERNAL_ERROR";
 79 | 
 80 |     case CUBLAS_STATUS_NOT_SUPPORTED:
 81 |       return "CUBLAS_STATUS_NOT_SUPPORTED";
 82 | 
 83 |     case CUBLAS_STATUS_LICENSE_ERROR:
 84 |       return "CUBLAS_STATUS_LICENSE_ERROR";
 85 |   }
 86 | 
 87 |   return "<unknown>";
 88 | }
 89 | #endif
 90 | 
 91 | #ifdef _CUFFT_H_
 92 | // cuFFT API errors: maps a result code to its enumerator name; values not listed yield "<unknown>"
 93 | static const char *_cudaGetErrorEnum(cufftResult error) {
 94 |   switch (error) {
 95 |     case CUFFT_SUCCESS:
 96 |       return "CUFFT_SUCCESS";
 97 | 
 98 |     case CUFFT_INVALID_PLAN:
 99 |       return "CUFFT_INVALID_PLAN";
100 | 
101 |     case CUFFT_ALLOC_FAILED:
102 |       return "CUFFT_ALLOC_FAILED";
103 | 
104 |     case CUFFT_INVALID_TYPE:
105 |       return "CUFFT_INVALID_TYPE";
106 | 
107 |     case CUFFT_INVALID_VALUE:
108 |       return "CUFFT_INVALID_VALUE";
109 | 
110 |     case CUFFT_INTERNAL_ERROR:
111 |       return "CUFFT_INTERNAL_ERROR";
112 | 
113 |     case CUFFT_EXEC_FAILED:
114 |       return "CUFFT_EXEC_FAILED";
115 | 
116 |     case CUFFT_SETUP_FAILED:
117 |       return "CUFFT_SETUP_FAILED";
118 | 
119 |     case CUFFT_INVALID_SIZE:
120 |       return "CUFFT_INVALID_SIZE";
121 | 
122 |     case CUFFT_UNALIGNED_DATA:
123 |       return "CUFFT_UNALIGNED_DATA";
124 | 
125 |     case CUFFT_INCOMPLETE_PARAMETER_LIST:
126 |       return "CUFFT_INCOMPLETE_PARAMETER_LIST";
127 | 
128 |     case CUFFT_INVALID_DEVICE:
129 |       return "CUFFT_INVALID_DEVICE";
130 | 
131 |     case CUFFT_PARSE_ERROR:
132 |       return "CUFFT_PARSE_ERROR";
133 | 
134 |     case CUFFT_NO_WORKSPACE:
135 |       return "CUFFT_NO_WORKSPACE";
136 | 
137 |     case CUFFT_NOT_IMPLEMENTED:
138 |       return "CUFFT_NOT_IMPLEMENTED";
139 | 
140 |     case CUFFT_LICENSE_ERROR:
141 |       return "CUFFT_LICENSE_ERROR";
142 | 
143 |     case CUFFT_NOT_SUPPORTED:
144 |       return "CUFFT_NOT_SUPPORTED";
145 |   }
146 | 
147 |   return "<unknown>";
148 | }
149 | #endif
150 | 
151 | #ifdef CUSPARSEAPI
152 | // cuSPARSE API errors: maps a status code to its enumerator name; values not listed yield "<unknown>"
153 | static const char *_cudaGetErrorEnum(cusparseStatus_t error) {
154 |   switch (error) {
155 |     case CUSPARSE_STATUS_SUCCESS:
156 |       return "CUSPARSE_STATUS_SUCCESS";
157 | 
158 |     case CUSPARSE_STATUS_NOT_INITIALIZED:
159 |       return "CUSPARSE_STATUS_NOT_INITIALIZED";
160 | 
161 |     case CUSPARSE_STATUS_ALLOC_FAILED:
162 |       return "CUSPARSE_STATUS_ALLOC_FAILED";
163 | 
164 |     case CUSPARSE_STATUS_INVALID_VALUE:
165 |       return "CUSPARSE_STATUS_INVALID_VALUE";
166 | 
167 |     case CUSPARSE_STATUS_ARCH_MISMATCH:
168 |       return "CUSPARSE_STATUS_ARCH_MISMATCH";
169 | 
170 |     case CUSPARSE_STATUS_MAPPING_ERROR:
171 |       return "CUSPARSE_STATUS_MAPPING_ERROR";
172 | 
173 |     case CUSPARSE_STATUS_EXECUTION_FAILED:
174 |       return "CUSPARSE_STATUS_EXECUTION_FAILED";
175 | 
176 |     case CUSPARSE_STATUS_INTERNAL_ERROR:
177 |       return "CUSPARSE_STATUS_INTERNAL_ERROR";
178 | 
179 |     case CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED:
180 |       return "CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED";
181 |   }
182 | 
183 |   return "<unknown>";
184 | }
185 | #endif
186 | 
187 | #ifdef CUSOLVER_COMMON_H_
188 | // cuSOLVER API errors: maps a status code to its enumerator name; values not listed yield "<unknown>"
189 | static const char *_cudaGetErrorEnum(cusolverStatus_t error) {
190 |   switch (error) {
191 |     case CUSOLVER_STATUS_SUCCESS:
192 |       return "CUSOLVER_STATUS_SUCCESS";
193 |     case CUSOLVER_STATUS_NOT_INITIALIZED:
194 |       return "CUSOLVER_STATUS_NOT_INITIALIZED";
195 |     case CUSOLVER_STATUS_ALLOC_FAILED:
196 |       return "CUSOLVER_STATUS_ALLOC_FAILED";
197 |     case CUSOLVER_STATUS_INVALID_VALUE:
198 |       return "CUSOLVER_STATUS_INVALID_VALUE";
199 |     case CUSOLVER_STATUS_ARCH_MISMATCH:
200 |       return "CUSOLVER_STATUS_ARCH_MISMATCH";
201 |     case CUSOLVER_STATUS_MAPPING_ERROR:
202 |       return "CUSOLVER_STATUS_MAPPING_ERROR";
203 |     case CUSOLVER_STATUS_EXECUTION_FAILED:
204 |       return "CUSOLVER_STATUS_EXECUTION_FAILED";
205 |     case CUSOLVER_STATUS_INTERNAL_ERROR:
206 |       return "CUSOLVER_STATUS_INTERNAL_ERROR";
207 |     case CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED:
208 |       return "CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED";
209 |     case CUSOLVER_STATUS_NOT_SUPPORTED:
210 |       return "CUSOLVER_STATUS_NOT_SUPPORTED";  // no trailing space, so the text equals the enumerator name like every other case
211 |     case CUSOLVER_STATUS_ZERO_PIVOT:
212 |       return "CUSOLVER_STATUS_ZERO_PIVOT";
213 |     case CUSOLVER_STATUS_INVALID_LICENSE:
214 |       return "CUSOLVER_STATUS_INVALID_LICENSE";
215 |   }
216 | 
217 |   return "<unknown>";
218 | }
219 | #endif
220 | 
221 | #ifdef CURAND_H_
222 | // cuRAND API errors: maps a status code to its enumerator name; values not listed yield "<unknown>"
223 | static const char *_cudaGetErrorEnum(curandStatus_t error) {
224 |   switch (error) {
225 |     case CURAND_STATUS_SUCCESS:
226 |       return "CURAND_STATUS_SUCCESS";
227 | 
228 |     case CURAND_STATUS_VERSION_MISMATCH:
229 |       return "CURAND_STATUS_VERSION_MISMATCH";
230 | 
231 |     case CURAND_STATUS_NOT_INITIALIZED:
232 |       return "CURAND_STATUS_NOT_INITIALIZED";
233 | 
234 |     case CURAND_STATUS_ALLOCATION_FAILED:
235 |       return "CURAND_STATUS_ALLOCATION_FAILED";
236 | 
237 |     case CURAND_STATUS_TYPE_ERROR:
238 |       return "CURAND_STATUS_TYPE_ERROR";
239 | 
240 |     case CURAND_STATUS_OUT_OF_RANGE:
241 |       return "CURAND_STATUS_OUT_OF_RANGE";
242 | 
243 |     case CURAND_STATUS_LENGTH_NOT_MULTIPLE:
244 |       return "CURAND_STATUS_LENGTH_NOT_MULTIPLE";
245 | 
246 |     case CURAND_STATUS_DOUBLE_PRECISION_REQUIRED:
247 |       return "CURAND_STATUS_DOUBLE_PRECISION_REQUIRED";
248 | 
249 |     case CURAND_STATUS_LAUNCH_FAILURE:
250 |       return "CURAND_STATUS_LAUNCH_FAILURE";
251 | 
252 |     case CURAND_STATUS_PREEXISTING_FAILURE:
253 |       return "CURAND_STATUS_PREEXISTING_FAILURE";
254 | 
255 |     case CURAND_STATUS_INITIALIZATION_FAILED:
256 |       return "CURAND_STATUS_INITIALIZATION_FAILED";
257 | 
258 |     case CURAND_STATUS_ARCH_MISMATCH:
259 |       return "CURAND_STATUS_ARCH_MISMATCH";
260 | 
261 |     case CURAND_STATUS_INTERNAL_ERROR:
262 |       return "CURAND_STATUS_INTERNAL_ERROR";
263 |   }
264 | 
265 |   return "<unknown>";
266 | }
267 | #endif
268 | 
269 | #ifdef NVJPEGAPI
270 | // nvJPEG API errors
271 | static const char *_cudaGetErrorEnum(nvjpegStatus_t error) {
272 |   switch (error) {
273 |     case NVJPEG_STATUS_SUCCESS:
274 |       return "NVJPEG_STATUS_SUCCESS";
275 | 
276 |     case NVJPEG_STATUS_NOT_INITIALIZED:
277 |       return "NVJPEG_STATUS_NOT_INITIALIZED";
278 | 
279 |     case NVJPEG_STATUS_INVALID_PARAMETER:
280 |       return "NVJPEG_STATUS_INVALID_PARAMETER";
281 | 
282 |     case NVJPEG_STATUS_BAD_JPEG:
283 |       return "NVJPEG_STATUS_BAD_JPEG";
284 | 
285 |     case NVJPEG_STATUS_JPEG_NOT_SUPPORTED:
286 |       return "NVJPEG_STATUS_JPEG_NOT_SUPPORTED";
287 | 
288 |     case NVJPEG_STATUS_ALLOCATOR_FAILURE:
289 |       return "NVJPEG_STATUS_ALLOCATOR_FAILURE";
290 | 
291 |     case NVJPEG_STATUS_EXECUTION_FAILED:
292 |       return "NVJPEG_STATUS_EXECUTION_FAILED";
293 | 
294 |     case NVJPEG_STATUS_ARCH_MISMATCH:
295 |       return "NVJPEG_STATUS_ARCH_MISMATCH";
296 | 
297 |     case NVJPEG_STATUS_INTERNAL_ERROR:
298 |       return "NVJPEG_STATUS_INTERNAL_ERROR";
299 |   }
300 | 
301 |   return "<unknown>";
302 | }
303 | #endif
304 | 
305 | #ifdef NV_NPPIDEFS_H
306 | // NPP API errors
307 | static const char *_cudaGetErrorEnum(NppStatus error) {
308 |   switch (error) {
309 |     case NPP_NOT_SUPPORTED_MODE_ERROR:
310 |       return "NPP_NOT_SUPPORTED_MODE_ERROR";
311 | 
312 |     case NPP_ROUND_MODE_NOT_SUPPORTED_ERROR:
313 |       return "NPP_ROUND_MODE_NOT_SUPPORTED_ERROR";
314 | 
315 |     case NPP_RESIZE_NO_OPERATION_ERROR:
316 |       return "NPP_RESIZE_NO_OPERATION_ERROR";
317 | 
318 |     case NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY:
319 |       return "NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY";
320 | 
321 | #if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000
322 | 
323 |     case NPP_BAD_ARG_ERROR:
324 |       return "NPP_BAD_ARGUMENT_ERROR";
325 | 
326 |     case NPP_COEFF_ERROR:
327 |       return "NPP_COEFFICIENT_ERROR";
328 | 
329 |     case NPP_RECT_ERROR:
330 |       return "NPP_RECTANGLE_ERROR";
331 | 
332 |     case NPP_QUAD_ERROR:
333 |       return "NPP_QUADRANGLE_ERROR";
334 | 
335 |     case NPP_MEM_ALLOC_ERR:
336 |       return "NPP_MEMORY_ALLOCATION_ERROR";
337 | 
338 |     case NPP_HISTO_NUMBER_OF_LEVELS_ERROR:
339 |       return "NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR";
340 | 
341 |     case NPP_INVALID_INPUT:
342 |       return "NPP_INVALID_INPUT";
343 | 
344 |     case NPP_POINTER_ERROR:
345 |       return "NPP_POINTER_ERROR";
346 | 
347 |     case NPP_WARNING:
348 |       return "NPP_WARNING";
349 | 
350 |     case NPP_ODD_ROI_WARNING:
351 |       return "NPP_ODD_ROI_WARNING";
352 | #else
353 | 
354 |     // These are for CUDA 5.5 or higher
355 |     case NPP_BAD_ARGUMENT_ERROR:
356 |       return "NPP_BAD_ARGUMENT_ERROR";
357 | 
358 |     case NPP_COEFFICIENT_ERROR:
359 |       return "NPP_COEFFICIENT_ERROR";
360 | 
361 |     case NPP_RECTANGLE_ERROR:
362 |       return "NPP_RECTANGLE_ERROR";
363 | 
364 |     case NPP_QUADRANGLE_ERROR:
365 |       return "NPP_QUADRANGLE_ERROR";
366 | 
367 |     case NPP_MEMORY_ALLOCATION_ERR:
368 |       return "NPP_MEMORY_ALLOCATION_ERROR";
369 | 
370 |     case NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR:
371 |       return "NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR";
372 | 
373 |     case NPP_INVALID_HOST_POINTER_ERROR:
374 |       return "NPP_INVALID_HOST_POINTER_ERROR";
375 | 
376 |     case NPP_INVALID_DEVICE_POINTER_ERROR:
377 |       return "NPP_INVALID_DEVICE_POINTER_ERROR";
378 | #endif
379 | 
380 |     case NPP_LUT_NUMBER_OF_LEVELS_ERROR:
381 |       return "NPP_LUT_NUMBER_OF_LEVELS_ERROR";
382 | 
383 |     case NPP_TEXTURE_BIND_ERROR:
384 |       return "NPP_TEXTURE_BIND_ERROR";
385 | 
386 |     case NPP_WRONG_INTERSECTION_ROI_ERROR:
387 |       return "NPP_WRONG_INTERSECTION_ROI_ERROR";
388 | 
389 |     case NPP_NOT_EVEN_STEP_ERROR:
390 |       return "NPP_NOT_EVEN_STEP_ERROR";
391 | 
392 |     case NPP_INTERPOLATION_ERROR:
393 |       return "NPP_INTERPOLATION_ERROR";
394 | 
395 |     case NPP_RESIZE_FACTOR_ERROR:
396 |       return "NPP_RESIZE_FACTOR_ERROR";
397 | 
398 |     case NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR:
399 |       return "NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR";
400 | 
401 | #if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000
402 | 
403 |     case NPP_MEMFREE_ERR:
404 |       return "NPP_MEMFREE_ERR";
405 | 
406 |     case NPP_MEMSET_ERR:
407 |       return "NPP_MEMSET_ERR";
408 | 
409 |     case NPP_MEMCPY_ERR:
410 |       return "NPP_MEMCPY_ERROR";
411 | 
412 |     case NPP_MIRROR_FLIP_ERR:
413 |       return "NPP_MIRROR_FLIP_ERR";
414 | #else
415 | 
416 |     case NPP_MEMFREE_ERROR:
417 |       return "NPP_MEMFREE_ERROR";
418 | 
419 |     case NPP_MEMSET_ERROR:
420 |       return "NPP_MEMSET_ERROR";
421 | 
422 |     case NPP_MEMCPY_ERROR:
423 |       return "NPP_MEMCPY_ERROR";
424 | 
425 |     case NPP_MIRROR_FLIP_ERROR:
426 |       return "NPP_MIRROR_FLIP_ERROR";
427 | #endif
428 | 
429 |     case NPP_ALIGNMENT_ERROR:
430 |       return "NPP_ALIGNMENT_ERROR";
431 | 
432 |     case NPP_STEP_ERROR:
433 |       return "NPP_STEP_ERROR";
434 | 
435 |     case NPP_SIZE_ERROR:
436 |       return "NPP_SIZE_ERROR";
437 | 
438 |     case NPP_NULL_POINTER_ERROR:
439 |       return "NPP_NULL_POINTER_ERROR";
440 | 
441 |     case NPP_CUDA_KERNEL_EXECUTION_ERROR:
442 |       return "NPP_CUDA_KERNEL_EXECUTION_ERROR";
443 | 
444 |     case NPP_NOT_IMPLEMENTED_ERROR:
445 |       return "NPP_NOT_IMPLEMENTED_ERROR";
446 | 
447 |     case NPP_ERROR:
448 |       return "NPP_ERROR";
449 | 
450 |     case NPP_SUCCESS:
451 |       return "NPP_SUCCESS";
452 | 
453 |     case NPP_WRONG_INTERSECTION_QUAD_WARNING:
454 |       return "NPP_WRONG_INTERSECTION_QUAD_WARNING";
455 | 
456 |     case NPP_MISALIGNED_DST_ROI_WARNING:
457 |       return "NPP_MISALIGNED_DST_ROI_WARNING";
458 | 
459 |     case NPP_AFFINE_QUAD_INCORRECT_WARNING:
460 |       return "NPP_AFFINE_QUAD_INCORRECT_WARNING";
461 | 
462 |     case NPP_DOUBLE_SIZE_WARNING:
463 |       return "NPP_DOUBLE_SIZE_WARNING";
464 | 
465 |     case NPP_WRONG_INTERSECTION_ROI_WARNING:
466 |       return "NPP_WRONG_INTERSECTION_ROI_WARNING";
467 | 
468 | #if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) >= 0x6000
469 |     /* These are 6.0 or higher */
470 |     case NPP_LUT_PALETTE_BITSIZE_ERROR:
471 |       return "NPP_LUT_PALETTE_BITSIZE_ERROR";
472 | 
473 |     case NPP_ZC_MODE_NOT_SUPPORTED_ERROR:
474 |       return "NPP_ZC_MODE_NOT_SUPPORTED_ERROR";
475 | 
476 |     case NPP_QUALITY_INDEX_ERROR:
477 |       return "NPP_QUALITY_INDEX_ERROR";
478 | 
479 |     case NPP_CHANNEL_ORDER_ERROR:
480 |       return "NPP_CHANNEL_ORDER_ERROR";
481 | 
482 |     case NPP_ZERO_MASK_VALUE_ERROR:
483 |       return "NPP_ZERO_MASK_VALUE_ERROR";
484 | 
485 |     case NPP_NUMBER_OF_CHANNELS_ERROR:
486 |       return "NPP_NUMBER_OF_CHANNELS_ERROR";
487 | 
488 |     case NPP_COI_ERROR:
489 |       return "NPP_COI_ERROR";
490 | 
491 |     case NPP_DIVISOR_ERROR:
492 |       return "NPP_DIVISOR_ERROR";
493 | 
494 |     case NPP_CHANNEL_ERROR:
495 |       return "NPP_CHANNEL_ERROR";
496 | 
497 |     case NPP_STRIDE_ERROR:
498 |       return "NPP_STRIDE_ERROR";
499 | 
500 |     case NPP_ANCHOR_ERROR:
501 |       return "NPP_ANCHOR_ERROR";
502 | 
503 |     case NPP_MASK_SIZE_ERROR:
504 |       return "NPP_MASK_SIZE_ERROR";
505 | 
506 |     case NPP_MOMENT_00_ZERO_ERROR:
507 |       return "NPP_MOMENT_00_ZERO_ERROR";
508 | 
509 |     case NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR:
510 |       return "NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR";
511 | 
512 |     case NPP_THRESHOLD_ERROR:
513 |       return "NPP_THRESHOLD_ERROR";
514 | 
515 |     case NPP_CONTEXT_MATCH_ERROR:
516 |       return "NPP_CONTEXT_MATCH_ERROR";
517 | 
518 |     case NPP_FFT_FLAG_ERROR:
519 |       return "NPP_FFT_FLAG_ERROR";
520 | 
521 |     case NPP_FFT_ORDER_ERROR:
522 |       return "NPP_FFT_ORDER_ERROR";
523 | 
524 |     case NPP_SCALE_RANGE_ERROR:
525 |       return "NPP_SCALE_RANGE_ERROR";
526 | 
527 |     case NPP_DATA_TYPE_ERROR:
528 |       return "NPP_DATA_TYPE_ERROR";
529 | 
530 |     case NPP_OUT_OFF_RANGE_ERROR:
531 |       return "NPP_OUT_OFF_RANGE_ERROR";
532 | 
533 |     case NPP_DIVIDE_BY_ZERO_ERROR:
534 |       return "NPP_DIVIDE_BY_ZERO_ERROR";
535 | 
536 |     case NPP_RANGE_ERROR:
537 |       return "NPP_RANGE_ERROR";
538 | 
539 |     case NPP_NO_MEMORY_ERROR:
540 |       return "NPP_NO_MEMORY_ERROR";
541 | 
542 |     case NPP_ERROR_RESERVED:
543 |       return "NPP_ERROR_RESERVED";
544 | 
545 |     case NPP_NO_OPERATION_WARNING:
546 |       return "NPP_NO_OPERATION_WARNING";
547 | 
548 |     case NPP_DIVIDE_BY_ZERO_WARNING:
549 |       return "NPP_DIVIDE_BY_ZERO_WARNING";
550 | #endif
551 | 
552 | #if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) >= 0x7000
553 |     /* These are 7.0 or higher */
554 |     case NPP_OVERFLOW_ERROR:
555 |       return "NPP_OVERFLOW_ERROR";
556 | 
557 |     case NPP_CORRUPTED_DATA_ERROR:
558 |       return "NPP_CORRUPTED_DATA_ERROR";
559 | #endif
560 |   }
561 | 
562 |   return "<unknown>";
563 | }
564 | #endif
565 | 
// Reports a failed CUDA-family API call and terminates the program.
//
//   result - status value returned by the API call; any value that converts
//            to true is treated as a failure
//   func   - text of the expression that produced `result`
//   file   - source file of the call site
//   line   - source line of the call site
//
// On failure the location, numeric code, symbolic name (looked up through the
// `_cudaGetErrorEnum` overload for T) and expression text are written to
// stderr, then exit(EXIT_FAILURE) is called. On success nothing happens.
template <typename T>
void check(T result, char const *const func, const char *const file,
           int const line) {
  if (!result) {
    return;
  }

  // The code is printed with %d, so it is converted to int: the argument type
  // then matches the conversion specifier and negative status values are
  // shown with their sign.
  fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \"%s\" \n", file, line,
          static_cast<int>(result), _cudaGetErrorEnum(result), func);
  exit(EXIT_FAILURE);
}
575 | 
576 | #ifdef __DRIVER_TYPES_H__
577 | // This will output the proper CUDA error strings in the event
578 | // that a CUDA host call returns an error
579 | #define checkCudaErrors(val) check((val), #val, __FILE__, __LINE__)
580 | 
581 | // This will output the proper error string when calling cudaGetLastError
582 | #define getLastCudaError(msg) __getLastCudaError(msg, __FILE__, __LINE__)
583 | 
584 | inline void __getLastCudaError(const char *errorMessage, const char *file,
585 |                                const int line) {
586 |   cudaError_t err = cudaGetLastError();
587 | 
588 |   if (cudaSuccess != err) {
589 |     fprintf(stderr,
590 |             "%s(%i) : getLastCudaError() CUDA error :"
591 |             " %s : (%d) %s.\n",
592 |             file, line, errorMessage, static_cast<int>(err),
593 |             cudaGetErrorString(err));
594 |     exit(EXIT_FAILURE);
595 |   }
596 | }
597 | 
598 | // This will only print the proper error string when calling cudaGetLastError
599 | // but not exit program incase error detected.
600 | #define printLastCudaError(msg) __printLastCudaError(msg, __FILE__, __LINE__)
601 | 
602 | inline void __printLastCudaError(const char *errorMessage, const char *file,
603 |                                  const int line) {
604 |   cudaError_t err = cudaGetLastError();
605 | 
606 |   if (cudaSuccess != err) {
607 |     fprintf(stderr,
608 |             "%s(%i) : getLastCudaError() CUDA error :"
609 |             " %s : (%d) %s.\n",
610 |             file, line, errorMessage, static_cast<int>(err),
611 |             cudaGetErrorString(err));
612 |   }
613 | }
614 | #endif
615 | 
#ifndef MAX
// Larger of two values. Every use of a parameter is parenthesized so that
// arguments containing lower-precedence operators (e.g. `x & y`, `c ? a : b`)
// are compared as whole expressions. Note that, as with any such macro, the
// selected argument is evaluated twice.
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
#endif
619 | 
// Float-to-int conversion that rounds to the nearest integer, with halfway
// cases going away from zero (1.5 -> 2, -1.5 -> -2).
inline int ftoi(float value) {
  // 0.5 is a double literal, so the shift is carried out in double precision
  // before the truncating conversion.
  const double shifted = (value >= 0) ? value + 0.5 : value - 0.5;
  return static_cast<int>(shifted);
}
625 | 
626 | // Beginning of GPU Architecture definitions
// Returns the number of CUDA cores per streaming multiprocessor for a compute
// capability.
//
//   major, minor - compute capability (SM version) of the device
//
// Known versions are looked up in a table keyed by 0xMm (M = major, m = minor
// in hexadecimal notation). For a version that is not in the table a notice
// is printed and the core count of the newest table entry is returned, so
// callers always receive a usable positive value.
inline int _ConvertSMVer2Cores(int major, int minor) {
  struct SmCoreCount {
    int sm;     // 0xMm, M = SM major version, m = SM minor version
    int cores;  // cores per SM
  };

  // Built once instead of on every call. Entries must stay ordered by SM
  // version because the last one doubles as the fallback.
  static const SmCoreCount kCoresPerSm[] = {
      {0x30, 192},
      {0x32, 192},
      {0x35, 192},
      {0x37, 192},
      {0x50, 128},
      {0x52, 128},
      {0x53, 128},
      {0x60,  64},
      {0x61, 128},
      {0x62, 128},
      {0x70,  64},
      {0x72,  64},
      {0x75,  64},
      {0x80,  64},
      {0x86, 128},
      {0x87, 128},
      {0x89, 128},   // Ada
      {0x90, 128}};  // Hopper
  const int kEntryCount =
      static_cast<int>(sizeof(kCoresPerSm) / sizeof(kCoresPerSm[0]));

  const int sm = (major << 4) + minor;
  for (int i = 0; i < kEntryCount; ++i) {
    if (kCoresPerSm[i].sm == sm) {
      return kCoresPerSm[i].cores;
    }
  }

  // If we don't find the value, default to the newest known entry so the
  // caller can still run.
  const int fallback = kCoresPerSm[kEntryCount - 1].cores;
  printf(
      "MapSMtoCores for SM %d.%d is undefined."
      "  Default to use %d Cores/SM\n",
      major, minor, fallback);
  return fallback;
}
673 | 
// Returns the GPU architecture family name for a compute capability.
//
//   major, minor - compute capability (SM version) of the device
//
// Known versions are looked up in a table keyed by 0xMm (M = major, m = minor
// in hexadecimal notation). For a version that is not in the table a notice
// is printed and the name of the newest table entry is returned. The returned
// pointer refers to a string literal and must not be freed or modified.
inline const char* _ConvertSMVer2ArchName(int major, int minor) {
  struct SmArchName {
    int sm;            // 0xMm, M = SM major version, m = SM minor version
    const char* name;  // architecture family
  };

  // Built once instead of on every call. Entries must stay ordered by SM
  // version because the last one doubles as the fallback.
  static const SmArchName kArchNames[] = {
      {0x30, "Kepler"},
      {0x32, "Kepler"},
      {0x35, "Kepler"},
      {0x37, "Kepler"},
      {0x50, "Maxwell"},
      {0x52, "Maxwell"},
      {0x53, "Maxwell"},
      {0x60, "Pascal"},
      {0x61, "Pascal"},
      {0x62, "Pascal"},
      {0x70, "Volta"},
      {0x72, "Xavier"},
      {0x75, "Turing"},
      {0x80, "Ampere"},
      {0x86, "Ampere"},
      {0x87, "Ampere"},
      {0x89, "Ada"},
      {0x90, "Hopper"}};
  const int kEntryCount =
      static_cast<int>(sizeof(kArchNames) / sizeof(kArchNames[0]));

  const int sm = (major << 4) + minor;
  for (int i = 0; i < kEntryCount; ++i) {
    if (kArchNames[i].sm == sm) {
      return kArchNames[i].name;
    }
  }

  // If we don't find the value, default to the newest known entry so the
  // caller still gets a printable name.
  const char* fallback = kArchNames[kEntryCount - 1].name;
  printf(
      "MapSMtoArchName for SM %d.%d is undefined."
      "  Default to use %s\n",
      major, minor, fallback);
  return fallback;
}
720 |   // end of GPU Architecture definitions
721 | 
722 | #ifdef __CUDA_RUNTIME_H__
723 | // General GPU Device CUDA Initialization
724 | inline int gpuDeviceInit(int devID) {
725 |   int device_count;
726 |   checkCudaErrors(cudaGetDeviceCount(&device_count));
727 | 
728 |   if (device_count == 0) {
729 |     fprintf(stderr,
730 |             "gpuDeviceInit() CUDA error: "
731 |             "no devices supporting CUDA.\n");
732 |     exit(EXIT_FAILURE);
733 |   }
734 | 
735 |   if (devID < 0) {
736 |     devID = 0;
737 |   }
738 | 
739 |   if (devID > device_count - 1) {
740 |     fprintf(stderr, "\n");
741 |     fprintf(stderr, ">> %d CUDA capable GPU device(s) detected. <<\n",
742 |             device_count);
743 |     fprintf(stderr,
744 |             ">> gpuDeviceInit (-device=%d) is not a valid"
745 |             " GPU device. <<\n",
746 |             devID);
747 |     fprintf(stderr, "\n");
748 |     return -devID;
749 |   }
750 | 
751 |   int computeMode = -1, major = 0, minor = 0;
752 |   checkCudaErrors(cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode, devID));
753 |   checkCudaErrors(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, devID));
754 |   checkCudaErrors(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, devID));
755 |   if (computeMode == cudaComputeModeProhibited) {
756 |     fprintf(stderr,
757 |             "Error: device is running in <Compute Mode "
758 |             "Prohibited>, no threads can use cudaSetDevice().\n");
759 |     return -1;
760 |   }
761 | 
762 |   if (major < 1) {
763 |     fprintf(stderr, "gpuDeviceInit(): GPU device does not support CUDA.\n");
764 |     exit(EXIT_FAILURE);
765 |   }
766 | 
767 |   checkCudaErrors(cudaSetDevice(devID));
768 |   printf("gpuDeviceInit() CUDA Device [%d]: \"%s\n", devID, _ConvertSMVer2ArchName(major, minor));
769 | 
770 |   return devID;
771 | }
772 | 
773 | // This function returns the best GPU (with maximum GFLOPS)
774 | inline int gpuGetMaxGflopsDeviceId() {
775 |   int current_device = 0, sm_per_multiproc = 0;
776 |   int max_perf_device = 0;
777 |   int device_count = 0;
778 |   int devices_prohibited = 0;
779 | 
780 |   uint64_t max_compute_perf = 0;
781 |   checkCudaErrors(cudaGetDeviceCount(&device_count));
782 | 
783 |   if (device_count == 0) {
784 |     fprintf(stderr,
785 |             "gpuGetMaxGflopsDeviceId() CUDA error:"
786 |             " no devices supporting CUDA.\n");
787 |     exit(EXIT_FAILURE);
788 |   }
789 | 
790 |   // Find the best CUDA capable GPU device
791 |   current_device = 0;
792 | 
793 |   while (current_device < device_count) {
794 |     int computeMode = -1, major = 0, minor = 0;
795 |     checkCudaErrors(cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode, current_device));
796 |     checkCudaErrors(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, current_device));
797 |     checkCudaErrors(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, current_device));
798 | 
799 |     // If this GPU is not running on Compute Mode prohibited,
800 |     // then we can add it to the list
801 |     if (computeMode != cudaComputeModeProhibited) {
802 |       if (major == 9999 && minor == 9999) {
803 |         sm_per_multiproc = 1;
804 |       } else {
805 |         sm_per_multiproc =
806 |             _ConvertSMVer2Cores(major,  minor);
807 |       }
808 |       int multiProcessorCount = 0, clockRate = 0;
809 |       checkCudaErrors(cudaDeviceGetAttribute(&multiProcessorCount, cudaDevAttrMultiProcessorCount, current_device));
810 |       cudaError_t result = cudaDeviceGetAttribute(&clockRate, cudaDevAttrClockRate, current_device);
811 |       if (result != cudaSuccess) {
812 |         // If cudaDevAttrClockRate attribute is not supported we
813 |         // set clockRate as 1, to consider GPU with most SMs and CUDA Cores.
814 |         if(result == cudaErrorInvalidValue) {
815 |           clockRate = 1;
816 |         }
817 |         else {
818 |           fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \n", __FILE__, __LINE__,
819 |             static_cast<unsigned int>(result), _cudaGetErrorEnum(result));
820 |           exit(EXIT_FAILURE);
821 |         }
822 |       }
823 |       uint64_t compute_perf = (uint64_t)multiProcessorCount * sm_per_multiproc * clockRate;
824 | 
825 |       if (compute_perf > max_compute_perf) {
826 |         max_compute_perf = compute_perf;
827 |         max_perf_device = current_device;
828 |       }
829 |     } else {
830 |       devices_prohibited++;
831 |     }
832 | 
833 |     ++current_device;
834 |   }
835 | 
836 |   if (devices_prohibited == device_count) {
837 |     fprintf(stderr,
838 |             "gpuGetMaxGflopsDeviceId() CUDA error:"
839 |             " all devices have compute mode prohibited.\n");
840 |     exit(EXIT_FAILURE);
841 |   }
842 | 
843 |   return max_perf_device;
844 | }
845 | 
846 | // Initialization code to find the best CUDA Device
847 | inline int findCudaDevice(int argc, const char **argv) {
848 |   int devID = 0;
849 | 
850 |   // If the command-line has a device number specified, use it
851 |   if (checkCmdLineFlag(argc, argv, "device")) {
852 |     devID = getCmdLineArgumentInt(argc, argv, "device=");
853 | 
854 |     if (devID < 0) {
855 |       printf("Invalid command line parameter\n ");
856 |       exit(EXIT_FAILURE);
857 |     } else {
858 |       devID = gpuDeviceInit(devID);
859 | 
860 |       if (devID < 0) {
861 |         printf("exiting...\n");
862 |         exit(EXIT_FAILURE);
863 |       }
864 |     }
865 |   } else {
866 |     // Otherwise pick the device with highest Gflops/s
867 |     devID = gpuGetMaxGflopsDeviceId();
868 |     checkCudaErrors(cudaSetDevice(devID));
869 |     int major = 0, minor = 0; 
870 |     checkCudaErrors(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, devID));
871 |     checkCudaErrors(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, devID));
872 |     printf("GPU Device %d: \"%s\" with compute capability %d.%d\n",
873 |            devID, _ConvertSMVer2ArchName(major, minor), major, minor);
874 | 
875 |   }
876 | 
877 |   return devID;
878 | }
879 | 
880 | inline int findIntegratedGPU() {
881 |   int current_device = 0;
882 |   int device_count = 0;
883 |   int devices_prohibited = 0;
884 | 
885 |   checkCudaErrors(cudaGetDeviceCount(&device_count));
886 | 
887 |   if (device_count == 0) {
888 |     fprintf(stderr, "CUDA error: no devices supporting CUDA.\n");
889 |     exit(EXIT_FAILURE);
890 |   }
891 | 
892 |   // Find the integrated GPU which is compute capable
893 |   while (current_device < device_count) {
894 |     int computeMode = -1, integrated = -1;
895 |     checkCudaErrors(cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode, current_device));
896 |     checkCudaErrors(cudaDeviceGetAttribute(&integrated, cudaDevAttrIntegrated, current_device));
897 |     // If GPU is integrated and is not running on Compute Mode prohibited,
898 |     // then cuda can map to GLES resource
899 |     if (integrated && (computeMode != cudaComputeModeProhibited)) {
900 |       checkCudaErrors(cudaSetDevice(current_device));
901 | 
902 |       int major = 0, minor = 0; 
903 |       checkCudaErrors(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, current_device));
904 |       checkCudaErrors(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, current_device));
905 |       printf("GPU Device %d: \"%s\" with compute capability %d.%d\n\n",
906 |              current_device, _ConvertSMVer2ArchName(major, minor), major, minor);
907 | 
908 |       return current_device;
909 |     } else {
910 |       devices_prohibited++;
911 |     }
912 | 
913 |     current_device++;
914 |   }
915 | 
916 |   if (devices_prohibited == device_count) {
917 |     fprintf(stderr,
918 |             "CUDA error:"
919 |             " No GLES-CUDA Interop capable GPU found.\n");
920 |     exit(EXIT_FAILURE);
921 |   }
922 | 
923 |   return -1;
924 | }
925 | 
926 | // General check for CUDA GPU SM Capabilities
927 | inline bool checkCudaCapabilities(int major_version, int minor_version) {
928 |   int dev;
929 |   int major = 0, minor = 0;
930 | 
931 |   checkCudaErrors(cudaGetDevice(&dev));
932 |   checkCudaErrors(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, dev));
933 |   checkCudaErrors(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, dev));
934 | 
935 |   if ((major > major_version) ||
936 |       (major == major_version &&
937 |        minor >= minor_version)) {
938 |     printf("  Device %d: <%16s >, Compute SM %d.%d detected\n", dev,
939 |            _ConvertSMVer2ArchName(major, minor), major, minor);
940 |     return true;
941 |   } else {
942 |     printf(
943 |         "  No GPU device was found that can support "
944 |         "CUDA compute capability %d.%d.\n",
945 |         major_version, minor_version);
946 |     return false;
947 |   }
948 | }
949 | #endif
950 | 
951 |   // end of CUDA Helper Functions
952 | 
953 | #endif  // COMMON_HELPER_CUDA_H_
954 | 


--------------------------------------------------------------------------------
/src/libs/cuda/helper_string.h:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright 1993-2013 NVIDIA Corporation.  All rights reserved.
  3 |  *
  4 |  * Please refer to the NVIDIA end user license agreement (EULA) associated
  5 |  * with this source code for terms and conditions that govern your use of
  6 |  * this software. Any use, reproduction, disclosure, or distribution of
  7 |  * this software and related documentation outside the terms of the EULA
  8 |  * is strictly prohibited.
  9 |  *
 10 |  */
 11 | 
 12 | // These are helper functions for the SDK samples (string parsing, timers, etc)
 13 | #ifndef COMMON_HELPER_STRING_H_
 14 | #define COMMON_HELPER_STRING_H_
 15 | 
 16 | #include <stdio.h>
 17 | #include <stdlib.h>
 18 | #include <fstream>
 19 | #include <string>
 20 | 
 21 | #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
 22 | #ifndef _CRT_SECURE_NO_DEPRECATE
 23 | #define _CRT_SECURE_NO_DEPRECATE
 24 | #endif
 25 | #ifndef STRCASECMP
 26 | #define STRCASECMP _stricmp
 27 | #endif
 28 | #ifndef STRNCASECMP
 29 | #define STRNCASECMP _strnicmp
 30 | #endif
 31 | #ifndef STRCPY
 32 | #define STRCPY(sFilePath, nLength, sPath) strcpy_s(sFilePath, nLength, sPath)
 33 | #endif
 34 | 
 35 | #ifndef FOPEN
 36 | #define FOPEN(fHandle, filename, mode) fopen_s(&fHandle, filename, mode)
 37 | #endif
 38 | #ifndef FOPEN_FAIL
 39 | #define FOPEN_FAIL(result) (result != 0)
 40 | #endif
 41 | #ifndef SSCANF
 42 | #define SSCANF sscanf_s
 43 | #endif
 44 | #ifndef SPRINTF
 45 | #define SPRINTF sprintf_s
 46 | #endif
 47 | #else  // Linux Includes
 48 | #include <string.h>
 49 | #include <strings.h>
 50 | 
 51 | #ifndef STRCASECMP
 52 | #define STRCASECMP strcasecmp
 53 | #endif
 54 | #ifndef STRNCASECMP
 55 | #define STRNCASECMP strncasecmp
 56 | #endif
 57 | #ifndef STRCPY
 58 | #define STRCPY(sFilePath, nLength, sPath) strcpy(sFilePath, sPath)
 59 | #endif
 60 | 
 61 | #ifndef FOPEN
 62 | #define FOPEN(fHandle, filename, mode) (fHandle = fopen(filename, mode))
 63 | #endif
 64 | #ifndef FOPEN_FAIL
 65 | #define FOPEN_FAIL(result) (result == NULL)
 66 | #endif
 67 | #ifndef SSCANF
 68 | #define SSCANF sscanf
 69 | #endif
 70 | #ifndef SPRINTF
 71 | #define SPRINTF sprintf
 72 | #endif
 73 | #endif
 74 | 
 75 | #ifndef EXIT_WAIVED
 76 | #define EXIT_WAIVED 2
 77 | #endif
 78 | 
 79 | // CUDA Utility Helper Functions
 80 | inline int stringRemoveDelimiter(char delimiter, const char *string) {
 81 |   int string_start = 0;
 82 | 
 83 |   while (string[string_start] == delimiter) {
 84 |     string_start++;
 85 |   }
 86 | 
 87 |   if (string_start >= static_cast<int>(strlen(string) - 1)) {
 88 |     return 0;
 89 |   }
 90 | 
 91 |   return string_start;
 92 | }
 93 | 
 94 | inline int getFileExtension(char *filename, char **extension) {
 95 |   int string_length = static_cast<int>(strlen(filename));
 96 | 
 97 |   while (filename[string_length--] != '.') {
 98 |     if (string_length == 0) break;
 99 |   }
100 | 
101 |   if (string_length > 0) string_length += 2;
102 | 
103 |   if (string_length == 0)
104 |     *extension = NULL;
105 |   else
106 |     *extension = &filename[string_length];
107 | 
108 |   return string_length;
109 | }
110 | 
111 | inline bool checkCmdLineFlag(const int argc, const char **argv,
112 |                              const char *string_ref) {
113 |   bool bFound = false;
114 | 
115 |   if (argc >= 1) {
116 |     for (int i = 1; i < argc; i++) {
117 |       int string_start = stringRemoveDelimiter('-', argv[i]);
118 |       const char *string_argv = &argv[i][string_start];
119 | 
120 |       const char *equal_pos = strchr(string_argv, '=');
121 |       int argv_length = static_cast<int>(
122 |           equal_pos == 0 ? strlen(string_argv) : equal_pos - string_argv);
123 | 
124 |       int length = static_cast<int>(strlen(string_ref));
125 | 
126 |       if (length == argv_length &&
127 |           !STRNCASECMP(string_argv, string_ref, length)) {
128 |         bFound = true;
129 |         continue;
130 |       }
131 |     }
132 |   }
133 | 
134 |   return bFound;
135 | }
136 | 
137 | // This function wraps the CUDA Driver API into a template function
138 | template <class T>
139 | inline bool getCmdLineArgumentValue(const int argc, const char **argv,
140 |                                     const char *string_ref, T *value) {
141 |   bool bFound = false;
142 | 
143 |   if (argc >= 1) {
144 |     for (int i = 1; i < argc; i++) {
145 |       int string_start = stringRemoveDelimiter('-', argv[i]);
146 |       const char *string_argv = &argv[i][string_start];
147 |       int length = static_cast<int>(strlen(string_ref));
148 | 
149 |       if (!STRNCASECMP(string_argv, string_ref, length)) {
150 |         if (length + 1 <= static_cast<int>(strlen(string_argv))) {
151 |           int auto_inc = (string_argv[length] == '=') ? 1 : 0;
152 |           *value = (T)atoi(&string_argv[length + auto_inc]);
153 |         }
154 | 
155 |         bFound = true;
156 |         i = argc;
157 |       }
158 |     }
159 |   }
160 | 
161 |   return bFound;
162 | }
163 | 
164 | inline int getCmdLineArgumentInt(const int argc, const char **argv,
165 |                                  const char *string_ref) {
166 |   bool bFound = false;
167 |   int value = -1;
168 | 
169 |   if (argc >= 1) {
170 |     for (int i = 1; i < argc; i++) {
171 |       int string_start = stringRemoveDelimiter('-', argv[i]);
172 |       const char *string_argv = &argv[i][string_start];
173 |       int length = static_cast<int>(strlen(string_ref));
174 | 
175 |       if (!STRNCASECMP(string_argv, string_ref, length)) {
176 |         if (length + 1 <= static_cast<int>(strlen(string_argv))) {
177 |           int auto_inc = (string_argv[length] == '=') ? 1 : 0;
178 |           value = atoi(&string_argv[length + auto_inc]);
179 |         } else {
180 |           value = 0;
181 |         }
182 | 
183 |         bFound = true;
184 |         continue;
185 |       }
186 |     }
187 |   }
188 | 
189 |   if (bFound) {
190 |     return value;
191 |   } else {
192 |     return 0;
193 |   }
194 | }
195 | 
196 | inline float getCmdLineArgumentFloat(const int argc, const char **argv,
197 |                                      const char *string_ref) {
198 |   bool bFound = false;
199 |   float value = -1;
200 | 
201 |   if (argc >= 1) {
202 |     for (int i = 1; i < argc; i++) {
203 |       int string_start = stringRemoveDelimiter('-', argv[i]);
204 |       const char *string_argv = &argv[i][string_start];
205 |       int length = static_cast<int>(strlen(string_ref));
206 | 
207 |       if (!STRNCASECMP(string_argv, string_ref, length)) {
208 |         if (length + 1 <= static_cast<int>(strlen(string_argv))) {
209 |           int auto_inc = (string_argv[length] == '=') ? 1 : 0;
210 |           value = static_cast<float>(atof(&string_argv[length + auto_inc]));
211 |         } else {
212 |           value = 0.f;
213 |         }
214 | 
215 |         bFound = true;
216 |         continue;
217 |       }
218 |     }
219 |   }
220 | 
221 |   if (bFound) {
222 |     return value;
223 |   } else {
224 |     return 0;
225 |   }
226 | }
227 | 
228 | inline bool getCmdLineArgumentString(const int argc, const char **argv,
229 |                                      const char *string_ref,
230 |                                      char **string_retval) {
231 |   bool bFound = false;
232 | 
233 |   if (argc >= 1) {
234 |     for (int i = 1; i < argc; i++) {
235 |       int string_start = stringRemoveDelimiter('-', argv[i]);
236 |       char *string_argv = const_cast<char*>(&argv[i][string_start]);
237 |       int length = static_cast<int>(strlen(string_ref));
238 | 
239 |       if (!STRNCASECMP(string_argv, string_ref, length)) {
240 |         *string_retval = &string_argv[length + 1];
241 |         bFound = true;
242 |         continue;
243 |       }
244 |     }
245 |   }
246 | 
247 |   if (!bFound) {
248 |     *string_retval = NULL;
249 |   }
250 | 
251 |   return bFound;
252 | }
253 | 
254 | //////////////////////////////////////////////////////////////////////////////
255 | //! Find the path for a file assuming that
256 | //! files are found in the searchPath.
257 | //!
258 | //! @return the path if succeeded, otherwise 0
259 | //! @param filename         name of the file
260 | //! @param executable_path  optional absolute path of the executable
261 | //////////////////////////////////////////////////////////////////////////////
262 | inline char *sdkFindFilePath(const char *filename,
263 |                              const char *executable_path) {
264 |   // <executable_name> defines a variable that is replaced with the name of the
265 |   // executable
266 | 
267 |   // Typical relative search paths to locate needed companion files (e.g. sample
268 |   // input data, or JIT source files) The origin for the relative search may be
269 |   // the .exe file, a .bat file launching an .exe, a browser .exe launching the
270 |   // .exe or .bat, etc
271 |   const char *searchPath[] = {
272 |       "./",  // same dir
273 |       "./<executable_name>_data_files/",
274 |       "./common/",                      // "/common/" subdir
275 |       "./common/data/",                 // "/common/data/" subdir
276 |       "./data/",                        // "/data/" subdir
277 |       "./src/",                         // "/src/" subdir
278 |       "./src/<executable_name>/data/",  // "/src/<executable_name>/data/" subdir
279 |       "./inc/",                         // "/inc/" subdir
280 |       "./0_Simple/",                    // "/0_Simple/" subdir
281 |       "./1_Utilities/",                 // "/1_Utilities/" subdir
282 |       "./2_Graphics/",                  // "/2_Graphics/" subdir
283 |       "./3_Imaging/",                   // "/3_Imaging/" subdir
284 |       "./4_Finance/",                   // "/4_Finance/" subdir
285 |       "./5_Simulations/",               // "/5_Simulations/" subdir
286 |       "./6_Advanced/",                  // "/6_Advanced/" subdir
287 |       "./7_CUDALibraries/",             // "/7_CUDALibraries/" subdir
288 |       "./8_Android/",                   // "/8_Android/" subdir
289 |       "./samples/",                     // "/samples/" subdir
290 | 
291 |       "./0_Simple/<executable_name>/data/",  // "/0_Simple/<executable_name>/data/"
292 |                                              // subdir
293 |       "./1_Utilities/<executable_name>/data/",  // "/1_Utilities/<executable_name>/data/"
294 |                                                 // subdir
295 |       "./2_Graphics/<executable_name>/data/",  // "/2_Graphics/<executable_name>/data/"
296 |                                                // subdir
297 |       "./3_Imaging/<executable_name>/data/",  // "/3_Imaging/<executable_name>/data/"
298 |                                               // subdir
299 |       "./4_Finance/<executable_name>/data/",  // "/4_Finance/<executable_name>/data/"
300 |                                               // subdir
301 |       "./5_Simulations/<executable_name>/data/",  // "/5_Simulations/<executable_name>/data/"
302 |                                                   // subdir
303 |       "./6_Advanced/<executable_name>/data/",  // "/6_Advanced/<executable_name>/data/"
304 |                                                // subdir
305 |       "./7_CUDALibraries/<executable_name>/",  // "/7_CUDALibraries/<executable_name>/"
306 |                                                // subdir
307 |       "./7_CUDALibraries/<executable_name>/data/",  // "/7_CUDALibraries/<executable_name>/data/"
308 |                                                     // subdir
309 | 
310 |       "../",              // up 1 in tree
311 |       "../common/",       // up 1 in tree, "/common/" subdir
312 |       "../common/data/",  // up 1 in tree, "/common/data/" subdir
313 |       "../data/",         // up 1 in tree, "/data/" subdir
314 |       "../src/",          // up 1 in tree, "/src/" subdir
315 |       "../inc/",          // up 1 in tree, "/inc/" subdir
316 | 
317 |       "../0_Simple/<executable_name>/data/",  // up 1 in tree,
318 |                                               // "/0_Simple/<executable_name>/"
319 |                                               // subdir
320 |       "../1_Utilities/<executable_name>/data/",  // up 1 in tree,
321 |                                                  // "/1_Utilities/<executable_name>/"
322 |                                                  // subdir
323 |       "../2_Graphics/<executable_name>/data/",  // up 1 in tree,
324 |                                                 // "/2_Graphics/<executable_name>/"
325 |                                                 // subdir
326 |       "../3_Imaging/<executable_name>/data/",  // up 1 in tree,
327 |                                                // "/3_Imaging/<executable_name>/"
328 |                                                // subdir
329 |       "../4_Finance/<executable_name>/data/",  // up 1 in tree,
330 |                                                // "/4_Finance/<executable_name>/"
331 |                                                // subdir
332 |       "../5_Simulations/<executable_name>/data/",  // up 1 in tree,
333 |                                                    // "/5_Simulations/<executable_name>/"
334 |                                                    // subdir
335 |       "../6_Advanced/<executable_name>/data/",  // up 1 in tree,
336 |                                                 // "/6_Advanced/<executable_name>/"
337 |                                                 // subdir
338 |       "../7_CUDALibraries/<executable_name>/data/",  // up 1 in tree,
339 |                                                      // "/7_CUDALibraries/<executable_name>/"
340 |                                                      // subdir
341 |       "../8_Android/<executable_name>/data/",  // up 1 in tree,
342 |                                                // "/8_Android/<executable_name>/"
343 |                                                // subdir
344 |       "../samples/<executable_name>/data/",  // up 1 in tree,
345 |                                              // "/samples/<executable_name>/"
346 |                                              // subdir
347 |       "../../",                              // up 2 in tree
348 |       "../../common/",                       // up 2 in tree, "/common/" subdir
349 |       "../../common/data/",  // up 2 in tree, "/common/data/" subdir
350 |       "../../data/",         // up 2 in tree, "/data/" subdir
351 |       "../../src/",          // up 2 in tree, "/src/" subdir
352 |       "../../inc/",          // up 2 in tree, "/inc/" subdir
353 |       "../../sandbox/<executable_name>/data/",  // up 2 in tree,
354 |                                                 // "/sandbox/<executable_name>/"
355 |                                                 // subdir
356 |       "../../0_Simple/<executable_name>/data/",  // up 2 in tree,
357 |                                                  // "/0_Simple/<executable_name>/"
358 |                                                  // subdir
359 |       "../../1_Utilities/<executable_name>/data/",  // up 2 in tree,
360 |                                                     // "/1_Utilities/<executable_name>/"
361 |                                                     // subdir
362 |       "../../2_Graphics/<executable_name>/data/",  // up 2 in tree,
363 |                                                    // "/2_Graphics/<executable_name>/"
364 |                                                    // subdir
365 |       "../../3_Imaging/<executable_name>/data/",  // up 2 in tree,
366 |                                                   // "/3_Imaging/<executable_name>/"
367 |                                                   // subdir
368 |       "../../4_Finance/<executable_name>/data/",  // up 2 in tree,
369 |                                                   // "/4_Finance/<executable_name>/"
370 |                                                   // subdir
371 |       "../../5_Simulations/<executable_name>/data/",  // up 2 in tree,
372 |                                                       // "/5_Simulations/<executable_name>/"
373 |                                                       // subdir
374 |       "../../6_Advanced/<executable_name>/data/",  // up 2 in tree,
375 |                                                    // "/6_Advanced/<executable_name>/"
376 |                                                    // subdir
377 |       "../../7_CUDALibraries/<executable_name>/data/",  // up 2 in tree,
378 |                                                         // "/7_CUDALibraries/<executable_name>/"
379 |                                                         // subdir
380 |       "../../8_Android/<executable_name>/data/",  // up 2 in tree,
381 |                                                   // "/8_Android/<executable_name>/"
382 |                                                   // subdir
383 |       "../../samples/<executable_name>/data/",  // up 2 in tree,
384 |                                                 // "/samples/<executable_name>/"
385 |                                                 // subdir
386 |       "../../../",                              // up 3 in tree
387 |       "../../../src/<executable_name>/",        // up 3 in tree,
388 |                                           // "/src/<executable_name>/" subdir
389 |       "../../../src/<executable_name>/data/",  // up 3 in tree,
390 |                                                // "/src/<executable_name>/data/"
391 |                                                // subdir
392 |       "../../../src/<executable_name>/src/",   // up 3 in tree,
393 |                                                // "/src/<executable_name>/src/"
394 |                                                // subdir
395 |       "../../../src/<executable_name>/inc/",   // up 3 in tree,
396 |                                                // "/src/<executable_name>/inc/"
397 |                                                // subdir
398 |       "../../../sandbox/<executable_name>/",   // up 3 in tree,
399 |                                                // "/sandbox/<executable_name>/"
400 |                                                // subdir
401 |       "../../../sandbox/<executable_name>/data/",  // up 3 in tree,
402 |                                                    // "/sandbox/<executable_name>/data/"
403 |                                                    // subdir
404 |       "../../../sandbox/<executable_name>/src/",  // up 3 in tree,
405 |                                                   // "/sandbox/<executable_name>/src/"
406 |                                                   // subdir
407 |       "../../../sandbox/<executable_name>/inc/",  // up 3 in tree,
408 |                                                   // "/sandbox/<executable_name>/inc/"
409 |                                                   // subdir
410 |       "../../../0_Simple/<executable_name>/data/",  // up 3 in tree,
411 |                                                     // "/0_Simple/<executable_name>/"
412 |                                                     // subdir
413 |       "../../../1_Utilities/<executable_name>/data/",  // up 3 in tree,
414 |                                                        // "/1_Utilities/<executable_name>/"
415 |                                                        // subdir
416 |       "../../../2_Graphics/<executable_name>/data/",  // up 3 in tree,
417 |                                                       // "/2_Graphics/<executable_name>/"
418 |                                                       // subdir
419 |       "../../../3_Imaging/<executable_name>/data/",  // up 3 in tree,
420 |                                                      // "/3_Imaging/<executable_name>/"
421 |                                                      // subdir
422 |       "../../../4_Finance/<executable_name>/data/",  // up 3 in tree,
423 |                                                      // "/4_Finance/<executable_name>/"
424 |                                                      // subdir
425 |       "../../../5_Simulations/<executable_name>/data/",  // up 3 in tree,
426 |                                                          // "/5_Simulations/<executable_name>/"
427 |                                                          // subdir
428 |       "../../../6_Advanced/<executable_name>/data/",  // up 3 in tree,
429 |                                                       // "/6_Advanced/<executable_name>/"
430 |                                                       // subdir
431 |       "../../../7_CUDALibraries/<executable_name>/data/",  // up 3 in tree,
432 |                                                            // "/7_CUDALibraries/<executable_name>/"
433 |                                                            // subdir
434 |       "../../../8_Android/<executable_name>/data/",  // up 3 in tree,
435 |                                                      // "/8_Android/<executable_name>/"
436 |                                                      // subdir
437 |       "../../../0_Simple/<executable_name>/",  // up 3 in tree,
438 |                                                // "/0_Simple/<executable_name>/"
439 |                                                // subdir
440 |       "../../../1_Utilities/<executable_name>/",  // up 3 in tree,
441 |                                                   // "/1_Utilities/<executable_name>/"
442 |                                                   // subdir
443 |       "../../../2_Graphics/<executable_name>/",  // up 3 in tree,
444 |                                                  // "/2_Graphics/<executable_name>/"
445 |                                                  // subdir
446 |       "../../../3_Imaging/<executable_name>/",  // up 3 in tree,
447 |                                                 // "/3_Imaging/<executable_name>/"
448 |                                                 // subdir
449 |       "../../../4_Finance/<executable_name>/",  // up 3 in tree,
450 |                                                 // "/4_Finance/<executable_name>/"
451 |                                                 // subdir
452 |       "../../../5_Simulations/<executable_name>/",  // up 3 in tree,
453 |                                                     // "/5_Simulations/<executable_name>/"
454 |                                                     // subdir
455 |       "../../../6_Advanced/<executable_name>/",  // up 3 in tree,
456 |                                                  // "/6_Advanced/<executable_name>/"
457 |                                                  // subdir
458 |       "../../../7_CUDALibraries/<executable_name>/",  // up 3 in tree,
459 |                                                       // "/7_CUDALibraries/<executable_name>/"
460 |                                                       // subdir
461 |       "../../../8_Android/<executable_name>/",  // up 3 in tree,
462 |                                                 // "/8_Android/<executable_name>/"
463 |                                                 // subdir
464 |       "../../../samples/<executable_name>/data/",  // up 3 in tree,
465 |                                                    // "/samples/<executable_name>/"
466 |                                                    // subdir
467 |       "../../../common/",       // up 3 in tree, "../../../common/" subdir
468 |       "../../../common/data/",  // up 3 in tree, "../../../common/data/" subdir
469 |       "../../../data/",         // up 3 in tree, "../../../data/" subdir
470 |       "../../../../",           // up 4 in tree
471 |       "../../../../src/<executable_name>/",  // up 4 in tree,
472 |                                              // "/src/<executable_name>/" subdir
473 |       "../../../../src/<executable_name>/data/",  // up 4 in tree,
474 |                                                   // "/src/<executable_name>/data/"
475 |                                                   // subdir
476 |       "../../../../src/<executable_name>/src/",  // up 4 in tree,
477 |                                                  // "/src/<executable_name>/src/"
478 |                                                  // subdir
479 |       "../../../../src/<executable_name>/inc/",  // up 4 in tree,
480 |                                                  // "/src/<executable_name>/inc/"
481 |                                                  // subdir
482 |       "../../../../sandbox/<executable_name>/",  // up 4 in tree,
483 |                                                  // "/sandbox/<executable_name>/"
484 |                                                  // subdir
485 |       "../../../../sandbox/<executable_name>/data/",  // up 4 in tree,
486 |                                                       // "/sandbox/<executable_name>/data/"
487 |                                                       // subdir
488 |       "../../../../sandbox/<executable_name>/src/",  // up 4 in tree,
489 |                                                      // "/sandbox/<executable_name>/src/"
490 |                                                      // subdir
491 |       "../../../../sandbox/<executable_name>/inc/",  // up 4 in tree,
492 |                                                      // "/sandbox/<executable_name>/inc/"
493 |                                                      // subdir
494 |       "../../../../0_Simple/<executable_name>/data/",  // up 4 in tree,
495 |                                                        // "/0_Simple/<executable_name>/"
496 |                                                        // subdir
497 |       "../../../../1_Utilities/<executable_name>/data/",  // up 4 in tree,
498 |                                                           // "/1_Utilities/<executable_name>/"
499 |                                                           // subdir
500 |       "../../../../2_Graphics/<executable_name>/data/",  // up 4 in tree,
501 |                                                          // "/2_Graphics/<executable_name>/"
502 |                                                          // subdir
503 |       "../../../../3_Imaging/<executable_name>/data/",  // up 4 in tree,
504 |                                                         // "/3_Imaging/<executable_name>/"
505 |                                                         // subdir
506 |       "../../../../4_Finance/<executable_name>/data/",  // up 4 in tree,
507 |                                                         // "/4_Finance/<executable_name>/"
508 |                                                         // subdir
509 |       "../../../../5_Simulations/<executable_name>/data/",  // up 4 in tree,
510 |                                                             // "/5_Simulations/<executable_name>/"
511 |                                                             // subdir
512 |       "../../../../6_Advanced/<executable_name>/data/",  // up 4 in tree,
513 |                                                          // "/6_Advanced/<executable_name>/"
514 |                                                          // subdir
515 |       "../../../../7_CUDALibraries/<executable_name>/data/",  // up 4 in tree,
516 |                                                               // "/7_CUDALibraries/<executable_name>/"
517 |                                                               // subdir
518 |       "../../../../8_Android/<executable_name>/data/",  // up 4 in tree,
519 |                                                         // "/8_Android/<executable_name>/"
520 |                                                         // subdir
521 |       "../../../../0_Simple/<executable_name>/",  // up 4 in tree,
522 |                                                   // "/0_Simple/<executable_name>/"
523 |                                                   // subdir
524 |       "../../../../1_Utilities/<executable_name>/",  // up 4 in tree,
525 |                                                      // "/1_Utilities/<executable_name>/"
526 |                                                      // subdir
527 |       "../../../../2_Graphics/<executable_name>/",  // up 4 in tree,
528 |                                                     // "/2_Graphics/<executable_name>/"
529 |                                                     // subdir
530 |       "../../../../3_Imaging/<executable_name>/",  // up 4 in tree,
531 |                                                    // "/3_Imaging/<executable_name>/"
532 |                                                    // subdir
533 |       "../../../../4_Finance/<executable_name>/",  // up 4 in tree,
534 |                                                    // "/4_Finance/<executable_name>/"
535 |                                                    // subdir
536 |       "../../../../5_Simulations/<executable_name>/",  // up 4 in tree,
537 |                                                        // "/5_Simulations/<executable_name>/"
538 |                                                        // subdir
539 |       "../../../../6_Advanced/<executable_name>/",  // up 4 in tree,
540 |                                                     // "/6_Advanced/<executable_name>/"
541 |                                                     // subdir
542 |       "../../../../7_CUDALibraries/<executable_name>/",  // up 4 in tree,
543 |                                                          // "/7_CUDALibraries/<executable_name>/"
544 |                                                          // subdir
545 |       "../../../../8_Android/<executable_name>/",  // up 4 in tree,
546 |                                                    // "/8_Android/<executable_name>/"
547 |                                                    // subdir
548 |       "../../../../samples/<executable_name>/data/",  // up 4 in tree,
549 |                                                       // "/samples/<executable_name>/"
550 |                                                       // subdir
551 |       "../../../../common/",       // up 4 in tree, "../../../common/" subdir
552 |       "../../../../common/data/",  // up 4 in tree, "../../../common/data/"
553 |                                    // subdir
554 |       "../../../../data/",         // up 4 in tree, "../../../data/" subdir
555 |       "../../../../../",           // up 5 in tree
556 |       "../../../../../src/<executable_name>/",  // up 5 in tree,
557 |                                                 // "/src/<executable_name>/"
558 |                                                 // subdir
559 |       "../../../../../src/<executable_name>/data/",  // up 5 in tree,
560 |                                                      // "/src/<executable_name>/data/"
561 |                                                      // subdir
562 |       "../../../../../src/<executable_name>/src/",  // up 5 in tree,
563 |                                                     // "/src/<executable_name>/src/"
564 |                                                     // subdir
565 |       "../../../../../src/<executable_name>/inc/",  // up 5 in tree,
566 |                                                     // "/src/<executable_name>/inc/"
567 |                                                     // subdir
568 |       "../../../../../sandbox/<executable_name>/",  // up 5 in tree,
569 |                                                     // "/sandbox/<executable_name>/"
570 |                                                     // subdir
571 |       "../../../../../sandbox/<executable_name>/data/",  // up 5 in tree,
572 |                                                          // "/sandbox/<executable_name>/data/"
573 |                                                          // subdir
574 |       "../../../../../sandbox/<executable_name>/src/",  // up 5 in tree,
575 |                                                         // "/sandbox/<executable_name>/src/"
576 |                                                         // subdir
577 |       "../../../../../sandbox/<executable_name>/inc/",  // up 5 in tree,
578 |                                                         // "/sandbox/<executable_name>/inc/"
579 |                                                         // subdir
580 |       "../../../../../0_Simple/<executable_name>/data/",  // up 5 in tree,
581 |                                                           // "/0_Simple/<executable_name>/"
582 |                                                           // subdir
583 |       "../../../../../1_Utilities/<executable_name>/data/",  // up 5 in tree,
584 |                                                              // "/1_Utilities/<executable_name>/"
585 |                                                              // subdir
586 |       "../../../../../2_Graphics/<executable_name>/data/",  // up 5 in tree,
587 |                                                             // "/2_Graphics/<executable_name>/"
588 |                                                             // subdir
589 |       "../../../../../3_Imaging/<executable_name>/data/",  // up 5 in tree,
590 |                                                            // "/3_Imaging/<executable_name>/"
591 |                                                            // subdir
592 |       "../../../../../4_Finance/<executable_name>/data/",  // up 5 in tree,
593 |                                                            // "/4_Finance/<executable_name>/"
594 |                                                            // subdir
595 |       "../../../../../5_Simulations/<executable_name>/data/",  // up 5 in tree,
596 |                                                                // "/5_Simulations/<executable_name>/"
597 |                                                                // subdir
598 |       "../../../../../6_Advanced/<executable_name>/data/",  // up 5 in tree,
599 |                                                             // "/6_Advanced/<executable_name>/"
600 |                                                             // subdir
601 |       "../../../../../7_CUDALibraries/<executable_name>/data/",  // up 5 in
602 |                                                                  // tree,
603 |                                                                  // "/7_CUDALibraries/<executable_name>/"
604 |                                                                  // subdir
605 |       "../../../../../8_Android/<executable_name>/data/",  // up 5 in tree,
606 |                                                            // "/8_Android/<executable_name>/"
607 |                                                            // subdir
608 |       "../../../../../samples/<executable_name>/data/",  // up 5 in tree,
609 |                                                          // "/samples/<executable_name>/"
610 |                                                          // subdir
611 |       "../../../../../common/",       // up 5 in tree, "../../../common/" subdir
612 |       "../../../../../common/data/",  // up 5 in tree, "../../../common/data/"
613 |                                       // subdir
614 |   };
615 | 
616 |   // Extract the executable name
617 |   std::string executable_name;
618 | 
619 |   if (executable_path != 0) {
620 |     executable_name = std::string(executable_path);
621 | 
622 | #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
623 |     // Windows path delimiter
624 |     size_t delimiter_pos = executable_name.find_last_of('\\');
625 |     executable_name.erase(0, delimiter_pos + 1);
626 | 
627 |     if (executable_name.rfind(".exe") != std::string::npos) {
628 |       // we strip .exe, only if the .exe is found
629 |       executable_name.resize(executable_name.size() - 4);
630 |     }
631 | 
632 | #else
633 |     // Linux & OSX path delimiter
634 |     size_t delimiter_pos = executable_name.find_last_of('/');
635 |     executable_name.erase(0, delimiter_pos + 1);
636 | #endif
637 |   }
638 | 
639 |   // Loop over all search paths and return the first hit
640 |   for (unsigned int i = 0; i < sizeof(searchPath) / sizeof(char *); ++i) {
641 |     std::string path(searchPath[i]);
642 |     size_t executable_name_pos = path.find("<executable_name>");
643 | 
644 |     // If there is executable_name variable in the searchPath
645 |     // replace it with the value
646 |     if (executable_name_pos != std::string::npos) {
647 |       if (executable_path != 0) {
648 |         path.replace(executable_name_pos, strlen("<executable_name>"),
649 |                      executable_name);
650 |       } else {
651 |         // Skip this path entry if no executable argument is given
652 |         continue;
653 |       }
654 |     }
655 | 
656 | #ifdef _DEBUG
657 |     printf("sdkFindFilePath <%s> in %s\n", filename, path.c_str());
658 | #endif
659 | 
660 |     // Test if the file exists
661 |     path.append(filename);
662 |     FILE *fp;
663 |     FOPEN(fp, path.c_str(), "rb");
664 | 
665 |     if (fp != NULL) {
666 |       fclose(fp);
667 |       // File found
668 |       // returning an allocated array here for backwards compatibility reasons
669 |       char *file_path = reinterpret_cast<char *>(malloc(path.length() + 1));
670 |       STRCPY(file_path, path.length() + 1, path.c_str());
671 |       return file_path;
672 |     }
673 | 
674 |     if (fp) {
675 |       fclose(fp);
676 |     }
677 |   }
678 | 
679 |   // File not found
680 |   return 0;
681 | }
682 | 
683 | #endif  // COMMON_HELPER_STRING_H_
684 | 


--------------------------------------------------------------------------------
/src/libs/magicavoxel_file_writer/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2018 Aiekick
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/src/libs/magicavoxel_file_writer/VoxWriter.cpp:
--------------------------------------------------------------------------------
  1 | // This is an independent project of an individual developer. Dear PVS-Studio, please check it.
  2 | // PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com
  3 | 
  4 | // Copyright 2018 Stephane Cuillerdier @Aiekick
  5 | 
  6 | // Permission is hereby granted, free of charge, to any person obtaining a
  7 | // copy of this software and associated documentation files (the "Software"),
  8 | // to deal in the Software without restriction, including without
  9 | // limitation the rights to use, copy, modify, merge, publish, distribute,
 10 | // sublicense, and/or sell copies of the Software, and to permit persons to
 11 | // whom the Software is furnished to do so, subject to the following conditions:
 12 | 
 13 | // The above copyright notice and this permission notice shall be included
 14 | // in all copies or substantial portions of the Software.
 15 | 
 16 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 17 | // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 18 | // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 19 | // IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
 20 | // DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 21 | // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 22 | 
 23 | // This file is a helper for writing a .vox file in the format used after the 0.99 release,
 24 | // which supports the world mode editor.
 25 | // Just add every color, with its color index, using AddColor,
 26 | // then add every voxel, in world position, using AddVoxel, and finally save the model.
 27 | // That's all. The file was initially created for my procedural software
 28 | // "SdfMesher", cf. https://twitter.com/hashtag/sdfmesher?src=hash
 29 | // It only supports my needs for the moment, but I put it here because it is a basis for more, I think.
 30 | 
 31 | #include "VoxWriter.h"
 32 | #include <cstdio>
 33 | #include <iostream>
 34 | 
 35 | // #define VERBOSE
 36 | 
 37 | namespace vox {
 38 | DICTstring::DICTstring() { bufferSize = 0; }
 39 | 
 40 | void DICTstring::write(FILE* fp) {
 41 |     bufferSize = (int32_t)buffer.size();
 42 |     fwrite(&bufferSize, sizeof(int32_t), 1, fp);
 43 |     fwrite(buffer.data(), sizeof(char), bufferSize, fp);
 44 | }
 45 | 
 46 | size_t DICTstring::getSize() {
 47 |     bufferSize = (int32_t)buffer.size();
 48 |     return sizeof(int32_t) + sizeof(char) * bufferSize;
 49 | }
 50 | 
 51 | //////////////////////////////////////////////////////////////////
 52 | 
 53 | DICTitem::DICTitem() {}
 54 | 
 55 | DICTitem::DICTitem(std::string vKey, std::string vValue) {
 56 |     key.buffer   = vKey;
 57 |     value.buffer = vValue;
 58 | }
 59 | 
 60 | void DICTitem::write(FILE* fp) {
 61 |     key.write(fp);
 62 |     value.write(fp);
 63 | }
 64 | 
 65 | size_t DICTitem::getSize() { return key.getSize() + value.getSize(); }
 66 | 
 67 | //////////////////////////////////////////////////////////////////
 68 | 
 69 | DICT::DICT() { count = 0; }
 70 | 
 71 | void DICT::write(FILE* fp) {
 72 |     count = (int32_t)keys.size();
 73 |     fwrite(&count, sizeof(int32_t), 1, fp);
 74 |     for (int i = 0; i < count; i++)
 75 |         keys[i].write(fp);
 76 | }
 77 | 
 78 | size_t DICT::getSize() {
 79 |     count    = (int32_t)keys.size();
 80 |     size_t s = sizeof(int32_t);
 81 |     for (int i = 0; i < count; i++)
 82 |         s += keys[i].getSize();
 83 |     return s;
 84 | }
 85 | 
 86 | void DICT::Add(std::string vKey, std::string vValue) { keys.push_back(DICTitem(vKey, vValue)); }
 87 | 
 88 | //////////////////////////////////////////////////////////////////
 89 | 
 90 | nTRN::nTRN(int32_t countFrames) {
 91 |     nodeId      = 0;
 92 |     reservedId  = -1;
 93 |     childNodeId = 0;
 94 |     numFrames   = 1;
 95 |     layerId     = -1;
 96 |     numFrames   = countFrames;
 97 |     while ((int32_t)frames.size() < numFrames)
 98 |         frames.push_back(DICT());
 99 | }
100 | 
101 | void nTRN::write(FILE* fp) {
102 |     // chunk header
103 |     int32_t id = GetMVID('n', 'T', 'R', 'N');
104 |     fwrite(&id, sizeof(int32_t), 1, fp);
105 |     size_t contentSize = getSize();
106 |     fwrite(&contentSize, sizeof(int32_t), 1, fp);
107 |     size_t childSize = 0;
108 |     fwrite(&childSize, sizeof(int32_t), 1, fp);
109 | 
110 |     // datas's
111 |     fwrite(&nodeId, sizeof(int32_t), 1, fp);
112 |     nodeAttribs.write(fp);
113 |     fwrite(&childNodeId, sizeof(int32_t), 1, fp);
114 |     fwrite(&reservedId, sizeof(int32_t), 1, fp);
115 |     fwrite(&layerId, sizeof(int32_t), 1, fp);
116 |     fwrite(&numFrames, sizeof(int32_t), 1, fp);
117 |     for (int i = 0; i < numFrames; i++)
118 |         frames[i].write(fp);
119 | }
120 | 
121 | size_t nTRN::getSize() {
122 |     size_t s = sizeof(int32_t) * 5 + nodeAttribs.getSize();
123 |     for (int i = 0; i < numFrames; i++)
124 |         s += frames[i].getSize();
125 |     return s;
126 | }
127 | 
128 | //////////////////////////////////////////////////////////////////
129 | 
130 | nGRP::nGRP(int32_t vCount) {
131 |     nodeId            = 0;
132 |     nodeChildrenNodes = vCount;
133 |     while ((int32_t)childNodes.size() < nodeChildrenNodes)
134 |         childNodes.push_back(0);
135 | }
136 | 
137 | void nGRP::write(FILE* fp) {
138 |     // chunk header
139 |     int32_t id = GetMVID('n', 'G', 'R', 'P');
140 |     fwrite(&id, sizeof(int32_t), 1, fp);
141 |     size_t contentSize = getSize();
142 |     fwrite(&contentSize, sizeof(int32_t), 1, fp);
143 |     size_t childSize = 0;
144 |     fwrite(&childSize, sizeof(int32_t), 1, fp);
145 | 
146 |     // datas's
147 |     fwrite(&nodeId, sizeof(int32_t), 1, fp);
148 |     nodeAttribs.write(fp);
149 |     fwrite(&nodeChildrenNodes, sizeof(int32_t), 1, fp);
150 |     fwrite(childNodes.data(), sizeof(int32_t), nodeChildrenNodes, fp);
151 | }
152 | 
153 | size_t nGRP::getSize() { return sizeof(int32_t) * (2 + nodeChildrenNodes) + nodeAttribs.getSize(); }
154 | 
155 | //////////////////////////////////////////////////////////////////
156 | 
157 | MODEL::MODEL() { modelId = 0; }
158 | 
159 | void MODEL::write(FILE* fp) {
160 |     fwrite(&modelId, sizeof(int32_t), 1, fp);
161 |     modelAttribs.write(fp);
162 | }
163 | 
164 | size_t MODEL::getSize() { return sizeof(int32_t) + modelAttribs.getSize(); }
165 | 
166 | //////////////////////////////////////////////////////////////////
167 | 
168 | nSHP::nSHP(int32_t vCount) {
169 |     nodeId    = 0;
170 |     numModels = vCount;
171 |     models.resize(numModels);
172 | }
173 | 
174 | void nSHP::write(FILE* fp) {
175 |     // chunk header
176 |     int32_t id = GetMVID('n', 'S', 'H', 'P');
177 |     fwrite(&id, sizeof(int32_t), 1, fp);
178 |     size_t contentSize = getSize();
179 |     fwrite(&contentSize, sizeof(int32_t), 1, fp);
180 |     size_t childSize = 0;
181 |     fwrite(&childSize, sizeof(int32_t), 1, fp);
182 | 
183 |     // datas's
184 |     fwrite(&nodeId, sizeof(int32_t), 1, fp);
185 |     nodeAttribs.write(fp);
186 |     fwrite(&numModels, sizeof(int32_t), 1, fp);
187 |     for (int i = 0; i < numModels; i++)
188 |         models[i].write(fp);
189 | }
190 | 
191 | size_t nSHP::getSize() {
192 |     size_t s = sizeof(int32_t) * 2 + nodeAttribs.getSize();
193 |     for (int i = 0; i < numModels; i++)
194 |         s += models[i].getSize();
195 |     return s;
196 | }
197 | 
198 | //////////////////////////////////////////////////////////////////
199 | 
200 | LAYR::LAYR() {
201 |     nodeId     = 0;
202 |     reservedId = -1;
203 | }
204 | 
205 | void LAYR::write(FILE* fp) {
206 |     // chunk header
207 |     int32_t id = GetMVID('L', 'A', 'Y', 'R');
208 |     fwrite(&id, sizeof(int32_t), 1, fp);
209 |     size_t contentSize = getSize();
210 |     fwrite(&contentSize, sizeof(int32_t), 1, fp);
211 |     size_t childSize = 0;
212 |     fwrite(&childSize, sizeof(int32_t), 1, fp);
213 | 
214 |     // datas's
215 |     fwrite(&nodeId, sizeof(int32_t), 1, fp);
216 |     nodeAttribs.write(fp);
217 |     fwrite(&reservedId, sizeof(int32_t), 1, fp);
218 | }
219 | 
220 | size_t LAYR::getSize() { return sizeof(int32_t) * 2 + nodeAttribs.getSize(); }
221 | 
222 | //////////////////////////////////////////////////////////////////
223 | 
224 | SIZE::SIZE() {
225 |     sizex = 0;
226 |     sizey = 0;
227 |     sizez = 0;
228 | }
229 | 
230 | void SIZE::write(FILE* fp) {
231 |     // chunk header
232 |     int32_t id = GetMVID('S', 'I', 'Z', 'E');
233 |     fwrite(&id, sizeof(int32_t), 1, fp);
234 |     size_t contentSize = getSize();
235 |     fwrite(&contentSize, sizeof(int32_t), 1, fp);
236 |     size_t childSize = 0;
237 |     fwrite(&childSize, sizeof(int32_t), 1, fp);
238 | 
239 |     // datas's
240 |     fwrite(&sizex, sizeof(int32_t), 1, fp);
241 |     fwrite(&sizey, sizeof(int32_t), 1, fp);
242 |     fwrite(&sizez, sizeof(int32_t), 1, fp);
243 | }
244 | 
245 | size_t SIZE::getSize() { return sizeof(int32_t) * 3; }
246 | 
247 | //////////////////////////////////////////////////////////////////
248 | 
249 | XYZI::XYZI() { numVoxels = 0; }
250 | 
251 | void XYZI::write(FILE* fp) {
252 |     // chunk header
253 |     int32_t id = GetMVID('X', 'Y', 'Z', 'I');
254 |     fwrite(&id, sizeof(int32_t), 1, fp);
255 |     size_t contentSize = getSize();
256 |     fwrite(&contentSize, sizeof(int32_t), 1, fp);
257 |     size_t childSize = 0;
258 |     fwrite(&childSize, sizeof(int32_t), 1, fp);
259 | 
260 |     // datas's
261 |     fwrite(&numVoxels, sizeof(int32_t), 1, fp);
262 |     fwrite(voxels.data(), sizeof(uint8_t), voxels.size(), fp);
263 | }
264 | 
265 | size_t XYZI::getSize() {
266 |     numVoxels = (int32_t)voxels.size() / 4;
267 |     return sizeof(int32_t) * (1 + numVoxels);
268 | }
269 | 
270 | //////////////////////////////////////////////////////////////////
271 | 
272 | RGBA::RGBA() {}
273 | 
274 | void RGBA::write(FILE* fp) {
275 |     // chunk header
276 |     int32_t id = GetMVID('R', 'G', 'B', 'A');
277 |     fwrite(&id, sizeof(int32_t), 1, fp);
278 |     size_t contentSize = getSize();
279 |     fwrite(&contentSize, sizeof(int32_t), 1, fp);
280 |     size_t childSize = 0;
281 |     fwrite(&childSize, sizeof(int32_t), 1, fp);
282 | 
283 |     // datas's
284 |     fwrite(colors, sizeof(uint8_t), contentSize, fp);
285 | }
286 | 
287 | size_t RGBA::getSize() { return sizeof(uint8_t) * 4 * 256; }
288 | 
289 | //////////////////////////////////////////////////////////////////
290 | 
291 | VoxCube::VoxCube() {
292 |     id = 0;
293 |     tx = 0;
294 |     ty = 0;
295 |     tz = 0;
296 | }
297 | 
298 | void VoxCube::write(FILE* fp) {
299 |     for (auto& xyzi : xyzis) {
300 |         size.write(fp);
301 |         xyzi.second.write(fp);
302 |     }
303 | }
304 | 
305 | //////////////////////////////////////////////////////////////////
306 | 
307 | VoxWriter* VoxWriter::Create(const std::string& vFilePathName, const uint32_t& vLimitX, const uint32_t& vLimitY, const uint32_t& vLimitZ, int32_t* vError) {
308 |     VoxWriter* vox = new VoxWriter(vLimitX, vLimitY, vLimitZ);
309 | 
310 |     *vError = vox->IsOk(vFilePathName);
311 | 
312 |     if (*vError == 0) {
313 |         return vox;
314 |     } else {
315 |         printf("Vox file creation failed, err : %s", GetErrnoMsg(*vError).c_str());
316 | 
317 |         SAFE_DELETE(vox);
318 |     }
319 | 
320 |     return vox;
321 | }
322 | 
323 | std::string VoxWriter::GetErrnoMsg(const int32_t& vError) {
324 |     std::string res;
325 | 
326 |     switch (vError) {
327 |         case 1: res = "Operation not permitted"; break;
328 |         case 2: res = "No such file or directory"; break;
329 |         case 3: res = "No such process"; break;
330 |         case 4: res = "Interrupted function"; break;
331 |         case 5: res = "I / O error"; break;
332 |         case 6: res = "No such device or address"; break;
333 |         case 7: res = "Argument list too long"; break;
334 |         case 8: res = "Exec format error"; break;
335 |         case 9: res = "Bad file number"; break;
336 |         case 10: res = "No spawned processes"; break;
337 |         case 11: res = "No more processes or not enough memory or maximum nesting level reached"; break;
338 |         case 12: res = "Not enough memory"; break;
339 |         case 13: res = "Permission denied"; break;
340 |         case 14: res = "Bad address"; break;
341 |         case 16: res = "Device or resource busy"; break;
342 |         case 17: res = "File exists"; break;
343 |         case 18: res = "Cross - device link"; break;
344 |         case 19: res = "No such device"; break;
345 |         case 20: res = "Not a director"; break;
346 |         case 21: res = "Is a directory"; break;
347 |         case 22: res = "Invalid argument"; break;
348 |         case 23: res = "Too many files open in system"; break;
349 |         case 24: res = "Too many open files"; break;
350 |         case 25: res = "Inappropriate I / O control operation"; break;
351 |         case 27: res = "File too large"; break;
352 |         case 28: res = "No space left on device"; break;
353 |         case 29: res = "Invalid seek"; break;
354 |         case 30: res = "Read - only file system"; break;
355 |         case 31: res = "Too many links"; break;
356 |         case 32: res = "Broken pipe"; break;
357 |         case 33: res = "Math argument"; break;
358 |         case 34: res = "Result too large"; break;
359 |         case 36: res = "Resource deadlock would occur"; break;
360 |         case 38: res = "Filename too long"; break;
361 |         case 39: res = "No locks available"; break;
362 |         case 40: res = "Function not supported"; break;
363 |         case 41: res = "Directory not empty"; break;
364 |         case 42: res = "Illegal byte sequence"; break;
365 |         case 80: res = "String was truncated"; break;
366 |     }
367 | 
368 |     return res;
369 | }
370 | 
371 | 
372 | //////////////////////////////////////////////////////////////////
373 | // the limit of magicavoxel is 127 for one cube, is 127 voxels (indexs : 0 -> 126)
374 | // vMaxVoxelPerCubeX,Y,Z define the limit of one cube
375 | VoxWriter::VoxWriter(const VoxelX& vMaxVoxelPerCubeX, const VoxelY& vMaxVoxelPerCubeY, const VoxelZ& vMaxVoxelPerCubeZ) {
376 |     // the limit of magicavoxel is 127 because the first voxel is 1 not 0
377 |     // so this is 0 to 126
378 |     // index limit, size is 127
379 |     m_MaxVoxelPerCubeX = ct::clamp<size_t>(vMaxVoxelPerCubeX, 0, 126);
380 |     m_MaxVoxelPerCubeY = ct::clamp<size_t>(vMaxVoxelPerCubeY, 0, 126);
381 |     m_MaxVoxelPerCubeZ = ct::clamp<size_t>(vMaxVoxelPerCubeZ, 0, 126);
382 | }
383 | 
384 | VoxWriter::~VoxWriter() {}
385 | 
386 | int32_t VoxWriter::IsOk(const std::string& vFilePathName) {
387 |     if (m_OpenFileForWriting(vFilePathName)) {
388 |         m_CloseFile();
389 |     }
390 |     return lastError;
391 | }
392 | 
393 | void VoxWriter::ClearVoxels() {
394 |     cubes.clear();
395 |     cubesId.clear();
396 |     voxelId.clear();
397 | }
398 | 
399 | void VoxWriter::ClearColors() { colors.clear(); }
400 | 
401 | void VoxWriter::StartTimeLogging() {
402 |     m_TimeLoggingEnabled = true;
403 |     m_StartTime          = std::chrono::steady_clock::now();
404 |     m_LastKeyFrameTime   = m_StartTime;
405 | };
406 | 
407 | void VoxWriter::StopTimeLogging() {
408 |     if (m_TimeLoggingEnabled) {
409 |         const auto now           = std::chrono::steady_clock::now();
410 |         m_FrameTimes[m_KeyFrame] = std::chrono::duration_cast<std::chrono::milliseconds>(now - m_LastKeyFrameTime).count() * 1e-3;
411 |         if (m_KeyFrameTimeLoggingFunctor) {
412 |             m_KeyFrameTimeLoggingFunctor(m_KeyFrame, m_FrameTimes.at(m_KeyFrame));
413 |         }
414 |         m_TotalTime              = std::chrono::duration_cast<std::chrono::milliseconds>(now - m_StartTime).count() * 1e-3;
415 |         m_TimeLoggingEnabled = false;
416 |     }
417 | }
418 | 
419 | void VoxWriter::SetKeyFrameTimeLoggingFunctor(const KeyFrameTimeLoggingFunctor& vKeyFrameTimeLoggingFunctor) {
420 |     m_KeyFrameTimeLoggingFunctor = vKeyFrameTimeLoggingFunctor;
421 | }
422 | 
423 | void VoxWriter::SetKeyFrame(uint32_t vKeyFrame) {
424 |     if (m_KeyFrame != vKeyFrame) {
425 |         if (m_TimeLoggingEnabled) {
426 |             const auto now           = std::chrono::steady_clock::now();
427 |             const auto elapsed       = now - m_LastKeyFrameTime;
428 |             m_FrameTimes[m_KeyFrame] = std::chrono::duration_cast<std::chrono::milliseconds>(elapsed).count() * 1e-3;
429 |             if (m_KeyFrameTimeLoggingFunctor) {
430 |                 m_KeyFrameTimeLoggingFunctor(m_KeyFrame, m_FrameTimes.at(m_KeyFrame));
431 |             }
432 |             m_LastKeyFrameTime = now;
433 |         }
434 |         m_KeyFrame = vKeyFrame;
435 |     }
436 | }
437 | 
438 | void VoxWriter::AddColor(const uint8_t& r, const uint8_t& g, const uint8_t& b, const uint8_t& a, const uint8_t& index) {
439 |     while (colors.size() <= index)
440 |         colors.push_back(0);
441 |     colors[index] = GetID(r, g, b, a);
442 | }
443 | 
444 | void VoxWriter::AddVoxel(const size_t& vX, const size_t& vY, const size_t& vZ, const uint8_t& vColorIndex) {
445 |     // cube pos
446 |     size_t  ox = (size_t)std::floor((double)vX / (double)m_MaxVoxelPerCubeX);
447 |     size_t oy = (size_t)std::floor((double)vY / (double)m_MaxVoxelPerCubeY);
448 |     size_t  oz = (size_t)std::floor((double)vZ / (double)m_MaxVoxelPerCubeZ);
449 | 
450 |     minCubeX = ct::mini<size_t>(minCubeX, ox);
451 |     minCubeY = ct::mini<size_t>(minCubeX, oy);
452 |     minCubeZ = ct::mini<size_t>(minCubeX, oz);
453 | 
454 |     auto cube = m_GetCube(ox, oy, oz);
455 | 
456 |     m_MergeVoxelInCube(vX, vY, vZ, vColorIndex, cube);
457 | }
458 | 
459 | void VoxWriter::SaveToFile(const std::string& vFilePathName) {
460 |     if (m_OpenFileForWriting(vFilePathName)) {
461 |         int32_t zero = 0;
462 | 
463 |         fwrite(&ID_VOX, sizeof(int32_t), 1, m_File);
464 |         fwrite(&MV_VERSION, sizeof(int32_t), 1, m_File);
465 | 
466 |         // MAIN CHUNCK
467 |         fwrite(&ID_MAIN, sizeof(int32_t), 1, m_File);
468 |         fwrite(&zero, sizeof(int32_t), 1, m_File);
469 | 
470 |         long numBytesMainChunkPos = m_GetFilePos();
471 |         fwrite(&zero, sizeof(int32_t), 1, m_File);
472 | 
473 |         long headerSize = m_GetFilePos();
474 | 
475 |         int count = (int)cubes.size();
476 | 
477 |         int  nodeIds = 0;
478 |         nTRN rootTransform(1);
479 |         rootTransform.nodeId      = nodeIds;
480 |         rootTransform.childNodeId = ++nodeIds;
481 | 
482 |         nGRP rootGroup(count);
483 |         rootGroup.nodeId            = nodeIds;  //
484 |         rootGroup.nodeChildrenNodes = count;
485 | 
486 |         std::vector<nSHP> shapes;
487 |         std::vector<nTRN> shapeTransforms;
488 |         size_t            cube_idx = 0U;
489 |         int32_t           model_id = 0U;
490 |         for (auto& cube : cubes) {
491 |             cube.write(m_File);
492 | 
493 |             // trans
494 |             nTRN trans(1);// not a trans anim so ony one frame
495 |             trans.nodeId                   = ++nodeIds;  //
496 |             rootGroup.childNodes[cube_idx] = nodeIds;
497 |             trans.childNodeId              = ++nodeIds;
498 |             trans.layerId                  = 0;
499 |             cube.tx = (int)std::floor((cube.tx - minCubeX + 0.5f) * m_MaxVoxelPerCubeX - maxVolume.lowerBound.x - maxVolume.Size().x * 0.5);
500 |             cube.ty = (int)std::floor((cube.ty - minCubeY + 0.5f) * m_MaxVoxelPerCubeY - maxVolume.lowerBound.y - maxVolume.Size().y * 0.5);
501 |             cube.tz = (int)std::floor((cube.tz - minCubeZ + 0.5f) * m_MaxVoxelPerCubeZ);
502 |             trans.frames[0].Add("_t", ct::toStr(cube.tx) + " " + ct::toStr(cube.ty) + " " + ct::toStr(cube.tz));
503 |             shapeTransforms.push_back(trans);
504 | 
505 |             // shape
506 |             nSHP shape((int32_t)cube.xyzis.size());
507 |             shape.nodeId            = nodeIds;
508 |             size_t model_array_id = 0U;
509 |             for (const auto& xyzi : cube.xyzis) {
510 |                 shape.models[model_array_id].modelId = model_id;
511 |                 shape.models[model_array_id].modelAttribs.Add("_f", ct::toStr(xyzi.first));
512 |                 ++model_array_id;
513 |                 ++model_id;
514 |             }
515 |             shapes.push_back(shape);
516 | 
517 |             ++cube_idx;
518 |         }
519 | 
520 |         rootTransform.write(m_File);
521 |         rootGroup.write(m_File);
522 | 
523 |         // trn & shp
524 |         for (int i = 0; i < count; i++) {
525 |             shapeTransforms[i].write(m_File);
526 |             shapes[i].write(m_File);
527 |         }
528 | 
529 |         // no layr in my cases
530 | 
531 |         // layr
532 |         /*for (int i = 0; i < 8; i++)
533 |         {
534 |             LAYR layr;
535 |             layr.nodeId = i;
536 |             layr.nodeAttribs.Add("_name", ct::toStr(i));
537 |             layr.write(m_File);
538 |         }*/
539 | 
540 |         // RGBA Palette
541 |         if (colors.size() > 0) {
542 |             RGBA palette;
543 |             for (int32_t i = 0; i < 255; i++) {
544 |                 if (i < (int32_t)colors.size()) {
545 |                     palette.colors[i] = colors[i];
546 |                 } else {
547 |                     palette.colors[i] = 0;
548 |                 }
549 |             }
550 | 
551 |             palette.write(m_File);
552 |         }
553 | 
554 |         const long mainChildChunkSize = m_GetFilePos() - headerSize;
555 |         m_SetFilePos(numBytesMainChunkPos);
556 |         uint32_t size = (uint32_t)mainChildChunkSize;
557 |         fwrite(&size, sizeof(uint32_t), 1, m_File);
558 | 
559 |         m_CloseFile();
560 |     }
561 | }
562 | 
563 | const size_t VoxWriter::GetVoxelsCount(const KeyFrame& vKeyFrame) const {
564 |     size_t voxel_count = 0U;
565 |     for (const auto& cube : cubes) {
566 |         if (cube.xyzis.find(vKeyFrame) != cube.xyzis.end()) {
567 |             voxel_count += cube.xyzis.at(vKeyFrame).numVoxels;
568 |         }
569 |     }
570 |     return voxel_count;
571 | }
572 | 
573 | const size_t VoxWriter::GetVoxelsCount() const {
574 |     size_t voxel_count = 0U;
575 |     for (const auto& cube : cubes) {
576 |         for (auto& key_xyzi : cube.xyzis) {
577 |             voxel_count += key_xyzi.second.numVoxels;
578 |         }
579 |     }
580 |     return voxel_count;
581 | }
582 | 
583 | void VoxWriter::PrintStats() const {
584 |     std::cout << "---- Stats ------------------------------" << std::endl;
585 |     std::cout << "Volume : " << maxVolume.Size().x << " x " << maxVolume.Size().y << " x " << maxVolume.Size().z << std::endl;
586 |     std::cout << "count cubes : " << cubes.size() << std::endl;
587 |     std::map<KeyFrame, size_t> frame_counts;
588 |     for (const auto& cube : cubes) {
589 |         for (auto& key_xyzi : cube.xyzis) {
590 |             frame_counts[key_xyzi.first] += key_xyzi.second.numVoxels;
591 |         }
592 |     }
593 |     size_t voxels_total = 0U;
594 |     if (frame_counts.size() > 1U) {
595 |         std::cout << "count key frames : " << frame_counts.size() << std::endl;
596 |         std::cout << "-----------------------------------------" << std::endl;
597 |         for (const auto& frame_count : frame_counts) {
598 |             std::cout << " o--\\-> key frame : " << frame_count.first << std::endl;
599 |             std::cout << "     \\-> voxels count : " << frame_count.second << std::endl;
600 |             if (m_FrameTimes.find(frame_count.first) != m_FrameTimes.end()) {
601 |                 std::cout << "      \\-> elapsed time : " << m_FrameTimes.at(frame_count.first) << " secs" << std::endl;
602 |             }
603 |             voxels_total += frame_count.second;
604 |         }
605 |         std::cout << "-----------------------------------------" << std::endl;
606 |     } else if (!frame_counts.empty()) {
607 |         voxels_total = frame_counts.begin()->second;
608 |     }
609 |     std::cout << "voxels total : " << voxels_total << std::endl;
610 |     std::cout << "total elapsed time : " << m_TotalTime << " secs" << std::endl;
611 |     std::cout << "-----------------------------------------" << std::endl;
612 | }
613 | 
614 | bool VoxWriter::m_OpenFileForWriting(const std::string& vFilePathName) {
615 | #if _MSC_VER
616 |     lastError = fopen_s(&m_File, vFilePathName.c_str(), "wb");
617 | #else
618 |     m_File    = fopen(vFilePathName.c_str(), "wb");
619 |     lastError = m_File ? 0 : errno;
620 | #endif
621 |     if (lastError != 0)
622 |         return false;
623 |     return true;
624 | }
625 | 
626 | void VoxWriter::m_CloseFile() { fclose(m_File); }
627 | 
628 | long VoxWriter::m_GetFilePos() const { return ftell(m_File); }
629 | 
630 | void VoxWriter::m_SetFilePos(const long& vPos) {
631 |     //  SEEK_SET	Beginning of file
632 |     //  SEEK_CUR	Current position of the file pointer
633 |     //	SEEK_END	End of file
634 |     fseek(m_File, vPos, SEEK_SET);
635 | }
636 | 
637 | const size_t VoxWriter::m_GetCubeId(const VoxelX& vX, const VoxelY& vY, const VoxelZ& vZ) {
638 |     if (cubesId.find(vX) != cubesId.end()) {
639 |         if (cubesId[vX].find(vY) != cubesId[vX].end()) {
640 |             if (cubesId[vX][vY].find(vZ) != cubesId[vX][vY].end()) {
641 |                 return cubesId[vX][vY][vZ];
642 |             }
643 |         }
644 |     }
645 | 
646 |     cubesId[vX][vY][vZ] = maxCubeId++;
647 | 
648 |     return cubesId[vX][vY][vZ];
649 | }
650 | 
// Wraps a position inside a particular cube dimension: returns `v` modulo `lim`,
// narrowed to uint8_t. Returns 0 when `lim` is 0, where the modulo is undefined.
// Both arguments are unsigned, so no correction for negative remainders is needed.
inline uint8_t Wrap(size_t v, size_t lim) {
    if (lim == 0) {
        return 0;
    }
    return (uint8_t)(v % lim);
}
659 | 
660 | void VoxWriter::m_MergeVoxelInCube(const VoxelX& vX, const VoxelY& vY, const VoxelZ& vZ, const uint8_t& vColorIndex, VoxCube* vCube) {
661 |     maxVolume.Combine(ct::dvec3((double)vX, (double)vY, (double)vZ));
662 | 
663 |     bool exist = false;
664 |     if (voxelId.find(m_KeyFrame) != voxelId.end()) {
665 |         auto& vidk = voxelId.at(m_KeyFrame);
666 |         if (vidk.find(vX) != vidk.end()) {
667 |             auto& vidkx = vidk.at(vX);
668 |             if (vidkx.find(vY) != vidkx.end()) {
669 |                 auto& vidkxy = vidkx.at(vY);
670 |                 if (vidkxy.find(vZ) != vidkxy.end()) {
671 |                     exist = true;
672 |                 }
673 |             }
674 |         }
675 |     }
676 | 
677 |     if (!exist) {
678 |         auto& xyzi = vCube->xyzis[m_KeyFrame];
679 |         xyzi.voxels.push_back(Wrap(vX, m_MaxVoxelPerCubeX));                      // x
680 |         xyzi.voxels.push_back(Wrap(vY, m_MaxVoxelPerCubeY));                      // y
681 |         xyzi.voxels.push_back(Wrap(vZ, m_MaxVoxelPerCubeZ));                      // z
682 | 
683 |         // correspond a la loc de la couleur du voxel en question
684 |         voxelId[m_KeyFrame][vX][vY][vZ] = (int)xyzi.voxels.size();
685 | 
686 |         xyzi.voxels.push_back(vColorIndex);  // color index
687 |     }
688 | }
689 | 
690 | VoxCube* VoxWriter::m_GetCube(const VoxelX& vX, const VoxelY& vY, const VoxelZ& vZ) {
691 |     const auto& id = m_GetCubeId(vX, vY, vZ);
692 | 
693 |     if (id == cubes.size()) {
694 |         VoxCube c;
695 | 
696 |         c.id = (int32_t)id;
697 | 
698 |         c.tx = (int32_t)vX;
699 |         c.ty = (int32_t)vY;
700 |         c.tz = (int32_t)vZ;
701 | 
702 |         c.size.sizex = (int32_t)m_MaxVoxelPerCubeX;
703 |         c.size.sizey = (int32_t)m_MaxVoxelPerCubeY;
704 |         c.size.sizez = (int32_t)m_MaxVoxelPerCubeZ;
705 | 
706 |         cubes.push_back(c);
707 |     }
708 | 
709 |     if (id < cubes.size()) {
710 |         return &cubes[id];
711 |     }
712 | 
713 |     return nullptr;
714 | }
715 | 
716 | }  // namespace vox
717 | 


--------------------------------------------------------------------------------
/src/libs/magicavoxel_file_writer/VoxWriter.h:
--------------------------------------------------------------------------------
  1 | // Copyright 2018 Stephane Cuillerdier @Aiekick
  2 | 
  3 | // Permission is hereby granted, free of charge, to any person obtaining a
  4 | // copy of this software and associated documentation files (the "Software"),
  5 | // to deal in the Software without restriction, including without
  6 | // limitation the rights to use, copy, modify, merge, publish, distribute,
  7 | // sublicense, and/or sell copies of the Software, and to permit persons to
  8 | // whom the Software is furnished to do so, subject to the following conditions:
  9 | 
 10 | // The above copyright notice and this permission notice shall be included
 11 | // in all copies or substantial portions of the Software.
 12 | 
 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 14 | // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 15 | // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 16 | // IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
 17 | // DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 18 | // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 19 | 
 20 | // This file is a helper for writing a .vox file in the format used after the 0.99 release,
 21 | // which supports the world mode editor.
 22 | // Just add every color, with its color index, using AddColor,
 23 | // then add every voxel, in world position, using AddVoxel, and finally save the model.
 24 | // That's all. The file was initially created for my procedural software
 25 | // "SdfMesher", cf. https://twitter.com/hashtag/sdfmesher?src=hash
 26 | // It only supports my needs for the moment, but I put it here because it is a basis for more, I think.
 27 | #ifndef __VOX_WRITER_H__
 28 | #define __VOX_WRITER_H__
 29 | 
 30 | #include <map>
 31 | #include <cmath>
 32 | #include <string>
 33 | #include <vector>
 34 | #include <chrono>
 35 | #include <cstdint>
 36 | #include <sstream>
 37 | #include <functional>
 38 | 
 39 | // extracted and adapted from https://github.com/aiekick/cTools (LICENSE MIT)
 40 | // for make VoxWriter lib free
 41 | #define SAFE_DELETE(a) \
 42 |     if (a != 0)        \
 43 |     delete a, a = 0
 44 | 
 45 | namespace ct {
 46 | template <typename T>
 47 | ::std::string toStr(const T& DOUBLE) {
 48 |     ::std::ostringstream os;
 49 |     os << DOUBLE;
 50 |     return os.str();
 51 | }
 52 | template <typename T>
 53 | inline T mini(const T& a, T& b) {
 54 |     return a < b ? a : b;
 55 | }
 56 | template <typename T>
 57 | inline T maxi(const T& a, T& b) {
 58 |     return a > b ? a : b;
 59 | }
 60 | template <typename T>
 61 | inline T clamp(const T& n) {
 62 |     return n >= T(0) && n <= T(1) ? n : T(n > T(0));
 63 | }  // clamp n => 0 to 1
 64 | template <typename T>
 65 | inline T clamp(const T& n, const T& b) {
 66 |     return n >= T(0) && n <= b ? n : T(n > T(0)) * b;
 67 | }  // clamp n => 0 to b
 68 | template <typename T>
 69 | inline T clamp(const T& n, const T& a, const T& b) {
 70 |     return n >= a && n <= b ? n : n < a ? a : b;
 71 | }  // clamp n => a to b
 72 | 
 73 | // specialized
 74 | struct dvec3 {
 75 |     double x, y, z;
 76 |     dvec3() { x = 0.0, y = 0.0, z = 0.0; }
 77 |     dvec3(const double& vxyz) { x = vxyz, y = vxyz, z = vxyz; }
 78 |     dvec3(const double& vx, const double& vy, const double& vz) { x = vx, y = vy, z = vz; }
 79 |     void operator+=(const double v) {
 80 |         x += v;
 81 |         y += v;
 82 |         z += v;
 83 |     }
 84 |     void operator-=(const double v) {
 85 |         x -= v;
 86 |         y -= v;
 87 |         z -= v;
 88 |     }
 89 |     void operator+=(const dvec3 v) {
 90 |         x += v.x;
 91 |         y += v.y;
 92 |         z += v.z;
 93 |     }
 94 |     void operator-=(const dvec3 v) {
 95 |         x -= v.x;
 96 |         y -= v.y;
 97 |         z -= v.z;
 98 |     }
 99 |     void operator*=(double v) {
100 |         x *= v;
101 |         y *= v;
102 |         z *= v;
103 |     }
104 |     void operator/=(double v) {
105 |         x /= v;
106 |         y /= v;
107 |         z /= v;
108 |     }
109 |     void operator*=(dvec3 v) {
110 |         x *= v.x;
111 |         y *= v.y;
112 |         z *= v.z;
113 |     }
114 |     void operator/=(dvec3 v) {
115 |         x /= v.x;
116 |         y /= v.y;
117 |         z /= v.z;
118 |     }
119 | };
120 | inline dvec3 operator+(const dvec3& v, const double& f) { return dvec3(v.x + f, v.y + f, v.z + f); }
121 | inline dvec3 operator+(const dvec3& v, dvec3 f) { return dvec3(v.x + f.x, v.y + f.y, v.z + f.z); }
122 | inline dvec3 operator-(const dvec3& v, const double& f) { return dvec3(v.x - f, v.y - f, v.z - f); }
123 | inline dvec3 operator-(const dvec3& v, dvec3 f) { return dvec3(v.x - f.x, v.y - f.y, v.z - f.z); }
124 | inline dvec3 operator*(const dvec3& v, const double& f) { return dvec3(v.x * f, v.y * f, v.z * f); }
125 | inline dvec3 operator*(const dvec3& v, dvec3 f) { return dvec3(v.x * f.x, v.y * f.y, v.z * f.z); }
126 | inline dvec3 operator/(const dvec3& v, const double& f) { return dvec3(v.x / f, v.y / f, v.z / f); }
127 | inline dvec3 operator/(dvec3& v, const double& f) { return dvec3(v.x / f, v.y / f, v.z / f); }
128 | inline dvec3 operator/(const double& f, dvec3& v) { return dvec3(f / v.x, f / v.y, f / v.z); }
129 | inline dvec3 operator/(const dvec3& v, dvec3 f) { return dvec3(v.x / f.x, v.y / f.y, v.z / f.z); }
130 | 
131 | // specialized
132 | struct dAABBCC  // copy of b2AABB struct
133 | {
134 |     dvec3 lowerBound;  ///< the lower left vertex
135 |     dvec3 upperBound;  ///< the upper right vertex
136 | 
137 |     dAABBCC() : lowerBound(0.0), upperBound(0.0) {}
138 |     dAABBCC(dvec3 vlowerBound, dvec3 vUpperBound) {
139 |         lowerBound   = vlowerBound;
140 |         upperBound   = vUpperBound;
141 |     }
142 |     /// Add a vector to this vector.
143 |     void operator+=(const dvec3& v) {
144 |         lowerBound += v;
145 |         upperBound += v;
146 |     }
147 | 
148 |     /// Subtract a vector from this vector.
149 |     void operator-=(const dvec3& v) {
150 |         lowerBound -= v;
151 |         upperBound -= v;
152 |     }
153 | 
154 |     /// Multiply this vector by a scalar.
155 |     void operator*=(double a) {
156 |         lowerBound *= a;
157 |         upperBound *= a;
158 |     }
159 | 
160 |     /// Divide this vector by a scalar.
161 |     void operator/=(double a) {
162 |         lowerBound /= a;
163 |         upperBound /= a;
164 |     }
165 | 
166 |     /// Get the center of the AABB.
167 |     const dvec3 GetCenter() const { return (lowerBound + upperBound) * 0.5; }
168 | 
169 |     /// Get the extents of the AABB (half-widths).
170 |     const dvec3 GetExtents() const { return (upperBound - lowerBound) * 0.5; }
171 | 
172 |     /// Get the perimeter length
173 |     double GetPerimeter() const {
174 |         double wx = upperBound.x - lowerBound.x;
175 |         double wy = upperBound.y - lowerBound.y;
176 |         double wz = upperBound.z - lowerBound.z;
177 |         return 2.0 * (wx + wy + wz);
178 |     }
179 | 
180 |     /// Combine a point into this one.
181 |     void Combine(dvec3 pt) {
182 |         lowerBound.x = mini<double>(lowerBound.x, pt.x);
183 |         lowerBound.y = mini<double>(lowerBound.y, pt.y);
184 |         lowerBound.z = mini<double>(lowerBound.z, pt.z);
185 |         upperBound.x = maxi<double>(upperBound.x, pt.x);
186 |         upperBound.y = maxi<double>(upperBound.y, pt.y);
187 |         upperBound.z = maxi<double>(upperBound.z, pt.z);
188 |     }
189 | 
190 |     /// Does this aabb contain the provided vec2.
191 |     bool ContainsPoint(const dvec3& pt) const {
192 |         bool result = true;
193 |         result      = result && lowerBound.x <= pt.x;
194 |         result      = result && lowerBound.y <= pt.y;
195 |         result      = result && lowerBound.z <= pt.z;
196 |         result      = result && pt.x <= upperBound.x;
197 |         result      = result && pt.y <= upperBound.y;
198 |         result      = result && pt.z <= upperBound.z;
199 |         return result;
200 |     }
201 | 
202 |     bool Intersects(const dAABBCC& other) {
203 |         bool result = true;
204 |         result      = result || lowerBound.x <= other.lowerBound.x;
205 |         result      = result || lowerBound.y <= other.lowerBound.y;
206 |         result      = result || lowerBound.z <= other.lowerBound.z;
207 |         result      = result || other.upperBound.x <= upperBound.x;
208 |         result      = result || other.upperBound.y <= upperBound.y;
209 |         result      = result || other.upperBound.z <= upperBound.z;
210 |         return result;
211 |     }
212 | 
213 |     const dvec3 Size() const { return dvec3(upperBound - lowerBound); }
214 | };
215 | 
216 | /// Add a float to a dAABBCC.
217 | inline dAABBCC operator+(const dAABBCC& v, float f) { return dAABBCC(v.lowerBound + f, v.upperBound + f); }
218 | 
219 | /// Add a dAABBCC to a dAABBCC.
220 | inline dAABBCC operator+(const dAABBCC& v, dAABBCC f) { return dAABBCC(v.lowerBound + f.lowerBound, v.upperBound + f.upperBound); }
221 | 
222 | /// Substract a float from a dAABBCC.
223 | inline dAABBCC operator-(const dAABBCC& v, float f) { return dAABBCC(v.lowerBound - f, v.upperBound - f); }
224 | 
225 | /// Substract a dAABBCC to a dAABBCC.
226 | inline dAABBCC operator-(const dAABBCC& v, dAABBCC f) { return dAABBCC(v.lowerBound - f.lowerBound, v.upperBound - f.upperBound); }
227 | 
228 | /// Multiply a float with a dAABBCC.
229 | inline dAABBCC operator*(const dAABBCC& v, float f) { return dAABBCC(v.lowerBound * f, v.upperBound * f); }
230 | 
231 | /// Multiply a dAABBCC with a dAABBCC.
232 | inline dAABBCC operator*(const dAABBCC& v, dAABBCC f) { return dAABBCC(v.lowerBound * f.lowerBound, v.upperBound * f.upperBound); }
233 | 
234 | /// Divide a dAABBCC by a float.
235 | inline dAABBCC operator/(const dAABBCC& v, float f) { return dAABBCC(v.lowerBound / f, v.upperBound / f); }
236 | 
237 | /// Divide a dAABBCC by a float.
238 | inline dAABBCC operator/(dAABBCC& v, float f) { return dAABBCC(v.lowerBound / f, v.upperBound / f); }
239 | 
240 | /// Divide a dAABBCC by a dAABBCC.
241 | inline dAABBCC operator/(const dAABBCC& v, dAABBCC f) { return dAABBCC(v.lowerBound / f.lowerBound, v.upperBound / f.upperBound); }
242 | }  // namespace ct
243 | 
244 | namespace vox {
245 | 
246 | typedef uint32_t KeyFrame;
247 | 
248 | typedef size_t CubeX;
249 | typedef size_t  CubeY;
250 | typedef size_t  CubeZ;
251 | typedef size_t  CubeID;
252 | typedef size_t  VoxelX;
253 | typedef size_t  VoxelY;
254 | typedef size_t  VoxelZ;
255 | typedef size_t  VoxelID;
256 | typedef int32_t TagID;
257 | typedef int32_t Version;
258 | typedef int32_t ColorID;
259 | 
260 | typedef ct::dAABBCC Volume;
261 | 
262 | typedef std::function<void(const KeyFrame& vKeyFrame, const double& vValue)> KeyFrameTimeLoggingFunctor;
263 | 
/// Packs four bytes into one 32-bit value: a occupies bits 0-7, b bits 8-15, c bits 16-23 and d bits 24-31.
inline uint32_t GetMVID(uint8_t a, uint8_t b, uint8_t c, uint8_t d) {
    // Widen to uint32_t before shifting: a uint8_t promotes to signed int, and shifting a
    // value >= 0x80 left by 24 bits would run into the sign bit of that int.
    return static_cast<uint32_t>(a) | (static_cast<uint32_t>(b) << 8) | (static_cast<uint32_t>(c) << 16) | (static_cast<uint32_t>(d) << 24);
}
265 | 
// A string paired with a 32-bit size field.
// NOTE(review): presumably the length-prefixed STRING of the MagicaVoxel .vox format - confirm against VoxWriter.cpp.
struct DICTstring {
    int32_t     bufferSize;  // presumably the byte length of `buffer` - confirm
    std::string buffer;      // the string contents

    DICTstring();

    // Writes this string to fp (definition not in this header).
    void   write(FILE* fp);
    // NOTE(review): presumably the serialized size in bytes - confirm.
    size_t getSize();
};
275 | 
// One key/value pair, both stored as DICTstring.
struct DICTitem {
    DICTstring key;
    DICTstring value;

    DICTitem();
    // Builds an item from a key and a value string.
    DICTitem(std::string vKey, std::string vValue);

    // Writes this item to fp (definition not in this header).
    void   write(FILE* fp);
    // NOTE(review): presumably the serialized size in bytes - confirm.
    size_t getSize();
};
286 | 
// A list of DICTitem key/value pairs together with a 32-bit count.
struct DICT {
    int32_t               count;  // presumably the number of entries in `keys` - confirm
    std::vector<DICTitem> keys;   // the key/value pairs

    DICT();
    // Writes this dictionary to fp (definition not in this header).
    void   write(FILE* fp);
    // NOTE(review): presumably the serialized size in bytes - confirm.
    size_t getSize();
    // Adds a key/value pair (definition not in this header).
    void   Add(std::string vKey, std::string vValue);
};
296 | 
// Node record carrying ids, attributes and a list of per-frame dictionaries.
// NOTE(review): the name matches the "nTRN" (transform node) chunk of the MagicaVoxel .vox format - confirm.
struct nTRN {
    int32_t           nodeId;
    DICT              nodeAttribs;
    int32_t           childNodeId;
    int32_t           reservedId;
    int32_t           layerId;
    int32_t           numFrames;  // presumably the number of entries in `frames` - confirm
    std::vector<DICT> frames;     // one dictionary per frame

    // countFrames: presumably the number of frames to create - confirm against VoxWriter.cpp.
    nTRN(int32_t countFrames);

    // Writes this node to fp (definition not in this header).
    void   write(FILE* fp);
    // NOTE(review): presumably the serialized size in bytes - confirm.
    size_t getSize();
};
311 | 
// Node record carrying an id, attributes and a list of child node ids.
// NOTE(review): the name matches the "nGRP" (group node) chunk of the MagicaVoxel .vox format - confirm.
struct nGRP {
    int32_t              nodeId;
    DICT                 nodeAttribs;
    int32_t              nodeChildrenNodes;  // presumably the number of entries in `childNodes` - confirm
    std::vector<int32_t> childNodes;         // ids of the child nodes

    // vCount: presumably the number of child nodes - confirm against VoxWriter.cpp.
    nGRP(int32_t vCount);

    // Writes this node to fp (definition not in this header).
    void   write(FILE* fp);
    // NOTE(review): presumably the serialized size in bytes - confirm.
    size_t getSize();
};
323 | 
// A model reference: a model id plus an attribute dictionary. Used as the element type of nSHP::models.
struct MODEL {
    int32_t modelId;
    DICT    modelAttribs;

    MODEL();

    // Writes this record to fp (definition not in this header).
    void   write(FILE* fp);
    // NOTE(review): presumably the serialized size in bytes - confirm.
    size_t getSize();
};
333 | 
// Node record carrying an id, attributes and a list of MODEL references.
// NOTE(review): the name matches the "nSHP" (shape node) chunk of the MagicaVoxel .vox format - confirm.
struct nSHP {
    int32_t            nodeId;
    DICT               nodeAttribs;
    int32_t            numModels;  // presumably the number of entries in `models` - confirm
    std::vector<MODEL> models;     // the referenced models

    // vCount: presumably the number of models - confirm against VoxWriter.cpp.
    nSHP(int32_t vCount);

    // Writes this node to fp (definition not in this header).
    void   write(FILE* fp);
    // NOTE(review): presumably the serialized size in bytes - confirm.
    size_t getSize();
};
345 | 
// Record carrying an id, attributes and a reserved field.
// NOTE(review): the name matches the "LAYR" (layer) chunk of the MagicaVoxel .vox format - confirm.
struct LAYR {
    int32_t nodeId;
    DICT    nodeAttribs;
    int32_t reservedId;

    LAYR();
    // Writes this record to fp (definition not in this header).
    void   write(FILE* fp);
    // NOTE(review): presumably the serialized size in bytes - confirm.
    size_t getSize();
};
355 | 
// Three 32-bit sizes, one per axis. Used as VoxCube::size.
// NOTE(review): the name matches the "SIZE" chunk of the MagicaVoxel .vox format - confirm.
struct SIZE {
    int32_t sizex;
    int32_t sizey;
    int32_t sizez;

    SIZE();

    // Writes this record to fp (definition not in this header).
    void   write(FILE* fp);
    // NOTE(review): presumably the serialized size in bytes - confirm.
    size_t getSize();
};
366 | 
// A voxel count plus a flat byte buffer of voxel data. Stored per key frame in VoxCube::xyzis.
// NOTE(review): the name matches the "XYZI" chunk of the MagicaVoxel .vox format - confirm.
struct XYZI {
    int32_t              numVoxels;
    std::vector<uint8_t> voxels;  // presumably 4 bytes (x, y, z, color index) per voxel - confirm

    XYZI();
    // Writes this record to fp (definition not in this header).
    void   write(FILE* fp);
    // NOTE(review): presumably the serialized size in bytes - confirm.
    size_t getSize();
};
375 | 
// A table of 256 32-bit color values.
// NOTE(review): the name matches the "RGBA" (palette) chunk of the MagicaVoxel .vox format - confirm.
struct RGBA {
    int32_t colors[256];

    RGBA();
    // Writes this record to fp (definition not in this header).
    void   write(FILE* fp);
    // NOTE(review): presumably the serialized size in bytes - confirm.
    size_t getSize();
};
383 | 
// One cube of voxels: an id, a translation, a size and the voxel data of each key frame.
struct VoxCube {
    int id;

    // translate
    int tx;
    int ty;
    int tz;

    SIZE                     size;
    std::map<KeyFrame, XYZI> xyzis;  // voxel data, keyed by key frame

    VoxCube();

    // Writes this cube to fp (definition not in this header).
    void write(FILE* fp);
};
399 | 
400 | 
401 | class VoxWriter {
402 | public:
403 |     static VoxWriter*  Create(const std::string& vFilePathName, const uint32_t& vLimitX, const uint32_t& vLimitY, const uint32_t& vLimitZ, int32_t* vError);
404 |     static std::string GetErrnoMsg(const int32_t& vError);
405 | 
406 | private:
407 |     static const uint32_t GetID(const uint8_t& a, const uint8_t& b, const uint8_t& c, const uint8_t& d) { return (a) | (b << 8) | (c << 16) | (d << 24); }
408 | 
409 | private:
410 |     Version MV_VERSION = 150;  // the old version of MV not open another file than if version is 150 (answer by @ephtracy)
411 | 
412 |     TagID   ID_VOX  = GetID('V', 'O', 'X', ' ');
413 |     TagID   ID_PACK = GetID('P', 'A', 'C', 'K');
414 |     TagID   ID_MAIN = GetID('M', 'A', 'I', 'N');
415 |     TagID   ID_SIZE = GetID('S', 'I', 'Z', 'E');
416 |     TagID   ID_XYZI = GetID('X', 'Y', 'Z', 'I');
417 |     TagID   ID_RGBA = GetID('R', 'G', 'B', 'A');
418 |     TagID   ID_NTRN = GetID('n', 'T', 'R', 'N');
419 |     TagID   ID_NGRP = GetID('n', 'G', 'R', 'P');
420 |     TagID   ID_NSHP = GetID('n', 'S', 'H', 'P');
421 | 
422 |     VoxelX m_MaxVoxelPerCubeX = 0;
423 |     VoxelY m_MaxVoxelPerCubeY = 0;
424 |     VoxelZ m_MaxVoxelPerCubeZ = 0;
425 | 
426 |     CubeID maxCubeId = 0;
427 |     CubeX  minCubeX  = (CubeX)1e7;
428 |     CubeY  minCubeY  = (CubeY)1e7;
429 |     CubeZ  minCubeZ  = (CubeZ)1e7;
430 | 
431 |     FILE*  m_File    = nullptr;
432 | 
433 |     Volume maxVolume = Volume(1e7, -1e7);
434 | 
435 |     KeyFrame m_KeyFrame = 0;
436 | 
437 |     std::vector<ColorID> colors;
438 | 
439 |     std::vector<VoxCube> cubes;
440 | 
441 |     std::map<CubeX, std::map<CubeY, std::map<CubeZ, CubeID>>>                         cubesId;
442 |     std::map<KeyFrame, std::map<VoxelX, std::map<VoxelY, std::map<VoxelZ, VoxelID>>>> voxelId;
443 | 
444 |     int32_t lastError = 0;
445 | 
446 |     bool m_TimeLoggingEnabled = false; // for log elapsed time between key frames and total
447 | 
448 |     std::chrono::steady_clock::time_point m_StartTime;
449 |     std::chrono::steady_clock::time_point m_LastKeyFrameTime;
450 |     std::map<KeyFrame, double>            m_FrameTimes;
451 |     double                                m_TotalTime;
452 | 
453 |     KeyFrameTimeLoggingFunctor m_KeyFrameTimeLoggingFunctor;
454 | 
455 | public:
456 |     VoxWriter(const VoxelX& vMaxVoxelPerCubeX = 126, const VoxelY& vMaxVoxelPerCubeY = 126, const VoxelZ& vMaxVoxelPerCubeZ = 126);
457 |     ~VoxWriter();
458 | 
459 |     int32_t IsOk(const std::string& vFilePathName);
460 | 
461 |     void ClearVoxels();
462 |     void ClearColors();
463 |     
464 |     void StartTimeLogging();
465 |     void StopTimeLogging();
466 |     void SetKeyFrameTimeLoggingFunctor(const KeyFrameTimeLoggingFunctor& vKeyFrameTimeLoggingFunctor);
467 |     void SetKeyFrame(uint32_t vKeyFrame);
468 |     void AddColor(const uint8_t& r, const uint8_t& g, const uint8_t& b, const uint8_t& a, const uint8_t& index);
469 |     void AddVoxel(const VoxelX& vX, const VoxelY& vY, const VoxelZ& vZ, const uint8_t& vColorIndex);
470 |     void SaveToFile(const std::string& vFilePathName);
471 | 
472 |     const size_t GetVoxelsCount(const KeyFrame& vKeyFrame) const;
473 |     const size_t GetVoxelsCount() const;
474 |     void         PrintStats() const;
475 | 
476 | private:
477 |     bool          m_OpenFileForWriting(const std::string& vFilePathName);
478 |     void          m_CloseFile();
479 |     long          m_GetFilePos() const;
480 |     void          m_SetFilePos(const long& vPos);
481 |     const size_t  m_GetCubeId(const VoxelX& vX, const VoxelY& vY, const VoxelZ& vZ);
482 |     VoxCube*      m_GetCube(const VoxelX& vX, const VoxelY& vY, const VoxelZ& vZ);
483 |     void          m_MergeVoxelInCube(const VoxelX& vX, const VoxelY& vY, const VoxelZ& vZ, const uint8_t& vColorIndex, VoxCube* vCube);
484 | };
485 | }  // namespace vox
486 | #endif  //__VOX_WRITER_H__
487 | 


--------------------------------------------------------------------------------
/src/main.cpp:
--------------------------------------------------------------------------------
  1 | #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
  2 | #define WINDOWS_LEAN_AND_MEAN // Please, not too much windows shenanigans
  3 | #endif
  4 | 
  5 | // Standard libs
  6 | #include <string>
  7 | #include <cstdio>
  8 | 
  9 | // Trimesh for model importing
 10 | #include "TriMesh.h"
 11 | // Util
 12 | #include "util.h"
 13 | #include "util_io.h"
 14 | #include "util_cuda.h"
 15 | #include "timer.h"
 16 | // CPU voxelizer fallback
 17 | #include "cpu_voxelizer.h"
 18 | 
 19 | using namespace std;
 20 | string version_number = "v0.6";
 21 | 
// Forward declaration of CUDA functions
// main() calls both with the same arguments: the voxelization parameters (v), the triangle buffer
// returned by meshToGPU_managed (triangle_data, 9 floats per triangle), the voxel table to fill
// (vtable) and a flag that is true when the morton output format was requested (morton_code).
// NOTE(review): the definitions presumably live in voxelize.cu / voxelize_solid.cu - confirm.
void voxelize(const voxinfo & v, float* triangle_data, unsigned int* vtable, bool morton_code);
void voxelize_solid(const voxinfo& v, float* triangle_data, unsigned int* vtable, bool morton_code);
 25 | 
 26 | // Output formats
 27 | enum class OutputFormat { output_binvox = 0, output_morton = 1, output_obj_points = 2, output_obj_cubes = 3, output_vox = 4};
 28 | char *OutputFormats[] = { "binvox file", "morton encoded blob", "obj file (pointcloud)", "obj file (cubes)", "magicavoxel file"};
 29 | 
 30 | // Default options
 31 | string filename = "";
 32 | string filename_base = "";
 33 | OutputFormat outputformat = OutputFormat::output_vox;
 34 | unsigned int gridsize = 256;
 35 | bool forceCPU = false;
 36 | bool solidVoxelization = false;
 37 | 
 38 | void printHeader(){
 39 | 	fprintf(stdout, "## CUDA VOXELIZER \n");
 40 | 	cout << "CUDA Voxelizer " << version_number << " by Jeroen Baert" << endl; 
 41 | 	cout << "https://github.com/Forceflow/cuda_voxelizer - mail (at) jeroen-baert (dot) be" << endl;
 42 | }
 43 | 
 44 | void printExample() {
 45 | 	cout << "Example: cuda_voxelizer -f /home/jeroen/bunny.ply -s 512" << endl;
 46 | }
 47 | 
 48 | void printHelp(){
 49 | 	fprintf(stdout, "\n## HELP  \n");
 50 | 	cout << "Program options: " << endl << endl;
 51 | 	cout << " -f <path to model file: .ply, .obj, .3ds> (required)" << endl;
 52 | 	cout << " -s <voxelization grid size, power of 2: 8 -> 512, 1024, ... (default: 256)>" << endl;
 53 | 	cout << " -o <output format: vox, binvox, obj, obj_points or morton (default: vox)>" << endl;
 54 | 	cout << " -cpu : Force CPU-based voxelization (slow, but works if no compatible GPU can be found)" << endl;
 55 | 	cout << " -solid : Force solid voxelization (experimental, needs watertight model)" << endl << endl;
 56 | 	printExample();
 57 | 	cout << endl;
 58 | }
 59 | 
 60 | // METHOD 1: Helper function to transfer triangles to automatically managed CUDA memory ( > CUDA 7.x)
 61 | float* meshToGPU_managed(const trimesh::TriMesh *mesh) {
 62 | 	Timer t; t.start();
 63 | 	size_t n_floats = sizeof(float) * 9 * (mesh->faces.size());
 64 | 	float* device_triangles = 0;
 65 | 	fprintf(stdout, "[Mesh] Allocating %s of CUDA-managed UNIFIED memory for triangle data \n", (readableSize(n_floats)).c_str());
 66 | 	checkCudaErrors(cudaMallocManaged((void**) &device_triangles, n_floats)); // managed memory
 67 | 	fprintf(stdout, "[Mesh] Copy %llu triangles to CUDA-managed UNIFIED memory \n", (size_t)(mesh->faces.size()));
 68 | 	for (size_t i = 0; i < mesh->faces.size(); i++) {
 69 | 		float3 v0 = trimesh_to_float3<trimesh::point>(mesh->vertices[mesh->faces[i][0]]);
 70 | 		float3 v1 = trimesh_to_float3<trimesh::point>(mesh->vertices[mesh->faces[i][1]]);
 71 | 		float3 v2 = trimesh_to_float3<trimesh::point>(mesh->vertices[mesh->faces[i][2]]);
 72 | 		size_t j = i * 9;
 73 | 		// Memcpy assuming the floats are laid out next to eachother
 74 | 		memcpy((device_triangles)+j, &v0.x, 3*sizeof(float)); 
 75 | 		memcpy((device_triangles)+j+3, &v1.x, 3*sizeof(float));
 76 | 		memcpy((device_triangles)+j+6, &v2.x, 3*sizeof(float));
 77 | 	}
 78 | 	t.stop();fprintf(stdout, "[Perf] Mesh transfer time to GPU: %.1f ms \n", t.elapsed_time_milliseconds);
 79 | 	return device_triangles;
 80 | }
 81 | 
 82 | // METHOD 2: Helper function to transfer triangles to old-style, self-managed CUDA memory ( < CUDA 7.x )
 83 | // Leaving this here for reference, the function above should be faster and better managed on all versions CUDA 7+
 84 | // 
 85 | //float* meshToGPU(const trimesh::TriMesh *mesh){
 86 | //	size_t n_floats = sizeof(float) * 9 * (mesh->faces.size());
 87 | //	float* pagelocktriangles;
 88 | //	fprintf(stdout, "Allocating %llu kb of page-locked HOST memory \n", (size_t)(n_floats / 1024.0f));
 89 | //	checkCudaErrors(cudaHostAlloc((void**)&pagelocktriangles, n_floats, cudaHostAllocDefault)); // pinned memory to easily copy from
 90 | //	fprintf(stdout, "Copy %llu triangles to page-locked HOST memory \n", (size_t)(mesh->faces.size()));
 91 | //	for (size_t i = 0; i < mesh->faces.size(); i++){
 92 | //		glm::vec3 v0 = trimesh_to_glm<trimesh::point>(mesh->vertices[mesh->faces[i][0]]);
 93 | //		glm::vec3 v1 = trimesh_to_glm<trimesh::point>(mesh->vertices[mesh->faces[i][1]]);
 94 | //		glm::vec3 v2 = trimesh_to_glm<trimesh::point>(mesh->vertices[mesh->faces[i][2]]);
 95 | //		size_t j = i * 9;
 96 | //		memcpy((pagelocktriangles)+j, glm::value_ptr(v0), sizeof(glm::vec3));
 97 | //		memcpy((pagelocktriangles)+j+3, glm::value_ptr(v1), sizeof(glm::vec3));
 98 | //		memcpy((pagelocktriangles)+j+6, glm::value_ptr(v2), sizeof(glm::vec3));
 99 | //	}
100 | //	float* device_triangles;
101 | //	fprintf(stdout, "Allocating %llu kb of DEVICE memory \n", (size_t)(n_floats / 1024.0f));
102 | //	checkCudaErrors(cudaMalloc((void **) &device_triangles, n_floats));
103 | //	fprintf(stdout, "Copy %llu triangles from page-locked HOST memory to DEVICE memory \n", (size_t)(mesh->faces.size()));
104 | //	checkCudaErrors(cudaMemcpy((void *) device_triangles, (void*) pagelocktriangles, n_floats, cudaMemcpyDefault));
105 | //	return device_triangles;
106 | //}
107 | 
108 | // Parse the program parameters and set them as global variables
109 | void parseProgramParameters(int argc, char* argv[]){
110 | 	if(argc<2){ // not enough arguments
111 | 		fprintf(stdout, "Not enough program parameters. \n \n");
112 | 		printHelp();
113 | 		exit(0);
114 | 	} 
115 | 	bool filegiven = false;
116 | 	for (int i = 1; i < argc; i++) {
117 | 		if (string(argv[i]) == "-f") {
118 | 			filename = argv[i + 1];
119 | 			filename_base = filename.substr(0, filename.find_last_of("."));
120 | 			filegiven = true;
121 | 			if (!file_exists(filename)) {fprintf(stdout, "[Err] File does not exist / cannot access: %s \n", filename.c_str());exit(1);}
122 | 			i++;
123 | 		}
124 | 		else if (string(argv[i]) == "-s") {
125 | 			gridsize = atoi(argv[i + 1]);
126 | 			i++;
127 | 		} else if (string(argv[i]) == "-h") {
128 | 			printHelp(); exit(0);
129 | 		} else if (string(argv[i]) == "-o") {
130 | 			string output = (argv[i + 1]);
131 | 			transform(output.begin(), output.end(), output.begin(), ::tolower); // to lowercase
132 | 			if (output == "binvox"){outputformat = OutputFormat::output_binvox;}
133 | 			else if (output == "morton"){outputformat = OutputFormat::output_morton;}
134 | 			else if (output == "obj"){outputformat = OutputFormat::output_obj_cubes;}
135 | 			else if (output == "obj_points") { outputformat = OutputFormat::output_obj_points; }
136 | 			else if (output == "vox") { outputformat = OutputFormat::output_vox; }
137 | 			else {fprintf(stdout, "[Err] Unrecognized output format: %s, valid options are binvox (default), morton, obj or obj_points \n", output.c_str());exit(1);}
138 | 		}
139 | 		else if (string(argv[i]) == "-cpu") {
140 | 			forceCPU = true;
141 | 		}
142 | 		else if (string(argv[i])=="-solid"){
143 | 			solidVoxelization = true;
144 | 		}
145 | 	}
146 | 	if (!filegiven) {
147 | 		fprintf(stdout, "[Err] You didn't specify a file using -f (path). This is required. Exiting. \n");
148 | 		printExample(); exit(1);
149 | 	}
150 | 	fprintf(stdout, "[Info] Filename: %s \n", filename.c_str());
151 | 	fprintf(stdout, "[Info] Grid size: %i \n", gridsize);
152 | 	fprintf(stdout, "[Info] Output format: %s \n", OutputFormats[int(outputformat)]);
153 | 	fprintf(stdout, "[Info] Using CPU-based voxelization: %s (default: No)\n", forceCPU ? "Yes" : "No");
154 | 	fprintf(stdout, "[Info] Using Solid Voxelization: %s (default: No)\n", solidVoxelization ? "Yes" : "No");
155 | }
156 | 
157 | int main(int argc, char* argv[]) {
158 | 	// PRINT PROGRAM INFO
159 | 	Timer t; t.start();
160 | 	printHeader();
161 | 
162 | 	// PARSE PROGRAM PARAMETERS
163 | 	fprintf(stdout, "\n## PROGRAM PARAMETERS \n");
164 | 	parseProgramParameters(argc, argv);
165 | 	fflush(stdout);
166 | 	trimesh::TriMesh::set_verbose(false);
167 | 
168 | 	// READ THE MESH
169 | 	fprintf(stdout, "\n## READ MESH \n");
170 | #ifdef _DEBUG
171 | 	trimesh::TriMesh::set_verbose(true);
172 | #endif
173 | 	fprintf(stdout, "[I/O] Reading mesh from %s \n", filename.c_str());
174 | 	trimesh::TriMesh* themesh = trimesh::TriMesh::read(filename.c_str());
175 | 	themesh->need_faces(); // Trimesh: Unpack (possible) triangle strips so we have faces for sure
176 | 	fprintf(stdout, "[Mesh] Number of triangles: %zu \n", themesh->faces.size());
177 | 	fprintf(stdout, "[Mesh] Number of vertices: %zu \n", themesh->vertices.size());
178 | 	fprintf(stdout, "[Mesh] Computing bbox \n");
179 | 	themesh->need_bbox(); // Trimesh: Compute the bounding box (in model coordinates)
180 | 
181 | 	// COMPUTE BOUNDING BOX AND VOXELISATION PARAMETERS
182 | 	fprintf(stdout, "\n## VOXELISATION SETUP \n");
183 | 	// Initialize our own AABox, pad it so it's a cube
184 | 	AABox<float3> bbox_mesh_cubed = createMeshBBCube<float3>(AABox<float3>(trimesh_to_float3(themesh->bbox.min), trimesh_to_float3(themesh->bbox.max)));
185 | 	// Create voxinfo struct and print all info
186 | 	voxinfo voxelization_info(bbox_mesh_cubed, make_uint3(gridsize, gridsize, gridsize), themesh->faces.size());
187 | 	voxelization_info.print();
188 | 	// Compute space needed to hold voxel table (1 voxel / bit)
189 | 	unsigned int* vtable = 0; // Both voxelization paths (GPU and CPU) need this
190 | 	size_t vtable_size = static_cast<size_t>(ceil(static_cast<size_t>(voxelization_info.gridsize.x) * static_cast<size_t>(voxelization_info.gridsize.y) * static_cast<size_t>(voxelization_info.gridsize.z) / 32.0f) * 4);
191 | 
192 | 	// CUDA initialization
193 | 	bool cuda_ok = false;
194 | 	if (!forceCPU)
195 | 	{
196 | 		// SECTION: Try to figure out if we have a CUDA-enabled GPU
197 | 		fprintf(stdout, "\n## CUDA INIT \n");
198 | 		cuda_ok = initCuda();
199 | 		if (! cuda_ok ) fprintf(stdout, "[Info] CUDA GPU not found\n");
200 | 	}
201 | 
202 | 	// SECTION: The actual voxelization
203 | 	if (cuda_ok && !forceCPU) { 
204 | 		// GPU voxelization
205 | 		fprintf(stdout, "\n## TRIANGLES TO GPU TRANSFER \n");
206 | 
207 | 		float* device_triangles;
208 | 
209 | 		// Transfer triangle data to GPU
210 | 		device_triangles = meshToGPU_managed(themesh);
211 | 
212 | 		// Allocate memory for voxel grid
213 | 		fprintf(stdout, "[Voxel Grid] Allocating %s of CUDA-managed UNIFIED memory for Voxel Grid\n", readableSize(vtable_size).c_str());
214 | 		checkCudaErrors(cudaMallocManaged((void**)&vtable, vtable_size));
215 | 		
216 | 		fprintf(stdout, "\n## GPU VOXELISATION \n");
217 | 		if (solidVoxelization){
218 | 			voxelize_solid(voxelization_info, device_triangles, vtable, (outputformat == OutputFormat::output_morton));
219 | 		}
220 | 		else{
221 | 			voxelize(voxelization_info, device_triangles, vtable, (outputformat == OutputFormat::output_morton));
222 | 		}
223 | 	} else { 
224 | 		// CPU VOXELIZATION FALLBACK
225 | 		fprintf(stdout, "\n## CPU VOXELISATION \n");
226 | 		if (!forceCPU) { fprintf(stdout, "[Info] No suitable CUDA GPU was found: Falling back to CPU voxelization\n"); }
227 | 		else { fprintf(stdout, "[Info] Doing CPU voxelization (forced using command-line switch -cpu)\n"); }
228 | 		// allocate zero-filled array
229 | 		vtable = (unsigned int*) calloc(1, vtable_size);
230 | 		if (!solidVoxelization) {
231 | 			cpu_voxelizer::cpu_voxelize_mesh(voxelization_info, themesh, vtable, (outputformat == OutputFormat::output_morton));
232 | 		}
233 | 		else {
234 | 			cpu_voxelizer::cpu_voxelize_mesh_solid(voxelization_info, themesh, vtable, (outputformat == OutputFormat::output_morton));
235 | 		}
236 | 	}
237 | 
238 | 	//// DEBUG: print vtable
239 | 	//for (int i = 0; i < vtable_size; i++) {
240 | 	//	char* vtable_p = (char*)vtable;
241 | 	//	cout << (int) vtable_p[i] << endl;
242 | 	//}
243 | 
244 | 	fprintf(stdout, "\n## FILE OUTPUT \n");
245 | 	if (outputformat == OutputFormat::output_morton){
246 | 		write_binary(vtable, vtable_size, filename);
247 | 	} else if (outputformat == OutputFormat::output_binvox){
248 | 		write_binvox(vtable, voxelization_info, filename);
249 | 	}
250 | 	else if (outputformat == OutputFormat::output_obj_points) {
251 | 		write_obj_pointcloud(vtable, voxelization_info, filename);
252 | 	}
253 | 	else if (outputformat == OutputFormat::output_obj_cubes) {
254 | 		write_obj_cubes(vtable, voxelization_info, filename);
255 | 	}
256 | 	else if (outputformat == OutputFormat::output_vox) {
257 | 		write_vox(vtable, voxelization_info, filename);
258 | 	}
259 | 
260 | 	fprintf(stdout, "\n## STATS \n");
261 | 	t.stop(); fprintf(stdout, "[Perf] Total runtime: %.1f ms \n", t.elapsed_time_milliseconds);
262 | }


--------------------------------------------------------------------------------
/src/morton_LUTs.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | #include <stdint.h>
  3 | 
// Lookup tables (LUTs) to copy to GPU memory for quick morton decode / encode
// host_morton256_x: entry i holds the 8 bits of i spread out so that bit k of i sits at bit 3*k
// (i = 1 -> 0x1, i = 2 -> 0x8, i = 4 -> 0x40, i = 255 -> 0x00249249).
static const uint32_t host_morton256_x[256] =
{
	0x00000000,
	0x00000001, 0x00000008, 0x00000009, 0x00000040, 0x00000041, 0x00000048, 0x00000049, 0x00000200,
	0x00000201, 0x00000208, 0x00000209, 0x00000240, 0x00000241, 0x00000248, 0x00000249, 0x00001000,
	0x00001001, 0x00001008, 0x00001009, 0x00001040, 0x00001041, 0x00001048, 0x00001049, 0x00001200,
	0x00001201, 0x00001208, 0x00001209, 0x00001240, 0x00001241, 0x00001248, 0x00001249, 0x00008000,
	0x00008001, 0x00008008, 0x00008009, 0x00008040, 0x00008041, 0x00008048, 0x00008049, 0x00008200,
	0x00008201, 0x00008208, 0x00008209, 0x00008240, 0x00008241, 0x00008248, 0x00008249, 0x00009000,
	0x00009001, 0x00009008, 0x00009009, 0x00009040, 0x00009041, 0x00009048, 0x00009049, 0x00009200,
	0x00009201, 0x00009208, 0x00009209, 0x00009240, 0x00009241, 0x00009248, 0x00009249, 0x00040000,
	0x00040001, 0x00040008, 0x00040009, 0x00040040, 0x00040041, 0x00040048, 0x00040049, 0x00040200,
	0x00040201, 0x00040208, 0x00040209, 0x00040240, 0x00040241, 0x00040248, 0x00040249, 0x00041000,
	0x00041001, 0x00041008, 0x00041009, 0x00041040, 0x00041041, 0x00041048, 0x00041049, 0x00041200,
	0x00041201, 0x00041208, 0x00041209, 0x00041240, 0x00041241, 0x00041248, 0x00041249, 0x00048000,
	0x00048001, 0x00048008, 0x00048009, 0x00048040, 0x00048041, 0x00048048, 0x00048049, 0x00048200,
	0x00048201, 0x00048208, 0x00048209, 0x00048240, 0x00048241, 0x00048248, 0x00048249, 0x00049000,
	0x00049001, 0x00049008, 0x00049009, 0x00049040, 0x00049041, 0x00049048, 0x00049049, 0x00049200,
	0x00049201, 0x00049208, 0x00049209, 0x00049240, 0x00049241, 0x00049248, 0x00049249, 0x00200000,
	0x00200001, 0x00200008, 0x00200009, 0x00200040, 0x00200041, 0x00200048, 0x00200049, 0x00200200,
	0x00200201, 0x00200208, 0x00200209, 0x00200240, 0x00200241, 0x00200248, 0x00200249, 0x00201000,
	0x00201001, 0x00201008, 0x00201009, 0x00201040, 0x00201041, 0x00201048, 0x00201049, 0x00201200,
	0x00201201, 0x00201208, 0x00201209, 0x00201240, 0x00201241, 0x00201248, 0x00201249, 0x00208000,
	0x00208001, 0x00208008, 0x00208009, 0x00208040, 0x00208041, 0x00208048, 0x00208049, 0x00208200,
	0x00208201, 0x00208208, 0x00208209, 0x00208240, 0x00208241, 0x00208248, 0x00208249, 0x00209000,
	0x00209001, 0x00209008, 0x00209009, 0x00209040, 0x00209041, 0x00209048, 0x00209049, 0x00209200,
	0x00209201, 0x00209208, 0x00209209, 0x00209240, 0x00209241, 0x00209248, 0x00209249, 0x00240000,
	0x00240001, 0x00240008, 0x00240009, 0x00240040, 0x00240041, 0x00240048, 0x00240049, 0x00240200,
	0x00240201, 0x00240208, 0x00240209, 0x00240240, 0x00240241, 0x00240248, 0x00240249, 0x00241000,
	0x00241001, 0x00241008, 0x00241009, 0x00241040, 0x00241041, 0x00241048, 0x00241049, 0x00241200,
	0x00241201, 0x00241208, 0x00241209, 0x00241240, 0x00241241, 0x00241248, 0x00241249, 0x00248000,
	0x00248001, 0x00248008, 0x00248009, 0x00248040, 0x00248041, 0x00248048, 0x00248049, 0x00248200,
	0x00248201, 0x00248208, 0x00248209, 0x00248240, 0x00248241, 0x00248248, 0x00248249, 0x00249000,
	0x00249001, 0x00249008, 0x00249009, 0x00249040, 0x00249041, 0x00249048, 0x00249049, 0x00249200,
	0x00249201, 0x00249208, 0x00249209, 0x00249240, 0x00249241, 0x00249248, 0x00249249
};
 41 | 
 42 | static const uint32_t host_morton256_y[256] = { // entry i holds the 8 bits of i spread out so that bit k of i sits at bit 3k+1 (the y lane of a 3-way bit interleave)
 43 | 	0x00000000,
 44 | 	0x00000002, 0x00000010, 0x00000012, 0x00000080, 0x00000082, 0x00000090, 0x00000092, 0x00000400,
 45 | 	0x00000402, 0x00000410, 0x00000412, 0x00000480, 0x00000482, 0x00000490, 0x00000492, 0x00002000,
 46 | 	0x00002002, 0x00002010, 0x00002012, 0x00002080, 0x00002082, 0x00002090, 0x00002092, 0x00002400,
 47 | 	0x00002402, 0x00002410, 0x00002412, 0x00002480, 0x00002482, 0x00002490, 0x00002492, 0x00010000,
 48 | 	0x00010002, 0x00010010, 0x00010012, 0x00010080, 0x00010082, 0x00010090, 0x00010092, 0x00010400,
 49 | 	0x00010402, 0x00010410, 0x00010412, 0x00010480, 0x00010482, 0x00010490, 0x00010492, 0x00012000,
 50 | 	0x00012002, 0x00012010, 0x00012012, 0x00012080, 0x00012082, 0x00012090, 0x00012092, 0x00012400,
 51 | 	0x00012402, 0x00012410, 0x00012412, 0x00012480, 0x00012482, 0x00012490, 0x00012492, 0x00080000,
 52 | 	0x00080002, 0x00080010, 0x00080012, 0x00080080, 0x00080082, 0x00080090, 0x00080092, 0x00080400,
 53 | 	0x00080402, 0x00080410, 0x00080412, 0x00080480, 0x00080482, 0x00080490, 0x00080492, 0x00082000,
 54 | 	0x00082002, 0x00082010, 0x00082012, 0x00082080, 0x00082082, 0x00082090, 0x00082092, 0x00082400,
 55 | 	0x00082402, 0x00082410, 0x00082412, 0x00082480, 0x00082482, 0x00082490, 0x00082492, 0x00090000,
 56 | 	0x00090002, 0x00090010, 0x00090012, 0x00090080, 0x00090082, 0x00090090, 0x00090092, 0x00090400,
 57 | 	0x00090402, 0x00090410, 0x00090412, 0x00090480, 0x00090482, 0x00090490, 0x00090492, 0x00092000,
 58 | 	0x00092002, 0x00092010, 0x00092012, 0x00092080, 0x00092082, 0x00092090, 0x00092092, 0x00092400,
 59 | 	0x00092402, 0x00092410, 0x00092412, 0x00092480, 0x00092482, 0x00092490, 0x00092492, 0x00400000,
 60 | 	0x00400002, 0x00400010, 0x00400012, 0x00400080, 0x00400082, 0x00400090, 0x00400092, 0x00400400,
 61 | 	0x00400402, 0x00400410, 0x00400412, 0x00400480, 0x00400482, 0x00400490, 0x00400492, 0x00402000,
 62 | 	0x00402002, 0x00402010, 0x00402012, 0x00402080, 0x00402082, 0x00402090, 0x00402092, 0x00402400,
 63 | 	0x00402402, 0x00402410, 0x00402412, 0x00402480, 0x00402482, 0x00402490, 0x00402492, 0x00410000,
 64 | 	0x00410002, 0x00410010, 0x00410012, 0x00410080, 0x00410082, 0x00410090, 0x00410092, 0x00410400,
 65 | 	0x00410402, 0x00410410, 0x00410412, 0x00410480, 0x00410482, 0x00410490, 0x00410492, 0x00412000,
 66 | 	0x00412002, 0x00412010, 0x00412012, 0x00412080, 0x00412082, 0x00412090, 0x00412092, 0x00412400,
 67 | 	0x00412402, 0x00412410, 0x00412412, 0x00412480, 0x00412482, 0x00412490, 0x00412492, 0x00480000,
 68 | 	0x00480002, 0x00480010, 0x00480012, 0x00480080, 0x00480082, 0x00480090, 0x00480092, 0x00480400,
 69 | 	0x00480402, 0x00480410, 0x00480412, 0x00480480, 0x00480482, 0x00480490, 0x00480492, 0x00482000,
 70 | 	0x00482002, 0x00482010, 0x00482012, 0x00482080, 0x00482082, 0x00482090, 0x00482092, 0x00482400,
 71 | 	0x00482402, 0x00482410, 0x00482412, 0x00482480, 0x00482482, 0x00482490, 0x00482492, 0x00490000,
 72 | 	0x00490002, 0x00490010, 0x00490012, 0x00490080, 0x00490082, 0x00490090, 0x00490092, 0x00490400,
 73 | 	0x00490402, 0x00490410, 0x00490412, 0x00490480, 0x00490482, 0x00490490, 0x00490492, 0x00492000,
 74 | 	0x00492002, 0x00492010, 0x00492012, 0x00492080, 0x00492082, 0x00492090, 0x00492092, 0x00492400,
 75 | 	0x00492402, 0x00492410, 0x00492412, 0x00492480, 0x00492482, 0x00492490, 0x00492492
 76 | };
 77 | 
 78 | static const uint32_t host_morton256_z[256] = { // entry i holds the 8 bits of i spread out so that bit k of i sits at bit 3k+2 (the z lane of a 3-way bit interleave)
 79 | 	0x00000000,
 80 | 	0x00000004, 0x00000020, 0x00000024, 0x00000100, 0x00000104, 0x00000120, 0x00000124, 0x00000800,
 81 | 	0x00000804, 0x00000820, 0x00000824, 0x00000900, 0x00000904, 0x00000920, 0x00000924, 0x00004000,
 82 | 	0x00004004, 0x00004020, 0x00004024, 0x00004100, 0x00004104, 0x00004120, 0x00004124, 0x00004800,
 83 | 	0x00004804, 0x00004820, 0x00004824, 0x00004900, 0x00004904, 0x00004920, 0x00004924, 0x00020000,
 84 | 	0x00020004, 0x00020020, 0x00020024, 0x00020100, 0x00020104, 0x00020120, 0x00020124, 0x00020800,
 85 | 	0x00020804, 0x00020820, 0x00020824, 0x00020900, 0x00020904, 0x00020920, 0x00020924, 0x00024000,
 86 | 	0x00024004, 0x00024020, 0x00024024, 0x00024100, 0x00024104, 0x00024120, 0x00024124, 0x00024800,
 87 | 	0x00024804, 0x00024820, 0x00024824, 0x00024900, 0x00024904, 0x00024920, 0x00024924, 0x00100000,
 88 | 	0x00100004, 0x00100020, 0x00100024, 0x00100100, 0x00100104, 0x00100120, 0x00100124, 0x00100800,
 89 | 	0x00100804, 0x00100820, 0x00100824, 0x00100900, 0x00100904, 0x00100920, 0x00100924, 0x00104000,
 90 | 	0x00104004, 0x00104020, 0x00104024, 0x00104100, 0x00104104, 0x00104120, 0x00104124, 0x00104800,
 91 | 	0x00104804, 0x00104820, 0x00104824, 0x00104900, 0x00104904, 0x00104920, 0x00104924, 0x00120000,
 92 | 	0x00120004, 0x00120020, 0x00120024, 0x00120100, 0x00120104, 0x00120120, 0x00120124, 0x00120800,
 93 | 	0x00120804, 0x00120820, 0x00120824, 0x00120900, 0x00120904, 0x00120920, 0x00120924, 0x00124000,
 94 | 	0x00124004, 0x00124020, 0x00124024, 0x00124100, 0x00124104, 0x00124120, 0x00124124, 0x00124800,
 95 | 	0x00124804, 0x00124820, 0x00124824, 0x00124900, 0x00124904, 0x00124920, 0x00124924, 0x00800000,
 96 | 	0x00800004, 0x00800020, 0x00800024, 0x00800100, 0x00800104, 0x00800120, 0x00800124, 0x00800800,
 97 | 	0x00800804, 0x00800820, 0x00800824, 0x00800900, 0x00800904, 0x00800920, 0x00800924, 0x00804000,
 98 | 	0x00804004, 0x00804020, 0x00804024, 0x00804100, 0x00804104, 0x00804120, 0x00804124, 0x00804800,
 99 | 	0x00804804, 0x00804820, 0x00804824, 0x00804900, 0x00804904, 0x00804920, 0x00804924, 0x00820000,
100 | 	0x00820004, 0x00820020, 0x00820024, 0x00820100, 0x00820104, 0x00820120, 0x00820124, 0x00820800,
101 | 	0x00820804, 0x00820820, 0x00820824, 0x00820900, 0x00820904, 0x00820920, 0x00820924, 0x00824000,
102 | 	0x00824004, 0x00824020, 0x00824024, 0x00824100, 0x00824104, 0x00824120, 0x00824124, 0x00824800,
103 | 	0x00824804, 0x00824820, 0x00824824, 0x00824900, 0x00824904, 0x00824920, 0x00824924, 0x00900000,
104 | 	0x00900004, 0x00900020, 0x00900024, 0x00900100, 0x00900104, 0x00900120, 0x00900124, 0x00900800,
105 | 	0x00900804, 0x00900820, 0x00900824, 0x00900900, 0x00900904, 0x00900920, 0x00900924, 0x00904000,
106 | 	0x00904004, 0x00904020, 0x00904024, 0x00904100, 0x00904104, 0x00904120, 0x00904124, 0x00904800,
107 | 	0x00904804, 0x00904820, 0x00904824, 0x00904900, 0x00904904, 0x00904920, 0x00904924, 0x00920000,
108 | 	0x00920004, 0x00920020, 0x00920024, 0x00920100, 0x00920104, 0x00920120, 0x00920124, 0x00920800,
109 | 	0x00920804, 0x00920820, 0x00920824, 0x00920900, 0x00920904, 0x00920920, 0x00920924, 0x00924000,
110 | 	0x00924004, 0x00924020, 0x00924024, 0x00924100, 0x00924104, 0x00924120, 0x00924124, 0x00924800,
111 | 	0x00924804, 0x00924820, 0x00924824, 0x00924900, 0x00924904, 0x00924920, 0x00924924
112 | };


--------------------------------------------------------------------------------
/src/timer.h:
--------------------------------------------------------------------------------
 1 | // Portable high-precision timer
 2 | // Using QueryPerformanceCounter for Win32/Win64
 3 | // And POSIX clock_gettime() for other platforms
 4 | 
 5 | #pragma once
 6 | 
 7 | #if _MSC_VER
 8 | #include <Windows.h>
 9 | #elif __GNUC__
10 | #include "time.h"
11 | #endif
12 | 
13 | using namespace std;
14 | 
15 | #if _MSC_VER
16 | struct Timer { // Accumulating stopwatch for Windows builds, based on QueryPerformanceCounter
17 | 	double pc_frequency = 0.0;              // performance-counter ticks per millisecond
18 | 	double elapsed_time_milliseconds = 0.0; // total of all start()/stop() intervals since construction or reset()
19 | 	LARGE_INTEGER start_time = { 0 };       // counter value captured by start()
20 | 	LARGE_INTEGER end_time = { 0 };         // counter value captured by stop()
21 | 
22 | 	inline Timer() { // query the counter frequency (ticks per second) once and keep it as ticks per millisecond
23 | 		LARGE_INTEGER ticks_per_second;
24 | 		QueryPerformanceFrequency(&ticks_per_second);
25 | 		pc_frequency = static_cast<double>(ticks_per_second.QuadPart) / 1000.0;
26 | 	}
27 | 
28 | 	// Throw away the accumulated time
29 | 	inline void reset() { elapsed_time_milliseconds = 0.0; }
30 | 
31 | 	// Remember the counter value at the beginning of an interval
32 | 	inline void start() { QueryPerformanceCounter(&start_time); }
33 | 
34 | 	// Close the interval opened by start() and add its length in milliseconds to the total
35 | 	inline void stop() {
36 | 		QueryPerformanceCounter(&end_time);
37 | 		const LONGLONG interval_ticks = end_time.QuadPart - start_time.QuadPart;
38 | 		elapsed_time_milliseconds += static_cast<double>(interval_ticks) / pc_frequency;
39 | 	}
40 | };
41 | #else
42 | 
43 | #define MILLION 1000000.0f
44 | 
45 | struct Timer { // Accumulating stopwatch for non-MSVC builds, based on POSIX clock_gettime()
46 | 	double elapsed_time_milliseconds = 0; // total of all start()/stop() intervals since construction or reset()
47 | 	timespec t1 = {}; // timestamp captured by start()
48 | 	timespec t2 = {}; // timestamp captured by stop()
49 | 
50 | 	inline Timer() {}
51 | 	// Throw away the accumulated time (same interface as the Windows Timer, which already offers reset())
52 | 	inline void reset() { elapsed_time_milliseconds = 0.0; }
53 | 
54 | 	// CLOCK_MONOTONIC never jumps when the wall clock is adjusted, so an interval cannot come out negative or inflated
55 | 	inline void start() { clock_gettime(CLOCK_MONOTONIC, &t1); }
56 | 
57 | 	inline void stop() {
58 | 		clock_gettime(CLOCK_MONOTONIC, &t2);
59 | 		elapsed_time_milliseconds += static_cast<double>(t2.tv_sec - t1.tv_sec) * 1000.0; // whole seconds -> ms
60 | 		elapsed_time_milliseconds += static_cast<double>(t2.tv_nsec - t1.tv_nsec) / MILLION; // nanoseconds -> ms, computed in double instead of float
61 | 	}
62 | };
63 | #endif
64 | 


--------------------------------------------------------------------------------
/src/todo.txt:
--------------------------------------------------------------------------------
 1 | Readme.md
 2 | - Performance (using schwarz algorithm, against binvox)
 3 | - Switch to cuda libs for helper
 4 | 
 5 | - estimate block/grid size
 6 | - optimize, output to magicavoxel? Png? interact with polyvox?
 7 | - is magicbits faster (less memory lookup)
 8 | 
 9 | ### VOXELISATION
10 | - Cleanup writeout to binvox, use original file name, append gridsize (see ooc_svo_builder)
11 | 
12 | OPTIMIZATIONS
13 | - model bbox can be computed in GPU pass
14 | - buffered bitset (no speedup)
15 | 
16 | VOXEL TABLE
17 | - Variable-size queue for found voxels? (how does that work in CUDA?) (see Stack Overflow question)
18 | - Implement as a hashmap + allocate extra memory every time an element is added to the list, spinlock
19 | 
20 | 


--------------------------------------------------------------------------------
/src/util.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | // This file contains various utility functions that are used throughout the program and didn't really belong in their own header
  3 | 
  4 | #include <stdint.h>
  5 | #include "TriMesh.h"
  6 | #include "cuda.h"
  7 | #include "cuda_runtime.h"
  8 | #include <string>
  9 | #include <fstream>
 10 | 
 11 | template<typename trimeshtype>
 12 | inline float3 trimesh_to_float3(const trimeshtype a) { // builds a float3 from the x, y and z members of a
 13 | 	return make_float3(a.x, a.y, a.z);
 14 | }
 15 | template<typename trimeshtype>
 16 | inline trimeshtype float3_to_trimesh(const float3 a) { // constructs a trimeshtype from (a.x, a.y, a.z); trimeshtype only appears in the return type, so callers must name it explicitly
 17 | 	return trimeshtype(a.x, a.y, a.z);
 18 | }
 19 | 
 20 | __host__ __device__ inline int3 float3_to_int3(const float3 a) { // component-wise float -> int conversion via static_cast (fractional part is dropped)
 21 | 	return make_int3(static_cast<int>(a.x), static_cast<int>(a.y), static_cast<int>(a.z));
 22 | }
 23 | 
 24 | // Check if a voxel in the voxel table is set
 25 | // Usable from host and device code (__host__ __device__)
 26 | //   x, y, z  : voxel coordinates; the caller must keep them inside gridsize (no bounds check is done)
 27 | //   gridsize : grid dimensions; the voxel's linear index is x + y*gridsize.x + z*gridsize.x*gridsize.y
 28 | //   vtable   : voxel table holding one bit per voxel, packed into 32-bit words
 29 | // Returns true when the bit belonging to the voxel is 1
 30 | __host__ __device__ inline bool checkVoxel(size_t x, size_t y, size_t z, const uint3 gridsize, const unsigned int* vtable){
 31 | 	const size_t location = x + (y*gridsize.x) + (z*gridsize.x*gridsize.y); // linear voxel index; evaluated in size_t because x, y and z are size_t
 32 | 	const size_t int_location = location / size_t(32); // index of the 32-bit word holding the voxel
 33 | 	// we count bit positions RtL, but array indices LtR
 34 | 	const unsigned int bit_pos = static_cast<unsigned int>(size_t(31) - (location % size_t(32)));
 35 | 	// The mask is built from an unsigned 1: shifting a signed 1 by 31 overflows int, which is undefined behavior
 36 | 	const unsigned int mask = 1u << bit_pos;
 37 | 	return (vtable[int_location] & mask) != 0u;
 38 | }
 39 | 
 40 | // Axis Aligned Box (AAB) described by two corner points of type T (e.g. a float3 minimum and maximum)
 41 | template <typename T>
 42 | struct AABox {
 43 | 	T min; // lower corner
 44 | 	T max; // upper corner
 45 | 	__device__ __host__ AABox() : min(), max() {} // both corners value-initialized
 46 | 	__device__ __host__ AABox(T min, T max) : min(min), max(max) {} // corners copied from the arguments
 47 | };
 48 | 
 49 | // Voxelisation info (global parameters for the voxelization process)
 50 | struct voxinfo {
 51 | 	AABox<float3> bbox;   // bounding box covered by the voxel grid
 52 | 	uint3 gridsize;       // number of voxels along x, y and z
 53 | 	size_t n_triangles;   // number of triangles
 54 | 	float3 unit;          // edge lengths of one voxel: bbox extent divided by gridsize, per axis
 55 | 
 56 | 	voxinfo(const AABox<float3> bbox, const uint3 gridsize, const size_t n_triangles)
 57 | 		: bbox(bbox), gridsize(gridsize), n_triangles(n_triangles) { // listed in declaration order, which is the order members are initialized in
 58 | 		unit.x = (bbox.max.x - bbox.min.x) / float(gridsize.x);
 59 | 		unit.y = (bbox.max.y - bbox.min.y) / float(gridsize.y);
 60 | 		unit.z = (bbox.max.z - bbox.min.z) / float(gridsize.z);
 61 | 	}
 62 | 
 63 | 	void print() const { // prints all parameters to stdout; gridsize components are unsigned, hence %u
 64 | 		fprintf(stdout, "[Voxelization] Bounding Box: (%f,%f,%f)-(%f,%f,%f) \n", bbox.min.x, bbox.min.y, bbox.min.z, bbox.max.x, bbox.max.y, bbox.max.z);
 65 | 		fprintf(stdout, "[Voxelization] Grid size: %u %u %u \n", gridsize.x, gridsize.y, gridsize.z);
 66 | 		fprintf(stdout, "[Voxelization] Triangles: %zu \n", n_triangles);
 67 | 		fprintf(stdout, "[Voxelization] Unit length: x: %f y: %f z: %f\n", unit.x, unit.y, unit.z);
 68 | 	}
 69 | };
 70 | 
 71 | // Create mesh BBOX _cube_: a cube whose side equals the longest side of the given bounding box.
 72 | // Axes on which the box is shorter are padded symmetrically (half of the missing length before
 73 | // the current min, half behind the current max).
 74 | //
 75 | // Example: (1,2,3) to (4,4,4) has side lengths (3,2,1), so the cube side is 3 and the padded
 76 | // box becomes (1, 1.5, 2) to (4, 4.5, 5).
 77 | //
 78 | // After that the cube is grown slightly on all sides (see the comment above the epsilon step).
 79 | // NOTE(review): the body computes with float3, so T is presumably always float3 - confirm before using other types.
 80 | template <typename T>
 81 | inline AABox<T> createMeshBBCube(AABox<T> box) {
 82 | 	const float3 extent = box.max - box.min; // side length of the given bbox in every direction
 83 | 	const float longest = std::max(extent.x, std::max(extent.y, extent.z));
 84 | 	AABox<T> cube(box.min, box.max); // start from the input box
 85 | 
 86 | 	if (extent.x != longest) { // X is shorter than the longest side: pad it on both ends
 87 | 		const float half_gap = (longest - extent.x) / 2.0f;
 88 | 		cube.min.x = box.min.x - half_gap;
 89 | 		cube.max.x = box.max.x + half_gap;
 90 | 	}
 91 | 	if (extent.y != longest) { // Y is shorter than the longest side: pad it on both ends
 92 | 		const float half_gap = (longest - extent.y) / 2.0f;
 93 | 		cube.min.y = box.min.y - half_gap;
 94 | 		cube.max.y = box.max.y + half_gap;
 95 | 	}
 96 | 	if (extent.z != longest) { // Z is shorter than the longest side: pad it on both ends
 97 | 		const float half_gap = (longest - extent.z) / 2.0f;
 98 | 		cube.min.z = box.min.z - half_gap;
 99 | 		cube.max.z = box.max.z + half_gap;
100 | 	}
101 | 
102 | 	// Workaround for the problem reported here: https://github.com/Forceflow/cuda_voxelizer/issues/7
103 | 	// Suspected cause: a triangle that is axis-aligned and lies exactly on a voxel edge sometimes gets counted and sometimes not,
104 | 	// probably due to a numerical instability (division by zero?).
105 | 	// Ugly fix: pad the bounding box on all sides by 1/10001th of its total length, bringing all triangles ever so slightly off-grid
106 | 	const float3 margin = (cube.max - cube.min) / 10001.0f;
107 | 	cube.min -= margin;
108 | 	cube.max += margin;
109 | 	return cube;
110 | }
111 | 
112 | // Debug helper: print the `size` bytes starting at `ptr` as a bit pattern on stdout.
113 | // Bytes are visited from the highest address down to the lowest, and each byte is
114 | // written most-significant bit first. A set bit prints as 'X', a cleared bit as '.'.
115 | // A single newline terminates the output.
116 | void inline printBits(size_t const size, void const * const ptr) {
117 | 	const unsigned char* const bytes = static_cast<const unsigned char*>(ptr);
118 | 
119 | 	for (int index = static_cast<int>(size) - 1; index >= 0; --index) {
120 | 		const unsigned char current = bytes[index];
121 | 
122 | 		// Walk the byte from bit 7 (most significant) down to bit 0
123 | 		for (int bit = 7; bit >= 0; --bit) {
124 | 			const bool is_set = ((current >> bit) & 1u) != 0u;
125 | 			putchar(is_set ? 'X' : '.');
126 | 		}
127 | 	}
128 | 
129 | 	// puts("") emits just the line break
130 | 	puts("");
131 | }
132 | 
133 | // Format a byte count as a short human-readable string, e.g. 1536 -> "1 KB".
134 | // The value is truncated (not rounded) to whole units of the largest binary unit that fits.
135 | inline std::string readableSize(size_t bytes) {
136 | 	const double amount = static_cast<double>(bytes);
137 | 	if (amount <= 0) { return "0 Bytes"; }
138 | 	static const struct { double scale; const char* suffix; } units[] = { // largest unit first
139 | 		{ 1099511627776.0, " TB" }, { 1073741824.0, " GB" }, { 1048576.0, " MB" }, { 1024.0, " KB" } };
140 | 	for (const auto& unit : units) {
141 | 		if (amount >= unit.scale) { return std::to_string(static_cast<size_t>(amount / unit.scale)) + unit.suffix; }
142 | 	}
143 | 	return std::to_string(static_cast<size_t>(amount)) + " bytes";
144 | }
145 | 
146 | // Report whether `name` can be opened for reading (used as the "file exists" test)
147 | inline bool file_exists(const std::string& name) {
148 | 	std::ifstream probe;
149 | 	probe.open(name.c_str());
150 | 	const bool opened = probe.good();
151 | 	return opened; // the stream is closed by its destructor
152 | }
153 | 


--------------------------------------------------------------------------------
/src/util_cuda.cpp:
--------------------------------------------------------------------------------
 1 | #include "util_cuda.h"
 2 | 
 3 | // Check if CUDA requirements are met
 4 | // Prints diagnostics along the way and returns true only when a device with compute capability >= 2.0 is available.
 5 | bool initCuda(){
 6 | 	int device_count = 0;
 7 | 	// Check if CUDA runtime calls work at all
 8 | 	const cudaError_t status = cudaGetDeviceCount(&device_count);
 9 | 	if (status != cudaSuccess) {
10 | 		fprintf(stderr, "[CUDA] First call to CUDA Runtime API failed. Are the drivers installed? \n");
11 | 		return false;
12 | 	}
13 | 
14 | 	// Is there a CUDA device at all? (device_count was filled in by the successful call above)
15 | 	if(device_count < 1){
16 | 		fprintf(stderr, "[CUDA] No CUDA devices found. Make sure CUDA device is powered, connected and available. \n \n");
17 | 		fprintf(stderr, "[CUDA] On laptops: disable powersave/battery mode. \n");
18 | 		fprintf(stderr, "[CUDA] Exiting... \n");
19 | 		return false;
20 | 	}
21 | 
22 | 	fprintf(stderr, "[CUDA] CUDA device(s) found, picking best one \n");
23 | 	fprintf(stdout, "[CUDA] ");
24 | 	// We have at least 1 CUDA device, so now select the fastest (method from Nvidia helper library)
25 | 	int device = findCudaDevice(0, 0);
26 | 
27 | 	// Print device name and available device memory
28 | 	cudaDeviceProp properties;
29 | 	checkCudaErrors(cudaGetDeviceProperties(&properties,device));
30 | 	fprintf(stdout, "[CUDA] Best device: %s \n", properties.name);
31 | 	size_t free_bytes = 0, total_bytes = 0;
32 | 	checkCudaErrors(cudaMemGetInfo(&free_bytes, &total_bytes));
33 | 	// size_t is printed with %zu; %llu is only correct where size_t happens to be unsigned long long
34 | 	fprintf(stdout,"[CUDA] Available device memory: %zu of %zu MB \n", (free_bytes >> 20), (total_bytes >> 20));
35 | 
36 | 	// Check compute capability
37 | 	if (properties.major < 2){
38 | 		fprintf(stderr, "[CUDA] Your cuda device has compute capability %i.%i. We need at least 2.0 for atomic operations. \n", properties.major, properties.minor);
39 | 		return false;
40 | 	}
41 | 	return true;
42 | }


--------------------------------------------------------------------------------
/src/util_cuda.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | // Standard libs
 4 | #include <stdio.h>
 5 | #include <cstdlib>
 6 | // Cuda
 7 | #include "cuda_runtime.h"
 8 | #include "libs/cuda/helper_cuda.h"
 9 | #include "libs/cuda/helper_math.h"
10 | 
11 | // Function to check cuda requirements: prints diagnostics and returns false when CUDA cannot be used, true otherwise
12 | bool initCuda();


--------------------------------------------------------------------------------
/src/util_io.cpp:
--------------------------------------------------------------------------------
  1 | #include "util_io.h"
  2 | 
  3 | using namespace std;
  4 | 
  5 | // helper function to get file length in bytes (returns 0 when the position cannot be determined)
  6 | size_t get_file_length(const std::string base_filename){
  7 | 	// open file positioned at its end, so the read position equals the file size
  8 | 	std::ifstream input(base_filename.c_str(), std::ios_base::ate | std::ios_base::binary);
  9 | 	assert(input);
 10 | 	const std::streamoff length = input.tellg(); // tellg() yields -1 on failure
 11 | 	input.close();
 12 | 	return (length < 0) ? size_t(0) : static_cast<size_t>(length); // never let -1 turn into a huge size_t
 13 | }
 14 | 
 15 | // read `length` raw bytes from the start of the file into `data` (which must have room for them)
 16 | void read_binary(void* data, const size_t length, const std::string base_filename){
 17 | 	// open file
 18 | 	std::ifstream input(base_filename.c_str(), std::ios_base::in | std::ios_base::binary);
 19 | 	assert(input);
 20 | #ifndef SILENT
 21 | 	// %zu matches size_t; the size is reported in whole kilobytes
 22 | 	fprintf(stdout, "[I/O] Reading %zu kb of binary data from file %s \n", length / size_t(1024), base_filename.c_str()); fflush(stdout);
 23 | #endif
 24 | 	input.seekg(0, input.beg);
 25 | 	input.read(static_cast<char*>(data), static_cast<std::streamsize>(length)); // read every requested byte (a fixed count of 8 would ignore `length`)
 26 | 	input.close();
 27 | }
 28 | 
 29 | // Shared formatter for the OBJ helpers below: writes the record tag followed by the three
 30 | // components of v, separated by single spaces, and ends the line with endl (which also flushes)
 31 | static void write_obj_record(ofstream& output, const char* tag, const int3& v) {
 32 | 	output << tag << v.x << " " << v.y << " " << v.z << endl;
 33 | }
 34 | 
 35 | // Helper function to write single vertex normal ("vn" record) to OBJ file
 36 | static void write_vertex_normal(ofstream& output, const int3& v) { write_obj_record(output, "vn ", v); }
 37 | 
 38 | // Helper function to write single vertex ("v" record) to OBJ file
 39 | static void write_vertex(ofstream& output, const int3& v) { write_obj_record(output, "v ", v); }
 40 | 
 41 | // Helper function to write single face ("f" record holding three vertex indices) to OBJ file
 42 | static void write_face(ofstream& output, const int3& v) { write_obj_record(output, "f ", v); }
 43 | 
 44 | // Helper function to write a full unit cube with its minimum corner at (x, y, z): 8 vertices followed by 12 triangles.
 45 | // Faces use relative (negative) vertex positions in the OBJ file - support for this should be widespread by now.
 46 | void write_cube(const int x, const int y, const int z, ofstream& output) {
 47 | 	//	   2-------1
 48 | 	//	  /|      /|
 49 | 	//	 / |     / |
 50 | 	//	7--|----8  |
 51 | 	//	|  4----|--3
 52 | 	//	| /     | /
 53 | 	//	5-------6
 54 | 
 55 | 	// Corner table, stored in reverse order (v8 first) so that the relative position of v_i is -i once written
 56 | 	const int3 corners[8] = {
 57 | 		make_int3(x + 1, y + 1, z),     // v8
 58 | 		make_int3(x, y + 1, z),         // v7
 59 | 		make_int3(x + 1, y, z),         // v6
 60 | 		make_int3(x, y, z),             // v5
 61 | 		make_int3(x, y, z + 1),         // v4
 62 | 		make_int3(x + 1, y, z + 1),     // v3
 63 | 		make_int3(x, y + 1, z + 1),     // v2
 64 | 		make_int3(x + 1, y + 1, z + 1)  // v1
 65 | 	};
 66 | 	// Emit the vertices in table order
 67 | 	for (int corner = 0; corner < 8; corner++) {
 68 | 		write_vertex(output, corners[corner]);
 69 | 	}
 70 | 
 71 | 	// Two triangles per cube face; every entry holds three relative vertex positions,
 72 | 	// where -i refers to corner v_i of the diagram above
 73 | 	const int3 triangles[12] = {
 74 | 		make_int3(-1, -3, -4), // back
 75 | 		make_int3(-1, -4, -2),
 76 | 		make_int3(-4, -3, -6), // bottom
 77 | 		make_int3(-4, -6, -5),
 78 | 		make_int3(-3, -1, -8), // right
 79 | 		make_int3(-3, -8, -6),
 80 | 		make_int3(-1, -2, -7), // top
 81 | 		make_int3(-1, -7, -8),
 82 | 		make_int3(-2, -4, -5), // left
 83 | 		make_int3(-2, -5, -7),
 84 | 		make_int3(-5, -6, -8), // front
 85 | 		make_int3(-5, -8, -7)
 86 | 	};
 87 | 	for (int triangle = 0; triangle < 12; triangle++) {
 88 | 		write_face(output, triangles[triangle]);
 89 | 	}
 90 | }
 91 | 
 92 | // Write every set voxel as a unit cube to "<base_filename>_<gridsize.x>_voxels.obj", then let TriMesh2
 93 | // reload that file, reorder its vertices and write the result back to the same path.
 94 | //   vtable        : bit-packed voxel table, read through checkVoxel()
 95 | //   v_info        : voxelization parameters; only gridsize is used here
 96 | //   base_filename : prefix of the output file name
 97 | void write_obj_cubes(const unsigned int* vtable, const voxinfo v_info, const std::string base_filename) {
 98 | 	string filename_output = base_filename + string("_") + to_string(v_info.gridsize.x) + string("_voxels.obj");
 99 | 	ofstream output(filename_output.c_str(), ios::out);
100 | 	assert(output);
101 | 
102 | #ifndef SILENT
103 | 	fprintf(stdout, "[I/O] Writing data in obj voxels format to file %s \n", filename_output.c_str());
104 | 	// Write stats: a progress marker is printed each time another quarter of the grid has been visited
105 | 	size_t voxels_seen = 0;
106 | 	// Integer division keeps the threshold exact (a float division can round once the voxel count exceeds 2^24)
107 | 	const size_t write_stats_25 = (size_t(v_info.gridsize.x) * size_t(v_info.gridsize.y) * size_t(v_info.gridsize.z)) / size_t(4);
108 | 	fprintf(stdout, "[I/O] Writing to file: 0%%...");
109 | #endif
110 | 
111 | 	for (size_t x = 0; x < v_info.gridsize.x; x++) {
112 | 		for (size_t y = 0; y < v_info.gridsize.y; y++) {
113 | 			for (size_t z = 0; z < v_info.gridsize.z; z++) {
114 | #ifndef SILENT
115 | 				voxels_seen++;
116 | 				if (voxels_seen == write_stats_25) {fprintf(stdout, "25%%...");}
117 | 				else if (voxels_seen == write_stats_25 * size_t(2)) {fprintf(stdout, "50%%...");}
118 | 				else if (voxels_seen == write_stats_25 * size_t(3)) {fprintf(stdout, "75%%...");}
119 | #endif
120 | 				if (checkVoxel(x, y, z, v_info.gridsize, vtable)) {
121 | 					write_cube(static_cast<int>(x), static_cast<int>(y), static_cast<int>(z), output);
122 | 				}
123 | 			}
124 | 		}
125 | 	}
126 | #ifndef SILENT
127 | 	fprintf(stdout, "100%% \n");
128 | #endif
129 | 
130 | 	// The file must be closed (and thereby completely flushed) before TriMesh2 opens it again
131 | 	output.close();
132 | 
133 | #ifndef SILENT
134 | 	fprintf(stdout, "[I/O] Reordering / Optimizing mesh with Trimesh2 \n");
135 | #endif
136 | 	// Load the file using TriMesh2
137 | 	trimesh::TriMesh* temp_mesh = trimesh::TriMesh::read(filename_output.c_str());
138 | 	if (temp_mesh == nullptr) { // nothing to reorder when the file could not be loaded; keep the file as written
139 | 		fprintf(stderr, "[I/O] Could not reload %s for reordering, keeping the unoptimized file \n", filename_output.c_str());
140 | 		return;
141 | 	}
142 | 	trimesh::reorder_verts(temp_mesh);
143 | 	//trimesh::faceflip(temp_mesh);
144 | 	//trimesh::edgeflip(temp_mesh);
145 | 	//temp_mesh->clear_normals();
146 | 	//temp_mesh->need_normals();
147 | #ifndef SILENT
148 | 	fprintf(stdout, "[I/O] Writing final mesh to file %s \n", filename_output.c_str());
149 | #endif
150 | 	temp_mesh->write(filename_output.c_str());
151 | 	delete temp_mesh; // the mesh returned by TriMesh::read() is owned by the caller
152 | }
153 | 
154 | // Write one OBJ vertex per set voxel (placed at the voxel center) to
155 | // "<base_filename>_<gridsize.x>_pointcloud.obj"; vtable is read through checkVoxel()
156 | void write_obj_pointcloud(const unsigned int* vtable, const voxinfo v_info, const std::string base_filename) {
157 | 	string filename_output = base_filename + string("_") + to_string(v_info.gridsize.x) + string("_pointcloud.obj");
158 | 	ofstream output(filename_output.c_str(), ios::out);
159 | 	assert(output);
160 | 
161 | #ifndef SILENT
162 | 	fprintf(stdout, "[I/O] Writing data in obj point cloud format to %s \n", filename_output.c_str());
163 | 	size_t voxels_seen = 0;
164 | 	const size_t write_stats_25 = (size_t(v_info.gridsize.x) * size_t(v_info.gridsize.y) * size_t(v_info.gridsize.z)) / size_t(4); // integer math keeps the progress threshold exact
165 | 	fprintf(stdout, "[I/O] Writing to file: 0%%...");
166 | #endif
167 | 
168 | 	for (size_t x = 0; x < v_info.gridsize.x; x++) {
169 | 		for (size_t y = 0; y < v_info.gridsize.y; y++) {
170 | 			for (size_t z = 0; z < v_info.gridsize.z; z++) {
171 | #ifndef SILENT
172 | 				voxels_seen++;
173 | 				if (voxels_seen == write_stats_25) { fprintf(stdout, "25%%...");}
174 | 				else if (voxels_seen == write_stats_25 * size_t(2)) { fprintf(stdout, "50%%...");}
175 | 				else if (voxels_seen == write_stats_25 * size_t(3)) {fprintf(stdout, "75%%...");}
176 | #endif
177 | 				if (checkVoxel(x, y, z, v_info.gridsize, vtable)) {
178 | 					// '\n' instead of endl: endl would force a stream flush after every single vertex
179 | 					output << "v " << (x+0.5) << " " << (y + 0.5) << " " << (z + 0.5) << '\n'; // +0.5 to put vertex in the middle of the voxel
180 | 				}
181 | 			}
182 | 		}
183 | 	}
184 | #ifndef SILENT
185 | 	fprintf(stdout, "100%% \n");
186 | #endif
187 | 
188 | 	// Closing the stream writes out everything that is still buffered
189 | 	output.close();
190 | }
191 | 
192 | // Dump `bytes` raw bytes starting at `data` into the file "<base_filename>.bin"
193 | void write_binary(void* data, size_t bytes, const std::string base_filename){
194 | 	const string filename_output = base_filename + ".bin";
195 | #ifndef SILENT
196 | 	fprintf(stdout, "[I/O] Writing data in binary format to %s (%s) \n", filename_output.c_str(), readableSize(bytes).c_str());
197 | #endif
198 | 	ofstream output(filename_output.c_str(), ios_base::out | ios_base::binary);
199 | 	output.write(static_cast<const char*>(data), bytes); // the stream is flushed and closed by its destructor
200 | }
201 | 
202 | // Write the voxel table to "<base_filename>_<gridsize.x>.binvox": ASCII header followed by run-length encoded (value, count) byte pairs
203 | void write_binvox(const unsigned int* vtable, const voxinfo v_info, const std::string base_filename){
204 | 	string filename_output = base_filename + string("_") + to_string(v_info.gridsize.x) + string(".binvox");
205 | #ifndef SILENT
206 | 	fprintf(stdout, "[I/O] Writing data in binvox format to %s \n", filename_output.c_str());
207 | #endif
208 | 	ofstream output(filename_output.c_str(), ios::out | ios::binary);
209 | 	assert(output);
210 | 	// Write ASCII header
211 | 	output << "#binvox 1" << endl;
212 | 	output << "dim " << v_info.gridsize.x << " " << v_info.gridsize.y << " " << v_info.gridsize.z << endl;
213 | 	output << "translate " << v_info.bbox.min.x << " " << v_info.bbox.min.y << " " << v_info.bbox.min.z << endl;
214 | 	output << "scale " << max(max(v_info.bbox.max.x - v_info.bbox.min.x, v_info.bbox.max.y - v_info.bbox.min.y), v_info.bbox.max.z - v_info.bbox.min.z) << endl;
215 | 	output << "data" << endl;
216 | 
217 | 	// Write BINARY Data (and compress it a bit using run-length encoding); voxels are visited with y running fastest, then z, then x
218 | 	// Unsigned bytes keep the run counter well defined up to its limit of 255 (a plain char may be signed)
219 | 	unsigned char currentvalue = 0, current_seen = 0;
220 | 	for (size_t x = 0; x < v_info.gridsize.x; x++){
221 | 		for (size_t z = 0; z < v_info.gridsize.z; z++){
222 | 			for (size_t y = 0; y < v_info.gridsize.y; y++){
223 | 				const unsigned char nextvalue = checkVoxel(x, y, z, v_info.gridsize, vtable) ? 1 : 0;
224 | 				if (x == 0 && y == 0 && z == 0){ // special case: first voxel opens the first run
225 | 					currentvalue = nextvalue;
226 | 					output.write(reinterpret_cast<const char*>(&currentvalue), 1);
227 | 					current_seen = 1;
228 | 					continue;
229 | 				}
230 | 				if (nextvalue != currentvalue || current_seen == 255){ // run ends: the value changes or the count byte is full
231 | 					output.write(reinterpret_cast<const char*>(&current_seen), 1);
232 | 					current_seen = 1;
233 | 					currentvalue = nextvalue;
234 | 					output.write(reinterpret_cast<const char*>(&currentvalue), 1);
235 | 				}
236 | 				else {
237 | 					current_seen++;
238 | 				}
239 | 			}
240 | 		}
241 | 	}
242 | 
243 | 	// Write rest: the count of the last run (skipped for an empty grid, where no run was ever opened)
244 | 	if (current_seen > 0) { output.write(reinterpret_cast<const char*>(&current_seen), 1); }
245 | 	output.close();
246 | }
247 | 
248 | // Experimental MagicaVoxel file format output: adds every set voxel to a VoxWriter and saves it as "<base_filename>_<gridsize.x>.vox"
249 | void write_vox(const unsigned int* vtable, const voxinfo v_info, const std::string base_filename) {
250 | 	string filename_output = base_filename + string("_") + to_string(v_info.gridsize.x) + string(".vox");
251 | 	vox::VoxWriter voxwriter;
252 | 	voxwriter.AddColor(255, 255, 255,0, 0);
253 | 
254 | #ifndef SILENT
255 | 	fprintf(stdout, "[I/O] Writing data in vox format to %s \n", filename_output.c_str());
256 | 
257 | 	// Write stats
258 | 	size_t voxels_seen = 0;
259 | 	const size_t write_stats_25 = (size_t(v_info.gridsize.x) * size_t(v_info.gridsize.y) * size_t(v_info.gridsize.z)) / size_t(4); // integer math keeps the progress threshold exact
260 | 	fprintf(stdout, "[I/O] Writing to file: 0%%...");
261 | #endif
262 | 
263 | 	// y and z are bounded by their own grid dimension, matching the index layout checkVoxel() uses (needed for non-cubic grids)
264 | 	for (size_t x = 0; x < v_info.gridsize.x; x++) {
265 | 		for (size_t y = 0; y < v_info.gridsize.y; y++) {
266 | 			for (size_t z = 0; z < v_info.gridsize.z; z++) {
267 | #ifndef SILENT
268 | 				// Progress stats
269 | 				voxels_seen++;
270 | 				if (voxels_seen == write_stats_25) { fprintf(stdout, "25%%..."); }
271 | 				else if (voxels_seen == write_stats_25 * size_t(2)) { fprintf(stdout, "50%%..."); }
272 | 				else if (voxels_seen == write_stats_25 * size_t(3)) { fprintf(stdout, "75%%..."); }
273 | #endif
274 | 				if (checkVoxel(x, y, z, v_info.gridsize, vtable)) {
275 | 					// Somehow, this makes the vox model come out correct way up. Some axes probably got switched along the way
276 | 					voxwriter.AddVoxel(x, -z + v_info.gridsize.z, y, 1);
277 | 				}
278 | 			}
279 | 		}
280 | 	}
281 | #ifndef SILENT
282 | 	fprintf(stdout, "100%% \n");
283 | #endif
284 | 	voxwriter.SaveToFile(filename_output);
285 | }


--------------------------------------------------------------------------------
/src/util_io.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <string>
 3 | #include <iostream>
 4 | #include <fstream>
 5 | #include <assert.h>
 6 | #include "util.h"
 7 | #include "TriMesh_algo.h"
 8 | #include "util.h"
 9 | #include "libs/magicavoxel_file_writer/VoxWriter.h"
10 | 
11 | size_t get_file_length(const std::string base_filename); // file size, obtained by opening the file at its end and reading the stream position
12 | void read_binary(void* data, const size_t length, const std::string base_filename); // read raw bytes from the start of the file into data
13 | void write_binary(void* data, const size_t bytes, const std::string base_filename); // write bytes raw bytes from data to "<base_filename>.bin"
14 | void write_binvox(const unsigned int* vtable, const voxinfo v_info, const std::string base_filename); // write "<base_filename>_<gridsize.x>.binvox" (ASCII header + run-length encoded voxel data)
15 | void write_obj_pointcloud(const unsigned int* vtable, const voxinfo v_info, const std::string base_filename); // write one OBJ vertex per set voxel to "<base_filename>_<gridsize.x>_pointcloud.obj"
16 | void write_obj_cubes(const unsigned int* vtable, const voxinfo v_info, const std::string base_filename); // write one cube per set voxel to "<base_filename>_<gridsize.x>_voxels.obj"
17 | void write_vox(const unsigned int* vtable, const voxinfo v_info, const std::string base_filename); // experimental MagicaVoxel output to "<base_filename>_<gridsize.x>.vox"
18 | 


--------------------------------------------------------------------------------
/src/voxelize.cu:
--------------------------------------------------------------------------------
  1 | #include "voxelize.cuh"
  2 | 
  3 | // CUDA Global Memory variables
  4 | 
  5 | // Debug-only device-side counters: incremented with atomicAdd inside voxelize_triangle and copied back to the host by voxelize() for sanity checks
  6 | #ifdef _DEBUG
  7 | __device__ size_t debug_d_n_voxels_marked = 0; // voxels that passed every overlap test (counted once per triangle, so duplicates are included)
  8 | __device__ size_t debug_d_n_triangles = 0; // triangles whose voxel loop has finished
  9 | __device__ size_t debug_d_n_voxels_tested = 0; // candidate voxels visited inside the triangles' grid bounding boxes
 10 | #endif
 11 | 
 12 | // Possible optimization: buffer bitsets (for now: Disabled because too much overhead)
 13 | //struct bufferedBitSetter{
 14 | //	unsigned int* voxel_table;
 15 | //	size_t current_int_location;
 16 | //	unsigned int current_mask;
 17 | //
 18 | //	__device__ __inline__ bufferedBitSetter(unsigned int* voxel_table, size_t index) :
 19 | //		voxel_table(voxel_table), current_mask(0) {
 20 | //		current_int_location = int(index / 32.0f);
 21 | //	}
 22 | //
 23 | //	__device__ __inline__ void setBit(size_t index){
 24 | //		size_t new_int_location = int(index / 32.0f);
 25 | //		if (current_int_location != new_int_location){
 26 | //			flush();
 27 | //			current_int_location = new_int_location;
 28 | //		}
 29 | //		unsigned int bit_pos = 31 - (unsigned int)(int(index) % 32);
 30 | //		current_mask = current_mask | (1 << bit_pos);
 31 | //	}
 32 | //
 33 | //	__device__ __inline__ void flush(){
 34 | //		if (current_mask != 0){
 35 | //			atomicOr(&(voxel_table[current_int_location]), current_mask);
 36 | //		}
 37 | //	}
 38 | //};
 39 | 
 40 | // Possible optimization: check bit before you set it - don't need to do atomic operation if it's already set to 1
 41 | // For now: overhead, so it seems
 42 | //__device__ __inline__ bool checkBit(unsigned int* voxel_table, size_t index){
 43 | //	size_t int_location = index / size_t(32);
 44 | //	unsigned int bit_pos = size_t(31) - (index % size_t(32)); // we count bit positions RtL, but array indices LtR
 45 | //	return ((voxel_table[int_location]) & (1 << bit_pos));
 46 | //}
 47 | 
 48 | // Set bit number `index` in the bit-packed voxel table (32 voxels per unsigned int word).
 49 | // atomicOr keeps the read-modify-write of that word safe against other threads updating it at the same time.
 50 | __device__ __inline__ void setBit(unsigned int* voxel_table, size_t index){
 51 | 	size_t int_location = index / size_t(32); // word that holds this voxel's bit
 52 | 	unsigned int bit_pos = static_cast<unsigned int>(size_t(31) - (index % size_t(32))); // we count bit positions RtL, but array indices LtR
 53 | 	unsigned int mask = 1u << bit_pos; // unsigned literal: bit_pos can be 31, and shifting a signed 1 into the sign bit is not portable
 54 | 	atomicOr(&(voxel_table[int_location]), mask);
 55 | }
 56 | 
 57 | // Surface voxelization kernel: every thread walks over triangles thread_id, thread_id + stride, ... and sets the bit of each voxel inside the triangle's grid bounding box that passes the plane test and the three 2D projection tests.
 58 | __global__ void voxelize_triangle(voxinfo info, float* triangle_data, unsigned int* voxel_table, bool morton_order){ // triangle_data: 9 floats per triangle; voxel_table: bit-packed output written through setBit; morton_order: index voxels by Morton code instead of x + y*gx + z*gx*gy
 59 | 	size_t thread_id = threadIdx.x + blockIdx.x * blockDim.x;
 60 | 	size_t stride = blockDim.x * gridDim.x;
 61 | 
 62 | 	// Common variables used in the voxelization process
 63 | 	float3 delta_p = make_float3(info.unit.x, info.unit.y, info.unit.z);
 64 | 	int3 grid_max = make_int3(info.gridsize.x - 1, info.gridsize.y - 1, info.gridsize.z - 1); // grid max (grid runs from 0 to gridsize-1)
 65 | 
 66 | 	while (thread_id < info.n_triangles){ // every thread works on specific triangles in its stride
 67 | 		size_t t = thread_id * 9; // triangle contains 9 floats (3 vertices x 3 coordinates)
 68 | 
 69 | 		// COMPUTE COMMON TRIANGLE PROPERTIES
 70 | 		// Move vertices to origin using bbox
 71 | 		float3 v0 = make_float3(triangle_data[t], triangle_data[t + 1], triangle_data[t + 2]) - info.bbox.min;
 72 | 		float3 v1 = make_float3(triangle_data[t + 3], triangle_data[t + 4], triangle_data[t + 5]) - info.bbox.min;
 73 | 		float3 v2 = make_float3(triangle_data[t + 6], triangle_data[t + 7], triangle_data[t + 8]) - info.bbox.min;
 74 | 		// Edge vectors
 75 | 		float3 e0 = v1 - v0;
 76 | 		float3 e1 = v2 - v1;
 77 | 		float3 e2 = v0 - v2;
 78 | 		// Normal vector pointing up from the triangle
 79 | 		float3 n = normalize(cross(e0, e1));
 80 | 
 81 | 		// COMPUTE TRIANGLE BBOX IN GRID
 82 | 		// Triangle bounding box in world coordinates is min(v0,v1,v2) and max(v0,v1,v2)
 83 | 		AABox<float3> t_bbox_world(fminf(v0, fminf(v1, v2)), fmaxf(v0, fmaxf(v1, v2)));
 84 | 		// Triangle bounding box in voxel grid coordinates is the world bounding box divided by the grid unit vector
 85 | 		AABox<int3> t_bbox_grid;
 86 | 		t_bbox_grid.min = clamp(float3_to_int3(t_bbox_world.min / info.unit), make_int3(0, 0, 0), grid_max);
 87 | 		t_bbox_grid.max = clamp(float3_to_int3(t_bbox_world.max / info.unit), make_int3(0, 0, 0), grid_max);
 88 | 
 89 | 		// PREPARE PLANE TEST PROPERTIES (c takes the voxel extent on every axis where n is positive; d1 and d2 are the plane offsets for that corner and for the opposite one)
 90 | 		float3 c = make_float3(0.0f, 0.0f, 0.0f);
 91 | 		if (n.x > 0.0f) { c.x = info.unit.x; }
 92 | 		if (n.y > 0.0f) { c.y = info.unit.y; }
 93 | 		if (n.z > 0.0f) { c.z = info.unit.z; }
 94 | 		float d1 = dot(n, (c - v0));
 95 | 		float d2 = dot(n, ((delta_p - c) - v0));
 96 | 
 97 | 		// PREPARE PROJECTION TEST PROPERTIES (per plane: one 2D edge normal and one offset per triangle edge)
 98 | 		// XY plane
 99 | 		float2 n_xy_e0 = make_float2(-1.0f * e0.y, e0.x);
100 | 		float2 n_xy_e1 = make_float2(-1.0f * e1.y, e1.x);
101 | 		float2 n_xy_e2 = make_float2(-1.0f * e2.y, e2.x);
102 | 		if (n.z < 0.0f) {
103 | 			n_xy_e0 = -n_xy_e0;
104 | 			n_xy_e1 = -n_xy_e1;
105 | 			n_xy_e2 = -n_xy_e2;
106 | 		}
107 | 		float d_xy_e0 = (-1.0f * dot(n_xy_e0, make_float2(v0.x, v0.y))) + max(0.0f, info.unit.x * n_xy_e0.x) + max(0.0f, info.unit.y * n_xy_e0.y);
108 | 		float d_xy_e1 = (-1.0f * dot(n_xy_e1, make_float2(v1.x, v1.y))) + max(0.0f, info.unit.x * n_xy_e1.x) + max(0.0f, info.unit.y * n_xy_e1.y);
109 | 		float d_xy_e2 = (-1.0f * dot(n_xy_e2, make_float2(v2.x, v2.y))) + max(0.0f, info.unit.x * n_xy_e2.x) + max(0.0f, info.unit.y * n_xy_e2.y);
110 | 		// YZ plane
111 | 		float2 n_yz_e0 = make_float2(-1.0f * e0.z, e0.y);
112 | 		float2 n_yz_e1 = make_float2(-1.0f * e1.z, e1.y);
113 | 		float2 n_yz_e2 = make_float2(-1.0f * e2.z, e2.y);
114 | 		if (n.x < 0.0f) {
115 | 			n_yz_e0 = -n_yz_e0;
116 | 			n_yz_e1 = -n_yz_e1;
117 | 			n_yz_e2 = -n_yz_e2;
118 | 		}
119 | 		float d_yz_e0 = (-1.0f * dot(n_yz_e0, make_float2(v0.y, v0.z))) + max(0.0f, info.unit.y * n_yz_e0.x) + max(0.0f, info.unit.z * n_yz_e0.y);
120 | 		float d_yz_e1 = (-1.0f * dot(n_yz_e1, make_float2(v1.y, v1.z))) + max(0.0f, info.unit.y * n_yz_e1.x) + max(0.0f, info.unit.z * n_yz_e1.y);
121 | 		float d_yz_e2 = (-1.0f * dot(n_yz_e2, make_float2(v2.y, v2.z))) + max(0.0f, info.unit.y * n_yz_e2.x) + max(0.0f, info.unit.z * n_yz_e2.y);
122 | 		// ZX plane (2D points are (z, x), so the .x component of these edge normals pairs with unit.z and the .y component with unit.x)
123 | 		float2 n_zx_e0 = make_float2(-1.0f * e0.x, e0.z);
124 | 		float2 n_zx_e1 = make_float2(-1.0f * e1.x, e1.z);
125 | 		float2 n_zx_e2 = make_float2(-1.0f * e2.x, e2.z);
126 | 		if (n.y < 0.0f) {
127 | 			n_zx_e0 = -n_zx_e0;
128 | 			n_zx_e1 = -n_zx_e1;
129 | 			n_zx_e2 = -n_zx_e2;
130 | 		}
131 | 		float d_xz_e0 = (-1.0f * dot(n_zx_e0, make_float2(v0.z, v0.x))) + max(0.0f, info.unit.z * n_zx_e0.x) + max(0.0f, info.unit.x * n_zx_e0.y);
132 | 		float d_xz_e1 = (-1.0f * dot(n_zx_e1, make_float2(v1.z, v1.x))) + max(0.0f, info.unit.z * n_zx_e1.x) + max(0.0f, info.unit.x * n_zx_e1.y);
133 | 		float d_xz_e2 = (-1.0f * dot(n_zx_e2, make_float2(v2.z, v2.x))) + max(0.0f, info.unit.z * n_zx_e2.x) + max(0.0f, info.unit.x * n_zx_e2.y);
134 | 
135 | 		// test possible grid boxes for overlap
136 | 		for (int z = t_bbox_grid.min.z; z <= t_bbox_grid.max.z; z++){
137 | 			for (int y = t_bbox_grid.min.y; y <= t_bbox_grid.max.y; y++){
138 | 				for (int x = t_bbox_grid.min.x; x <= t_bbox_grid.max.x; x++){
139 | 					// if (checkBit(voxel_table, location)){ continue; }
140 | #ifdef _DEBUG
141 | 					atomicAdd(&debug_d_n_voxels_tested, 1);
142 | #endif
143 | 					// TRIANGLE PLANE THROUGH BOX TEST
144 | 					float3 p = make_float3(x * info.unit.x, y * info.unit.y, z * info.unit.z);
145 | 					float nDOTp = dot(n, p);
146 | 					if (((nDOTp + d1) * (nDOTp + d2)) > 0.0f) { continue; } // both extreme corners on the same side of the plane: no overlap
147 | 
148 | 					// PROJECTION TESTS (a negative edge function rejects the voxel)
149 | 						// XY
150 | 					float2 p_xy = make_float2(p.x, p.y);
151 | 					if ((dot(n_xy_e0, p_xy) + d_xy_e0) < 0.0f) { continue; }
152 | 					if ((dot(n_xy_e1, p_xy) + d_xy_e1) < 0.0f) { continue; }
153 | 					if ((dot(n_xy_e2, p_xy) + d_xy_e2) < 0.0f) { continue; }
154 | 
155 | 					// YZ
156 | 					float2 p_yz = make_float2(p.y, p.z);
157 | 					if ((dot(n_yz_e0, p_yz) + d_yz_e0) < 0.0f) { continue; }
158 | 					if ((dot(n_yz_e1, p_yz) + d_yz_e1) < 0.0f) { continue; }
159 | 					if ((dot(n_yz_e2, p_yz) + d_yz_e2) < 0.0f) { continue; }
160 | 
161 | 					// ZX
162 | 					float2 p_zx = make_float2(p.z, p.x);
163 | 					if ((dot(n_zx_e0, p_zx) + d_xz_e0) < 0.0f) { continue; }
164 | 					if ((dot(n_zx_e1, p_zx) + d_xz_e1) < 0.0f) { continue; }
165 | 					if ((dot(n_zx_e2, p_zx) + d_xz_e2) < 0.0f) { continue; }
166 | 
167 | #ifdef _DEBUG
168 | 					atomicAdd(&debug_d_n_voxels_marked, 1);
169 | #endif
170 | 
171 | 					if (morton_order){
172 | 						size_t location = mortonEncode_LUT(x, y, z);
173 | 						setBit(voxel_table, location);
174 | 					} else {
175 | 						size_t location = 
176 | 							static_cast<size_t>(x) + 
177 | 							(static_cast<size_t>(y)* static_cast<size_t>(info.gridsize.x)) + 
178 | 							(static_cast<size_t>(z)* (static_cast<size_t>(info.gridsize.y)* static_cast<size_t>(info.gridsize.x)));
179 | 						setBit(voxel_table, location);
180 | 					}
181 | 					continue;
182 | 				}
183 | 			}
184 | 		}
185 | #ifdef _DEBUG
186 | 		atomicAdd(&debug_d_n_triangles, 1);
187 | #endif
188 | 		thread_id += stride;
189 | 	}
190 | }
191 | // Host entry point for surface voxelization: launches voxelize_triangle with one thread per triangle in v, waits for it and prints the GPU time. triangle_data and vtable are handed straight to the kernel (presumably device-accessible memory - confirm at the caller).
192 | void voxelize(const voxinfo& v, float* triangle_data, unsigned int* vtable, bool morton_code) {
193 | 	float elapsedTime;
194 | 	
195 | 	// Create timers, set start time
196 | 	cudaEvent_t start_vox, stop_vox;
197 | 	checkCudaErrors(cudaEventCreate(&start_vox));
198 | 	checkCudaErrors(cudaEventCreate(&stop_vox));
199 | 
200 | 	// Copy morton LUT if we're encoding to morton
201 | 	if (morton_code){
202 | 		checkCudaErrors(cudaMemcpyToSymbol(morton256_x, host_morton256_x, 256 * sizeof(uint32_t)));
203 | 		checkCudaErrors(cudaMemcpyToSymbol(morton256_y, host_morton256_y, 256 * sizeof(uint32_t)));
204 | 		checkCudaErrors(cudaMemcpyToSymbol(morton256_z, host_morton256_z, 256 * sizeof(uint32_t)));
205 | 	}
206 | 
207 | 	// Estimate best block and grid size using CUDA Occupancy Calculator
208 | 	int blockSize;   // The launch configurator returned block size 
209 | 	int minGridSize; // The minimum grid size needed to achieve the  maximum occupancy for a full device launch 
210 | 	int gridSize;    // The actual grid size needed, based on input size 
211 | 	checkCudaErrors(cudaOccupancyMaxPotentialBlockSize(&minGridSize, &blockSize, voxelize_triangle, 0, 0));
212 | 	// Round up according to array size 
213 | 	gridSize = static_cast<int>((v.n_triangles + blockSize - 1) / blockSize);
214 | 
215 | 	checkCudaErrors(cudaEventRecord(start_vox, 0));
216 | 	if (gridSize > 0) { voxelize_triangle << <gridSize, blockSize >> > (v, triangle_data, vtable, morton_code); } // a launch with zero blocks is invalid, so skip it when there are no triangles
217 | 	checkCudaErrors(cudaGetLastError()); // launch-time failures (e.g. a bad configuration) are only reported here
218 | 	checkCudaErrors(cudaDeviceSynchronize()); // wait for the kernel; errors raised while it runs surface here
219 | 	checkCudaErrors(cudaEventRecord(stop_vox, 0));
220 | 	checkCudaErrors(cudaEventSynchronize(stop_vox));
221 | 	checkCudaErrors(cudaEventElapsedTime(&elapsedTime, start_vox, stop_vox));
222 | 	printf("[Perf] Voxelization GPU time: %.1f ms\n", elapsedTime);
223 | 
224 | 	// SANITY CHECKS
225 | #ifdef _DEBUG
226 | 	size_t debug_n_triangles, debug_n_voxels_marked, debug_n_voxels_tested;
227 | 	checkCudaErrors(cudaMemcpyFromSymbol((void*)&(debug_n_triangles),debug_d_n_triangles, sizeof(debug_d_n_triangles), 0, cudaMemcpyDeviceToHost));
228 | 	checkCudaErrors(cudaMemcpyFromSymbol((void*)&(debug_n_voxels_marked), debug_d_n_voxels_marked, sizeof(debug_d_n_voxels_marked), 0, cudaMemcpyDeviceToHost));
229 | 	checkCudaErrors(cudaMemcpyFromSymbol((void*) & (debug_n_voxels_tested), debug_d_n_voxels_tested, sizeof(debug_d_n_voxels_tested), 0, cudaMemcpyDeviceToHost));
230 | 	printf("[Debug] Processed %zu triangles on the GPU \n", debug_n_triangles); // %zu is the size_t conversion
231 | 	printf("[Debug] Tested %zu voxels for overlap on GPU \n", debug_n_voxels_tested);
232 | 	printf("[Debug] Marked %zu voxels as filled (includes duplicates!) \n", debug_n_voxels_marked);
233 | #endif
234 | 
235 | 	// Destroy timers
236 | 	checkCudaErrors(cudaEventDestroy(start_vox));
237 | 	checkCudaErrors(cudaEventDestroy(stop_vox));
238 | }
239 | 


--------------------------------------------------------------------------------
/src/voxelize.cuh:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | // Common functions for both the solid and non-solid voxelization methods
 4 | 
 5 | #include <cuda.h>
 6 | #include <cuda_runtime.h>
 7 | #include <device_launch_parameters.h>
 8 | #include <iostream>
 9 | #include "util.h"
10 | #include "util_cuda.h"
11 | #include "libs/cuda/helper_math.h"
12 | #include "morton_LUTs.h"
13 | 
14 | // Morton LUTs in constant memory; voxelize() and voxelize_solid() fill them with cudaMemcpyToSymbol from host_morton256_{x,y,z}, and only when Morton ordering is requested
15 | __constant__ uint32_t morton256_x[256]; // looked up with one byte of the x coordinate
16 | __constant__ uint32_t morton256_y[256]; // looked up with one byte of the y coordinate
17 | __constant__ uint32_t morton256_z[256]; // looked up with one byte of the z coordinate
18 | 
19 | // Encode (x, y, z) into a 64-bit Morton code, one coordinate byte at a time through the LUTs. Three bytes per coordinate are read; only coordinates of up to 21 bits fit in the 64-bit result.
20 | __device__ inline uint64_t mortonEncode_LUT(unsigned int x, unsigned int y, unsigned int z){
21 | 	uint64_t answer = 0;
22 | 	answer = morton256_z[(z >> 16) & 0xFF] | // most significant of the three bytes first
23 | 		morton256_y[(y >> 16) & 0xFF] |
24 | 		morton256_x[(x >> 16) & 0xFF];
25 | 	answer = answer << 24 | // one byte from each of the three coordinates makes 24 code bits, so make room for exactly 24 (the previous shift of 48 pushed the top byte out of the result once the next shift was applied)
26 | 		morton256_z[(z >> 8) & 0xFF] |
27 | 		morton256_y[(y >> 8) & 0xFF] |
28 | 		morton256_x[(x >> 8) & 0xFF];
29 | 	answer = answer << 24 |
30 | 		morton256_z[(z)& 0xFF] |
31 | 		morton256_y[(y)& 0xFF] |
32 | 		morton256_x[(x)& 0xFF];
33 | 	return answer;
34 | }


--------------------------------------------------------------------------------
/src/voxelize_solid.cu:
--------------------------------------------------------------------------------
  1 | /*
  2 | 	Solid voxelization based on the Schwarz-Seidel paper.
  3 | */
  4 | 
  5 | #include "voxelize.cuh"
  6 | 
  7 | #ifdef _DEBUG
  8 | __device__ size_t debug_d_n_voxels_marked = 0; // NOTE(review): voxelize_triangle_solid never increments these three counters, so the [Debug] totals printed by voxelize_solid() stay at 0
  9 | __device__ size_t debug_d_n_triangles = 0;
 10 | __device__ size_t debug_d_n_voxels_tested = 0;
 11 | #endif
 12 | // Tolerance below which a value is treated as zero (used for the n.x test and the on-edge tests); a double literal, so comparisons against it are done in double
 13 | #define float_error 0.000001
 14 | 
 15 | // Flip (XOR) bit number `index` in the bit-packed voxel table (32 voxels per unsigned int word); atomicXor keeps concurrent flips of the same word consistent
 16 | __device__ __inline__ void setBitXor(unsigned int* voxel_table, size_t index) {
 17 | 	size_t int_location = index / size_t(32); // word that holds this voxel's bit
 18 | 	unsigned int bit_pos = static_cast<unsigned int>(size_t(31) - (index % size_t(32))); // we count bit positions RtL, but array indices LtR
 19 | 	unsigned int mask = 1u << bit_pos; // unsigned literal: bit_pos can be 31, and shifting a signed 1 into the sign bit is not portable
 20 | 	atomicXor(&(voxel_table[int_location]), mask);
 21 | }
 22 | 
 23 | // Classify `point` against the 2D triangle (v0, v1, v2): 1, 2 or 3 when it lies on edge v0-v1, v1-v2 or v2-v0 (within float_error); 0 when strictly inside; -1 otherwise
 24 | __device__ inline int check_point_triangle(float2 v0, float2 v1, float2 v2, float2 point)
 25 | {
 26 | 	// Offsets from each vertex to the query point
 27 | 	const float2 to_a = point - v0;
 28 | 	const float2 to_b = point - v1;
 29 | 	const float2 to_c = point - v2;
 30 | 
 31 | 	// One 2D cross product per edge: close to zero means the point is collinear with that edge
 32 | 	const float cross_ab = to_a.x*to_b.y - to_a.y*to_b.x;
 33 | 	const float cross_bc = to_b.x*to_c.y - to_b.y*to_c.x;
 34 | 	const float cross_ca = to_c.x*to_a.y - to_c.y*to_a.x;
 35 | 
 36 | 	// Collinear and not beyond either end point on both axes: the point sits on that edge (edges are tried in the order 1, 2, 3)
 37 | 	const bool on_ab = fabs(cross_ab) < float_error && to_a.x*to_b.x <= 0 && to_a.y*to_b.y <= 0;
 38 | 	const bool on_bc = fabs(cross_bc) < float_error && to_b.x*to_c.x <= 0 && to_b.y*to_c.y <= 0;
 39 | 	const bool on_ca = fabs(cross_ca) < float_error && to_c.x*to_a.x <= 0 && to_c.y*to_a.y <= 0;
 40 | 	if (on_ab) { return 1; }
 41 | 	if (on_bc) { return 2; }
 42 | 	if (on_ca) { return 3; }
 43 | 
 44 | 	// All three cross products share a sign: strictly inside; anything else counts as outside
 45 | 	return (cross_ab*cross_bc > 0 && cross_ab*cross_ca > 0) ? 0 : -1;
 46 | }
 47 | 
 48 | // Solve the plane equation dot(n, p - v0) = 0 for p.x, with p.y and p.z taken from point.x and point.y. Divides by n.x, so callers must rule out n.x == 0 first.
 49 | __device__ inline float get_x_coordinate(float3 n, float3 v0, float2 point)
 50 | {
 51 | 	return (-(n.y*(point.x - v0.y) + n.z*(point.y - v0.z)) / n.x + v0.x);
 52 | }
 53 | 
 54 | // Returns true when the 2D triangle (v0, v1, v2) is wound counterclockwise; a zero-area triangle returns false
 55 | __device__ inline bool checkCCW(float2 v0, float2 v1, float2 v2)
 56 | {
 57 | 	// Both edge vectors start at v0
 58 | 	const float2 to_v1 = v1 - v0;
 59 | 	const float2 to_v2 = v2 - v0;
 60 | 	// 2D cross product of the two edges
 61 | 	const float cross_z = to_v1.x*to_v2.y - to_v2.x*to_v1.y;
 62 | 	// Strictly positive means counterclockwise
 63 | 	return cross_z > 0;
 64 | }
 65 | 
 66 | // Top-left rule: true for an edge v0 -> v1 that runs towards smaller y, or that is level in y and runs towards smaller x
 67 | __device__ inline bool TopLeftEdge(float2 v0, float2 v1)
 68 | {
 69 | 	return (v0.y > v1.y) || ((v0.y == v1.y) && (v1.x < v0.x));
 70 | }
 71 | 
 72 | // Solid voxelization kernel: for every (y, z) voxel column whose centre projects inside a triangle (top-left rule on shared edges), XOR-flips all voxels from x = 0 up to the triangle's surface. morton_order selects Morton indexing instead of x + y*gx + z*gx*gy.
 73 | __global__ void voxelize_triangle_solid(voxinfo info, float* triangle_data, unsigned int* voxel_table, bool morton_order)
 74 | {
 75 | 	size_t thread_id = threadIdx.x + blockIdx.x * blockDim.x;
 76 | 	size_t stride = blockDim.x * gridDim.x;
 77 | 
 78 | 	while (thread_id < info.n_triangles) { // every thread works on specific triangles in its stride
 79 | 		size_t t = thread_id * 9; // triangle contains 9 floats (3 vertices x 3 coordinates)
 80 | 
 81 | 		// COMPUTE COMMON TRIANGLE PROPERTIES
 82 | 		// Move vertices to origin using bbox
 83 | 		float3 v0 = make_float3(triangle_data[t], triangle_data[t + 1], triangle_data[t + 2]) - info.bbox.min;
 84 | 		float3 v1 = make_float3(triangle_data[t + 3], triangle_data[t + 4], triangle_data[t + 5]) - info.bbox.min;
 85 | 		float3 v2 = make_float3(triangle_data[t + 6], triangle_data[t + 7], triangle_data[t + 8]) - info.bbox.min;
 86 | 		// Edge vectors (two are enough to build the normal)
 87 | 		float3 e0 = v1 - v0;
 88 | 		float3 e1 = v2 - v1;
 89 | 		// Normal vector pointing up from the triangle
 90 | 		float3 n = normalize(cross(e0, e1));
 91 | 		// A triangle (nearly) parallel to the x axis has no x crossing to solve for: skip it, but keep looping so later triangles in this thread's stride are still processed
 92 | 		if (fabs(n.x) < float_error) { thread_id += stride; continue; }
 93 | 
 94 | 		// Calculate the projection of three point into yoz plane
 95 | 		float2 v0_yz = make_float2(v0.y, v0.z);
 96 | 		float2 v1_yz = make_float2(v1.y, v1.z);
 97 | 		float2 v2_yz = make_float2(v2.y, v2.z);
 98 | 
 99 | 		// Set the triangle counterclockwise
100 | 		if (!checkCCW(v0_yz, v1_yz, v2_yz))
101 | 		{
102 | 			float2 v3 = v1_yz;
103 | 			v1_yz = v2_yz;
104 | 			v2_yz = v3;
105 | 		}
106 | 
107 | 		// COMPUTE TRIANGLE BBOX IN GRID
108 | 		// Triangle bounding box in world coordinates is min(v0,v1,v2) and max(v0,v1,v2)
109 | 		float2 bbox_max = fmaxf(v0_yz, fmaxf(v1_yz, v2_yz));
110 | 		float2 bbox_min = fminf(v0_yz, fminf(v1_yz, v2_yz));
111 | 
112 | 		float2 bbox_max_grid = make_float2(floor(bbox_max.x / info.unit.y - 0.5), floor(bbox_max.y / info.unit.z - 0.5)); // last voxel centre at or below the max corner
113 | 		float2 bbox_min_grid = make_float2(ceil(bbox_min.x / info.unit.y - 0.5), ceil(bbox_min.y / info.unit.z - 0.5)); // first voxel centre at or above the min corner
114 | 
115 | 		for (int y = bbox_min_grid.x; y <= bbox_max_grid.x; y++)
116 | 		{
117 | 			for (int z = bbox_min_grid.y; z <= bbox_max_grid.y; z++)
118 | 			{
119 | 				float2 point = make_float2((y + 0.5) * info.unit.y, (z + 0.5) * info.unit.z); // centre of the voxel column in the yz plane
120 | 				int checknum = check_point_triangle(v0_yz, v1_yz, v2_yz, point);
121 | 				if ((checknum == 1 && TopLeftEdge(v0_yz, v1_yz)) || (checknum == 2 && TopLeftEdge(v1_yz, v2_yz)) || (checknum == 3 && TopLeftEdge(v2_yz, v0_yz)) || (checknum == 0))
122 | 				{
123 | 					int xmax = int(get_x_coordinate(n, v0, point) / info.unit.x - 0.5);
124 | 					for (int x = 0; x <= xmax; x++)
125 | 					{
126 | 						if (morton_order){
127 | 							size_t location = mortonEncode_LUT(x, y, z);
128 | 							setBitXor(voxel_table, location);
129 | 						} else {
130 | 							size_t location =
131 | 								static_cast<size_t>(x) +
132 | 								(static_cast<size_t>(y) * static_cast<size_t>(info.gridsize.x)) +
133 | 								(static_cast<size_t>(z) * (static_cast<size_t>(info.gridsize.y) * static_cast<size_t>(info.gridsize.x))); 
134 | 							setBitXor(voxel_table, location);
135 | 						}
136 | 						continue;
137 | 					}
138 | 				}
139 | 			}
140 | 		}
141 | 		// sanity check: atomically count triangles
142 | 		//atomicAdd(&triangles_seen_count, 1);
143 | 		thread_id += stride;
144 | 	}
145 | }
146 | // Host entry point for solid voxelization: launches voxelize_triangle_solid with one thread per triangle in v, waits for it and prints the GPU time. triangle_data and vtable are handed straight to the kernel (presumably device-accessible memory - confirm at the caller).
147 | void voxelize_solid(const voxinfo& v, float* triangle_data, unsigned int* vtable, bool morton_code) {
148 | 	float elapsedTime;
149 | 	
150 | 	// Create timers, set start time
151 | 	cudaEvent_t start_vox, stop_vox;
152 | 	checkCudaErrors(cudaEventCreate(&start_vox));
153 | 	checkCudaErrors(cudaEventCreate(&stop_vox));
154 | 
155 | 	// Copy morton LUT if we're encoding to morton
156 | 	if (morton_code){
157 | 		checkCudaErrors(cudaMemcpyToSymbol(morton256_x, host_morton256_x, 256 * sizeof(uint32_t)));
158 | 		checkCudaErrors(cudaMemcpyToSymbol(morton256_y, host_morton256_y, 256 * sizeof(uint32_t)));
159 | 		checkCudaErrors(cudaMemcpyToSymbol(morton256_z, host_morton256_z, 256 * sizeof(uint32_t)));
160 | 	}
161 | 
162 | 	// Estimate best block and grid size using CUDA Occupancy Calculator
163 | 	int blockSize;   // The launch configurator returned block size 
164 | 	int minGridSize; // The minimum grid size needed to achieve the  maximum occupancy for a full device launch 
165 | 	int gridSize;    // The actual grid size needed, based on input size 
166 | 	checkCudaErrors(cudaOccupancyMaxPotentialBlockSize(&minGridSize, &blockSize, voxelize_triangle_solid, 0, 0));
167 | 	// Round up according to array size 
168 | 	gridSize = static_cast<int>((v.n_triangles + blockSize - 1) / blockSize);
169 | 
170 | 	checkCudaErrors(cudaEventRecord(start_vox, 0));
171 | 	if (gridSize > 0) { voxelize_triangle_solid << <gridSize, blockSize >> > (v, triangle_data, vtable, morton_code); } // a launch with zero blocks is invalid, so skip it when there are no triangles
172 | 	checkCudaErrors(cudaGetLastError()); // launch-time failures (e.g. a bad configuration) are only reported here
173 | 	checkCudaErrors(cudaDeviceSynchronize()); // wait for the kernel; errors raised while it runs surface here
174 | 	checkCudaErrors(cudaEventRecord(stop_vox, 0));
175 | 	checkCudaErrors(cudaEventSynchronize(stop_vox));
176 | 	checkCudaErrors(cudaEventElapsedTime(&elapsedTime, start_vox, stop_vox));
177 | 	printf("[Perf] Voxelization GPU time: %.1f ms\n", elapsedTime);
178 | 
179 | 	// SANITY CHECKS
180 | #ifdef _DEBUG
181 | 	size_t debug_n_triangles, debug_n_voxels_marked, debug_n_voxels_tested;
182 | 	checkCudaErrors(cudaMemcpyFromSymbol((void*)&(debug_n_triangles),debug_d_n_triangles, sizeof(debug_d_n_triangles), 0, cudaMemcpyDeviceToHost));
183 | 	checkCudaErrors(cudaMemcpyFromSymbol((void*)&(debug_n_voxels_marked), debug_d_n_voxels_marked, sizeof(debug_d_n_voxels_marked), 0, cudaMemcpyDeviceToHost));
184 | 	checkCudaErrors(cudaMemcpyFromSymbol((void*) & (debug_n_voxels_tested), debug_d_n_voxels_tested, sizeof(debug_d_n_voxels_tested), 0, cudaMemcpyDeviceToHost));
185 | 	printf("[Debug] Processed %zu triangles on the GPU \n", debug_n_triangles); // %zu is the size_t conversion
186 | 	printf("[Debug] Tested %zu voxels for overlap on GPU \n", debug_n_voxels_tested);
187 | 	printf("[Debug] Marked %zu voxels as filled (includes duplicates!) \n", debug_n_voxels_marked);
188 | #endif
189 | 
190 | 	// Destroy timers
191 | 	checkCudaErrors(cudaEventDestroy(start_vox));
192 | 	checkCudaErrors(cudaEventDestroy(stop_vox));
193 | }


--------------------------------------------------------------------------------
/test_models/credit.txt:
--------------------------------------------------------------------------------
1 | Stanford Bunny Model - (c) 1996 Stanford University - http://www.graphics.stanford.edu/data/3Dscanrep/


--------------------------------------------------------------------------------