├── .github └── workflows │ └── autobuild.yml ├── .gitignore ├── CMakeLists.txt ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── img ├── output_examples.jpg └── viewvox.JPG ├── msvc └── vs2022 │ ├── cuda_voxelizer.sln │ ├── cuda_voxelizer.vcxproj │ ├── cuda_voxelizer.vcxproj.filters │ └── custom_includes.props ├── src ├── cpu_voxelizer.cpp ├── cpu_voxelizer.h ├── libs │ ├── cuda │ │ ├── helper_cuda.h │ │ ├── helper_math.h │ │ └── helper_string.h │ └── magicavoxel_file_writer │ │ ├── LICENSE │ │ ├── VoxWriter.cpp │ │ └── VoxWriter.h ├── main.cpp ├── morton_LUTs.h ├── timer.h ├── todo.txt ├── util.h ├── util_cuda.cpp ├── util_cuda.h ├── util_io.cpp ├── util_io.h ├── voxelize.cu ├── voxelize.cuh └── voxelize_solid.cu └── test_models ├── bunny.OBJ └── credit.txt /.github/workflows/autobuild.yml: -------------------------------------------------------------------------------- 1 | name: build 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | - dev 8 | pull_request: 9 | branches: 10 | - main 11 | - dev 12 | jobs: 13 | linux-build: 14 | runs-on: ubuntu-20.04 15 | container: nvidia/cuda:12.2.0-devel-ubuntu20.04 16 | 17 | env: 18 | CUDAARCHS: '60' 19 | TRIMESH_VERSION: '2022.03.04' 20 | CMAKE_VERSION: '3.20.4' 21 | 22 | steps: 23 | - name: Checkout 24 | uses: actions/checkout@v2 25 | 26 | - name: Install OpenMP and other libraries 27 | run: | 28 | apt update 29 | apt install -y --no-install-recommends apt-utils 30 | apt install -y libgomp1 git mesa-common-dev libglu1-mesa-dev libxi-dev wget ninja-build 31 | 32 | - name: Install CMake 33 | run: | 34 | wget -q -O ./cmake-install.sh https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}-Linux-x86_64.sh 35 | chmod u+x ./cmake-install.sh 36 | mkdir "$HOME"/cmake 37 | ./cmake-install.sh --skip-license --prefix="$HOME"/cmake 38 | rm ./cmake-install.sh 39 | 40 | - name: Build Trimesh2 41 | run: | 42 | git clone --single-branch --depth 1 -b ${{ env.TRIMESH_VERSION }} 
https://github.com/Forceflow/trimesh2.git ../trimesh2 43 | cd ../trimesh2 44 | make all -j $(nproc) 45 | make clean 46 | 47 | - name: Configure cuda_voxelizer 48 | run: | 49 | PATH=$PATH:"$HOME"/cmake/bin 50 | cmake -GNinja \ 51 | -DTrimesh2_INCLUDE_DIR="../trimesh2/include" \ 52 | -DTrimesh2_LINK_DIR="../trimesh2/lib.Linux64" \ 53 | -S . -B ./build 54 | 55 | - name: Build cuda_voxelizer 56 | run: | 57 | PATH=$PATH:"$HOME"/cmake/bin 58 | cmake --build ./build --parallel $(nproc) 59 | 60 | - name: Test 61 | run: ./build/cuda_voxelizer -f ./test_models/bunny.OBJ -s 64 -cpu 62 | 63 | windows-build: 64 | runs-on: windows-2019 65 | env: 66 | CUDA_MAJOR_VERSION: '12.2' 67 | CUDA_PATCH_VERSION: '0' 68 | TRIMESH_VERSION: '2022.03.04' 69 | CUDAARCHS: '60' 70 | 71 | steps: 72 | - name: Checkout 73 | uses: actions/checkout@v2 74 | 75 | - name: Cache Trimesh2 76 | id: trimesh2-cache 77 | uses: actions/cache@v2 78 | with: 79 | path: ${{ runner.workspace }}\trimesh2-build 80 | key: ${{ runner.os }}-build-trimesh2-cache-1 81 | 82 | 83 | # Older version then 11.4.0 of CUDA Toolkit does not have thrust option for installation in silent mode 84 | - uses: Jimver/cuda-toolkit@v0.2.11 85 | id: cuda-toolkit 86 | with: 87 | method: 'network' 88 | cuda: '${{ env.CUDA_MAJOR_VERSION }}.${{ env.CUDA_PATCH_VERSION }}' 89 | sub-packages: '["nvcc", "cudart", "visual_studio_integration"]' 90 | 91 | - name: Build Trimesh2 92 | if: steps.trimesh2-cache.outputs.cache-hit != 'true' 93 | run: | 94 | Install-Module VSSetup -Scope CurrentUser -Force 95 | git clone -b ${{ env.TRIMESH_VERSION }} --single-branch --depth 1 https://github.com/Forceflow/trimesh2.git ..\trimesh2 96 | cd ..\trimesh2 97 | & (Join-Path (Get-VSSetupInstance).InstallationPath -ChildPath MSBuild\Current\Bin\msbuild.exe) .\msvc\vs2019\trimesh2.sln -nologo -m:2 /t:libsrc /p:Configuration=Release /p:Platform=x64 98 | mkdir ..\trimesh2-build 99 | Move-Item .\include ..\trimesh2-build 100 | Move-Item .\lib.Win64.vs142 ..\trimesh2-build 
101 | cd - 102 | rm -Recurse -Force ..\trimesh2 103 | 104 | - name: Configure cuda_voxelizer 105 | run: | 106 | $trimeshDir = "..\trimesh2-build" 107 | cmake -A x64 ` 108 | -DCMAKE_TOOLCHAIN_FILE:FILEPATH="C:\vcpkg\scripts\buildsystems\vcpkg.cmake" ` 109 | -DTrimesh2_INCLUDE_DIR:PATH="$trimeshDir\include" ` 110 | -DTrimesh2_LINK_DIR:PATH="$trimeshDir\lib.Win64.vs142" ` 111 | -DCMAKE_BUILD_TYPE=Release ` 112 | -S . -B .\build 113 | 114 | - name: Build cuda_voxelizer 115 | run: cmake --build .\build --parallel 2 --target ALL_BUILD --config Release 116 | 117 | - name: Test cuda_voxelizer 118 | run: .\build\Release\cuda_voxelizer.exe -f .\test_models\bunny.OBJ -s 64 -cpu 119 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 3 | 4 | # User-specific files 5 | *.suo 6 | *.user 7 | *.sln.docstates 8 | 9 | # Thirdparty libraries 10 | thirdparty 11 | 12 | # Build results 13 | [Dd]ebug/ 14 | [Dd]ebugPublic/ 15 | [Rr]elease/ 16 | x64/ 17 | build/ 18 | bld/ 19 | [Bb]in/ 20 | [Oo]bj/ 21 | 22 | # MSTest test Results 23 | [Tt]est[Rr]esult*/ 24 | [Bb]uild[Ll]og.* 25 | 26 | #NUNIT 27 | *.VisualState.xml 28 | TestResult.xml 29 | 30 | # Build Results of an ATL Project 31 | [Dd]ebugPS/ 32 | [Rr]eleasePS/ 33 | dlldata.c 34 | 35 | *_i.c 36 | *_p.c 37 | *_i.h 38 | *.ilk 39 | *.meta 40 | *.pch 41 | *.pdb 42 | *.pgc 43 | *.pgd 44 | *.rsp 45 | *.sbr 46 | *.tlb 47 | *.tli 48 | *.tlh 49 | *.tmp 50 | *.tmp_proj 51 | *.log 52 | *.vspscc 53 | *.vssscc 54 | .builds 55 | *.pidb 56 | *.svclog 57 | *.scc 58 | 59 | # Chutzpah Test files 60 | _Chutzpah* 61 | 62 | # Visual C++ cache files 63 | ipch/ 64 | *.aps 65 | *.ncb 66 | *.opensdf 67 | *.sdf 68 | *.cachefile 69 | 70 | # Visual Studio profiler 71 | *.psess 72 | *.vsp 73 | *.vspx 74 | 75 | # TFS 2012 Local 
Workspace 76 | $tf/ 77 | 78 | # Guidance Automation Toolkit 79 | *.gpState 80 | 81 | # ReSharper is a .NET coding add-in 82 | _ReSharper*/ 83 | *.[Rr]e[Ss]harper 84 | *.DotSettings.user 85 | 86 | # JustCode is a .NET coding addin-in 87 | .JustCode 88 | 89 | # TeamCity is a build add-in 90 | _TeamCity* 91 | 92 | # DotCover is a Code Coverage Tool 93 | *.dotCover 94 | 95 | # NCrunch 96 | *.ncrunch* 97 | _NCrunch_* 98 | .*crunch*.local.xml 99 | 100 | # MightyMoose 101 | *.mm.* 102 | AutoTest.Net/ 103 | 104 | # Web workbench (sass) 105 | .sass-cache/ 106 | 107 | # Installshield output folder 108 | [Ee]xpress/ 109 | 110 | # DocProject is a documentation generator add-in 111 | DocProject/buildhelp/ 112 | DocProject/Help/*.HxT 113 | DocProject/Help/*.HxC 114 | DocProject/Help/*.hhc 115 | DocProject/Help/*.hhk 116 | DocProject/Help/*.hhp 117 | DocProject/Help/Html2 118 | DocProject/Help/html 119 | 120 | # Click-Once directory 121 | publish/ 122 | 123 | # Publish Web Output 124 | *.[Pp]ublish.xml 125 | *.azurePubxml 126 | 127 | # NuGet Packages Directory 128 | packages/ 129 | ## TODO: If the tool you use requires repositories.config uncomment the next line 130 | #!packages/repositories.config 131 | 132 | # Enable "build/" folder in the NuGet Packages folder since NuGet packages use it for MSBuild targets 133 | # This line needs to be after the ignore of the build folder (and the packages folder if the line above has been uncommented) 134 | !packages/build/ 135 | 136 | # Windows Azure Build Output 137 | csx/ 138 | *.build.csdef 139 | 140 | # Windows Store app package directory 141 | AppPackages/ 142 | 143 | # Others 144 | sql/ 145 | *.Cache 146 | ClientBin/ 147 | [Ss]tyle[Cc]op.* 148 | ~$* 149 | *~ 150 | *.dbmdl 151 | *.dbproj.schemaview 152 | *.pfx 153 | *.publishsettings 154 | node_modules/ 155 | 156 | # RIA/Silverlight projects 157 | Generated_Code/ 158 | 159 | # Backup & report files from converting an old project file to a newer 160 | # Visual Studio version. 
Backup files are not needed, because we have git ;-) 161 | _UpgradeReport_Files/ 162 | Backup*/ 163 | UpgradeLog*.XML 164 | UpgradeLog*.htm 165 | 166 | # SQL Server files 167 | *.mdf 168 | *.ldf 169 | 170 | # Business Intelligence projects 171 | *.rdl.data 172 | *.bim.layout 173 | *.bim_*.settings 174 | 175 | # Microsoft Fakes 176 | FakesAssemblies/ 177 | *.opendb 178 | *.db 179 | *.deps 180 | 181 | #VS folders 182 | .vs/ 183 | 184 | #Generated voxel models 185 | *.binvox 186 | *.bin -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | CMAKE_MINIMUM_REQUIRED(VERSION 3.20 FATAL_ERROR) 2 | 3 | PROJECT(CudaVoxelize LANGUAGES CXX CUDA) 4 | 5 | FIND_PACKAGE(OpenMP REQUIRED) 6 | FIND_PACKAGE(CUDAToolkit REQUIRED) 7 | 8 | SET(CUDA_VOXELIZER_EXECUTABLE cuda_voxelizer) 9 | 10 | SET(Trimesh2_INCLUDE_DIR CACHE PATH "Path to Trimesh2 includes") 11 | 12 | IF(NOT Trimesh2_INCLUDE_DIR) 13 | MESSAGE(FATAL_ERROR "You need to set variable Trimesh2_INCLUDE_DIR") 14 | ENDIF() 15 | 16 | FIND_FILE(Trimesh2_TriMesh_h TriMesh.h ${Trimesh2_INCLUDE_DIR}) 17 | 18 | IF(NOT Trimesh2_TriMesh_h) 19 | message(FATAL_ERROR "Can't find TriMesh.h in ${Trimesh2_INCLUDE_DIR}") 20 | ENDIF() 21 | MARK_AS_ADVANCED(Trimesh2_TriMesh_h) 22 | 23 | SET(Trimesh2_LINK_DIR CACHE PATH "Path to Trimesh2 library dir.") 24 | 25 | IF(NOT Trimesh2_LINK_DIR) 26 | MESSAGE(FATAL_ERROR "You need to set variable Trimesh2_LINK_DIR") 27 | ENDIF() 28 | 29 | IF(NOT EXISTS "${Trimesh2_LINK_DIR}") 30 | MESSAGE(FATAL_ERROR "Trimesh2 library dir does not exist") 31 | ENDIF() 32 | 33 | FIND_LIBRARY(Trimesh2_LIBRARY trimesh ${Trimesh2_LINK_DIR}) 34 | 35 | IF(NOT Trimesh2_LIBRARY) 36 | message(SEND_ERROR "Can't find libtrimesh.a in ${Trimesh2_LINK_DIR}") 37 | ENDIF() 38 | MARK_AS_ADVANCED(Trimesh2_LIBRARY) 39 | 40 | MESSAGE(STATUS "Found Trimesh2 include: ${Trimesh2_TriMesh_h}") 41 | MESSAGE(STATUS "Found 
Trimesh2 lib: ${Trimesh2_LIBRARY}") 42 | 43 | SET(CUDA_VOXELIZER_SRCS 44 | ./src/main.cpp 45 | ./src/util_cuda.cpp 46 | ./src/util_io.cpp 47 | ./src/cpu_voxelizer.cpp 48 | ./src/libs/magicavoxel_file_writer/VoxWriter.cpp 49 | ) 50 | SET(CUDA_VOXELIZER_SRCS_CU 51 | ./src/voxelize.cu 52 | ./src/voxelize_solid.cu 53 | ) 54 | 55 | ADD_EXECUTABLE( 56 | ${CUDA_VOXELIZER_EXECUTABLE} 57 | ${CUDA_VOXELIZER_SRCS} 58 | ${CUDA_VOXELIZER_SRCS_CU}) 59 | 60 | TARGET_COMPILE_FEATURES(${CUDA_VOXELIZER_EXECUTABLE} PRIVATE cxx_std_17) 61 | TARGET_INCLUDE_DIRECTORIES(${CUDA_VOXELIZER_EXECUTABLE} PRIVATE ${Trimesh2_INCLUDE_DIR}) 62 | TARGET_LINK_LIBRARIES(${CUDA_VOXELIZER_EXECUTABLE} PRIVATE ${Trimesh2_LIBRARY} PRIVATE OpenMP::OpenMP_CXX PRIVATE CUDA::cudart) 63 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | * Please make PR's to the `dev` branch 2 | * Update the CMake and MSVC projects to include any extra files you add 3 | * Avoid pulling in extra dependencies (but I'll allow if needed) 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Jeroen Baert 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![Build Status](https://github.com/Forceflow/cuda_voxelizer/actions/workflows/autobuild.yml/badge.svg) ![license](https://img.shields.io/github/license/Forceflow/cuda_voxelizer.svg)
2 | [![ko-fi](https://ko-fi.com/img/githubbutton_sm.svg)](https://ko-fi.com/Z8Z7GFNW3) 3 | 4 | # cuda_voxelizer v0.6 5 | 6 | A command-line tool to convert polygon meshes to (annotated) voxel grids. 7 | * Supported input formats: .ply, .off, .obj, .3DS, .SM and RAY 8 | * Supported output formats: .vox, .binvox, .obj cubes and point cloud, morton ordered grid 9 | * Requires a CUDA-compatible video card. Compute Capability 2.0 or higher (Nvidia Fermi or better). 10 | * Since v0.4.4, the voxelizer reverts to a (slower) CPU voxelization method when no CUDA device is found 11 | 12 | **Important:** _In v0.6 I replaced all GLM math types with builtin CUDA types, removing an external dependency. This is a big change. I've tested the release as well as I can, but if you encounter any weirdness, it's advised to check if you can reproduce the problem with an older version. Thanks!_ 13 | 14 | ## Usage 15 | Program options: 16 | * `-f `: **(required)** A path to a polygon-based 3D model file. 17 | * `-s `: **(default: 256)** The length of the cubical voxel grid. The process will construct the tightest possible cubical bounding box around the input model. 18 | * `-o `: The output format for voxelized models, default: *vox*. Output files are saved in the same folder as the input file, in the format `_.extension`. 19 | * `vox`: **(default)** A [vox](https://github.com/ephtracy/voxel-model/blob/master/MagicaVoxel-file-format-vox.txt) file, which is the native format of and can be viewed with the excellent [MagicaVoxel](https://ephtracy.github.io/). 20 | * `binvox`: A [binvox](http://www.patrickmin.com/binvox/binvox.html) file. Can be viewed using [viewvox](http://www.patrickmin.com/viewvox/). 21 | * `obj`: A mesh containing actual cubes (made up of triangle faces) for each voxel. 22 | * `obj_points`: A mesh containing a point cloud, with a vertex for each voxel. 
Can be viewed using any compatible viewer that can just display vertices, like [Blender](https://www.blender.org/) or [Meshlab](https://www.meshlab.net/). 23 | * `morton`: a binary file containing a Morton-ordered grid. This is an internal format I use for other tools. 24 | * `-cpu`: Force multi-threaded voxelization on the CPU instead of GPU. Can be used when a CUDA device is not detected/compatible, or for very small models where GPU call overhead is not worth it. 25 | * `-solid` : (Experimental) Use solid voxelization instead of voxelizing the mesh faces. Needs a watertight input mesh. 26 | 27 | ## Examples 28 | `cuda_voxelizer -f bunny.ply -s 256` generates a 256 x 256 x 256 vox-based voxel model which will be stored in `bunny_256.vox`. 29 | 30 | `cuda_voxelizer -f torus.ply -s 64 -o obj -solid` generates a solid (filled) 64 x 64 x 64 .obj voxel model which will be stored in `torus_64.obj`. 31 | 32 | ![output_examples](https://raw.githubusercontent.com/Forceflow/cuda_voxelizer/main/img/output_examples.jpg) 33 | 34 | ## Building 35 | The build process is aimed at 64-bit executables. It's possible to build for 32-bit as well, but I'm not actively testing/supporting this. 36 | You can build using CMake or using the provided Visual Studio project. Since 2022, cuda_voxelizer builds via [Github Actions](https://github.com/Forceflow/cuda_voxelizer/actions) as well, check the .[yml config file](https://github.com/Forceflow/cuda_voxelizer/blob/main/.github/workflows/autobuild.yml) for more info. 37 | 38 | ### Dependencies 39 | The project has the following build dependencies: 40 | * [Nvidia Cuda 8.0 Toolkit (or higher)](https://developer.nvidia.com/cuda-toolkit) for CUDA 41 | * [Trimesh2](https://github.com/Forceflow/trimesh2) for model importing. Latest version recommended. 42 | * [OpenMP](https://www.openmp.org/) for multi-threading. 
43 | 44 | ### Build using CMake (Windows, Linux) 45 | After installing dependencies, do `mkdir build` and `cd build`, followed by: 46 | 47 | For Windows with Visual Studio: 48 | ```powershell 49 | $env:CUDAARCHS="your_cuda_compute_capability" 50 | cmake -A x64 -DTrimesh2_INCLUDE_DIR:PATH="path_to_trimesh2_include" -DTrimesh2_LINK_DIR:PATH="path_to_trimesh2_library_dir" .. 51 | ``` 52 | 53 | For Linux: 54 | ```bash 55 | CUDAARCHS="your_cuda_compute_capability" cmake -DTrimesh2_INCLUDE_DIR:PATH="path_to_trimesh2_include" -DTrimesh2_LINK_DIR:PATH="path_to_trimesh2_library_dir" -DCUDA_ARCH:STRING="your_cuda_compute_capability" .. 56 | ``` 57 | Where `your_cuda_compute_capability` is a string specifying your CUDA architecture ([more info here](https://docs.nvidia.com/cuda/archive/10.2/cuda-compiler-driver-nvcc/index.html#options-for-steering-gpu-code-generation-gpu-architecture) and [here CMake](https://cmake.org/cmake/help/v3.20/envvar/CUDAARCHS.html#envvar:CUDAARCHS)). For example: `CUDAARCHS="50;61"` or `CUDAARCHS="60"`. 58 | 59 | Finally, run 60 | ``` 61 | cmake --build . --parallel number_of_cores 62 | ``` 63 | 64 | ### Build using Visual Studio project (Windows) 65 | A project solution for Visual Studio 2022 is provided in the `msvc` folder. It is configured for CUDA 12.1, but you can edit the project file to make it work with other CUDA versions. You can edit the `custom_includes.props` file to configure the library locations, and specify a place where the resulting binaries should be placed. 66 | 67 | ``` 68 | C:\libs\trimesh2\ 69 | C:\libs\glm\ 70 | D:\dev\Binaries\ 71 | ``` 72 | ## Details 73 | `cuda_voxelizer` implements an optimized version of the method described in M. Schwarz and HP Seidel's 2010 paper [*Fast Parallel Surface and Solid Voxelization on GPU's*](http://research.michael-schwarz.com/publ/2010/vox/). 
The morton-encoded table was based on my 2013 HPG paper [*Out-Of-Core construction of Sparse Voxel Octrees*](http://graphics.cs.kuleuven.be/publications/BLD14OCCSVO/) and the work in [*libmorton*](https://github.com/Forceflow/libmorton). 74 | 75 | `cuda_voxelizer` is built with a focus on performance. Usage of the routine as a per-frame voxelization step for real-time applications is viable. These are the voxelization timings for the [Stanford Bunny Model](https://graphics.stanford.edu/data/3Dscanrep/) (1,55 MB, 70k triangles). 76 | * This is the voxelization time for a non-solid voxelization. No I/O - from disk or to GPU - is included in this timing. 77 | * CPU voxelization time is heavily dependent on how many cores your CPU has - OpenMP allocates 1 thread per core. 78 | 79 | | Grid size | GPU (GTX 1050 TI) | CPU (Intel i7 8750H, 12 threads) | 80 | |-----------|--------|--------| 81 | | 64³ | 0.2 ms | 39.8 ms | 82 | | 128³ | 0.3 ms | 63.6 ms | 83 | | 256³ | 0.6 ms | 118.2 ms | 84 | | 512³ | 1.8 ms | 308.8 ms | 85 | | 1024³ | 8.6 ms | 1047.5 ms | 86 | | 2048³ | 44.6 ms | 4147.4 ms | 87 | 88 | ## Thanks 89 | * The [MagicaVoxel](https://ephtracy.github.io/) I/O was implemented using [MagicaVoxel File Writer](https://github.com/aiekick/MagicaVoxel_File_Writer) by [aiekick](https://github.com/aiekick). 90 | * Thanks to [conceptclear](https://github.com/conceptclear) for implementing solid voxelization. 91 | 92 | ## See also 93 | 94 | * The [.binvox file format](https://www.patrickmin.com/binvox/binvox.html) was created by Michael Kazhdan. 
95 | * [Patrick Min](https://www.patrickmin.com/binvox/) wrote some interesting tools to work with it: 96 | * [viewvox](https://www.patrickmin.com/viewvox/): Visualization of voxel grids (a copy of this tool is included in cuda_voxelizer releases) 97 | * [thinvox](https://www.patrickmin.com/thinvox/): Thinning of voxel grids 98 | * [binvox-rw-py](https://github.com/dimatura/binvox-rw-py) is a Python module to interact with .binvox files 99 | * [Zarbuz](https://github.com/zarbuz)'s [FileToVox](https://github.com/Zarbuz/FileToVox) looks interesting as well 100 | * If you want a good customizable CPU-based voxelizer, I can recommend [VoxSurf](https://github.com/sylefeb/VoxSurf). 101 | * Another hackable voxel viewer is Sean Barrett's excellent [stb_voxel_render.h](https://github.com/nothings/stb/blob/master/stb_voxel_render.h). 102 | * Nvidia also has a voxel library called [GVDB](https://developer.nvidia.com/gvdb), that does a lot more than just voxelizing. 103 | 104 | ## Todo / Possible future work 105 | This is on my list of "nice things to add". 106 | 107 | * Better output filename control 108 | * Noncubic grid support 109 | * Memory limits test 110 | * Implement partitioning for larger models 111 | * Do a pre-pass to categorize triangles 112 | * Implement capture of normals / color / texture data 113 | 114 | ## Citation 115 | If you use cuda_voxelizer in your published paper or other software, please reference it, for example as follows: 116 |
117 | @Misc{cudavoxelizer17,
118 | author = "Jeroen Baert",
119 | title = "Cuda Voxelizer: A GPU-accelerated Mesh Voxelizer",
120 | howpublished = "\url{https://github.com/Forceflow/cuda_voxelizer}",
121 | year = "2017"}
122 | 
123 | If you end up using cuda_voxelizer in something cool, drop me an e-mail: **mail (at) jeroen-baert.be** 124 | 125 | ## Donate 126 | cuda_voxelizer is developed in my free time. If you want to support the project, you can do so through: 127 | * [Kofi](https://ko-fi.com/jbaert) 128 | * BTC: 3GX3b7BZK2nhsneBG8eTqEchgCQ8FDfwZq 129 | * ETH: 0x7C9e97D2bBC2dFDd93EF56C77f626e802BA56860 130 | -------------------------------------------------------------------------------- /img/output_examples.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Forceflow/cuda_voxelizer/ff93fe65a9144c1dc9f11d22e786ad698387767b/img/output_examples.jpg -------------------------------------------------------------------------------- /img/viewvox.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Forceflow/cuda_voxelizer/ff93fe65a9144c1dc9f11d22e786ad698387767b/img/viewvox.JPG -------------------------------------------------------------------------------- /msvc/vs2022/cuda_voxelizer.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio 15 4 | VisualStudioVersion = 15.0.28307.271 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cuda_voxelizer", "cuda_voxelizer.vcxproj", "{D4330816-735D-4CC7-AE2A-04A0E998099E}" 7 | EndProject 8 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{C52A2702-E60C-4590-9C55-C8C66CCA5BAB}" 9 | EndProject 10 | Global 11 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 12 | Debug|x64 = Debug|x64 13 | Release|x64 = Release|x64 14 | EndGlobalSection 15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 16 | {D4330816-735D-4CC7-AE2A-04A0E998099E}.Debug|x64.ActiveCfg = Debug|x64 17 | 
{D4330816-735D-4CC7-AE2A-04A0E998099E}.Debug|x64.Build.0 = Debug|x64 18 | {D4330816-735D-4CC7-AE2A-04A0E998099E}.Release|x64.ActiveCfg = Release|x64 19 | {D4330816-735D-4CC7-AE2A-04A0E998099E}.Release|x64.Build.0 = Release|x64 20 | EndGlobalSection 21 | GlobalSection(SolutionProperties) = preSolution 22 | HideSolutionNode = FALSE 23 | EndGlobalSection 24 | GlobalSection(ExtensibilityGlobals) = postSolution 25 | SolutionGuid = {D7628502-09E5-4B15-AB62-365471E954D4} 26 | EndGlobalSection 27 | EndGlobal 28 | -------------------------------------------------------------------------------- /msvc/vs2022/cuda_voxelizer.vcxproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug 6 | x64 7 | 8 | 9 | Release 10 | x64 11 | 12 | 13 | 14 | {D4330816-735D-4CC7-AE2A-04A0E998099E} 15 | cuda_voxelizer 16 | 10.0 17 | 18 | 19 | 20 | Application 21 | true 22 | MultiByte 23 | v143 24 | 25 | 26 | Application 27 | false 28 | true 29 | MultiByte 30 | v143 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | true 47 | C:\libs\trimesh2\include;C:\libs\glm;$(IncludePath) 48 | C:\libs\trimesh2\lib.Win64;$(LibraryPath) 49 | xcopy /y "$(SolutionDir)$(Platform)\$(Configuration)\$(ProjectName).exe" "$(BINARY_OUTPUT_DIR)$(ProjectName).exe" 50 | $(ProjectName)_debug 51 | 52 | 53 | C:\libs\trimesh2\include;C:\libs\glm;$(IncludePath) 54 | C:\libs\trimesh2\lib.Win64;$(LibraryPath) 55 | xcopy /y "$(SolutionDir)$(Platform)\$(Configuration)\$(ProjectName).exe" "$(BINARY_OUTPUT_DIR)$(ProjectName).exe" 56 | 57 | 58 | 59 | Level3 60 | Disabled 61 | WIN32;WIN64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) 62 | true 63 | 64 | 65 | true 66 | Console 67 | trimeshd.lib;cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) 68 | 69 | 70 | copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" 71 | copy 
"$(CudaToolkitBinDir)\cudart*.dll" "$(BINARY_OUTPUT_DIR)" 72 | copy /y "$(SolutionDir)$(Platform)\$(Configuration)\$(TargetName).exe" "$(BINARY_OUTPUT_DIR)$(TargetName).exe" 73 | 74 | 75 | false 76 | --source-in-ptx %(AdditionalOptions) 77 | compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80 78 | 79 | 80 | 81 | 82 | Level3 83 | MaxSpeed 84 | true 85 | true 86 | WIN32;WIN64;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) 87 | Speed 88 | AnySuitable 89 | true 90 | false 91 | Strict 92 | 93 | 94 | true 95 | true 96 | Console 97 | trimesh.lib;cudart.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) 98 | 99 | 100 | copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" 101 | copy "$(CudaToolkitBinDir)\cudart*.dll" "$(BINARY_OUTPUT_DIR)" 102 | copy /y "$(SolutionDir)$(Platform)\$(Configuration)\$(TargetName).exe" "$(BINARY_OUTPUT_DIR)$(TargetName).exe" 103 | 104 | 105 | true 106 | 64 107 | compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | -------------------------------------------------------------------------------- /msvc/vs2022/cuda_voxelizer.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | util 11 | 12 | 13 | util 14 | 15 | 16 | 17 | libs\magicavoxel_file_writer 18 | 19 | 20 | 21 | 22 | 23 | util 24 | 25 | 26 | 27 | util 28 | 29 | 30 | util 31 | 32 | 33 | util 34 | 35 | 36 | libs\cuda 37 | 38 | 39 | libs\cuda 40 | 41 | 42 | 43 | libs\magicavoxel_file_writer 44 | 45 | 46 | libs\cuda 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | {a0232da8-2097-49f4-9412-0e4223c7ba4d} 55 | 
// Set a single bit in the packed voxel table.
//
// The table is an array of 32-bit words; the bit for voxel `index` lives in
// word (index / 32). Inside a word the bits are numbered from the most
// significant end: index % 32 == 0 maps to bit 31, index % 32 == 31 maps to bit 0.
//
// Parameters:
//   voxel_table  packed bit table; the caller must provide at least
//                (index / 32) + 1 words.
//   index        linear index of the voxel whose bit is set.
//
// The read-modify-write of the table word is wrapped in an OpenMP critical
// section, so concurrent calls from an OpenMP parallel region do not lose updates.
void setBit(unsigned int* voxel_table, size_t index) {
	size_t int_location = index / size_t(32);
	uint32_t bit_pos = uint32_t(size_t(31) - (index % size_t(32))); // we count bit positions RtL, but array indices LtR
	// Build the mask from an unsigned 1: bit_pos can be 31, and shifting a
	// signed int into its sign bit is not portable.
	uint32_t mask = uint32_t(1) << bit_pos;
#pragma omp critical
	{
		voxel_table[int_location] = (voxel_table[int_location] | mask);
	}
}
host_morton256_z[(z) & 0xFF] | 29 | host_morton256_y[(y) & 0xFF] | 30 | host_morton256_x[(x) & 0xFF]; 31 | return answer; 32 | } 33 | 34 | // Mesh voxelization method 35 | void cpu_voxelize_mesh(voxinfo info, trimesh::TriMesh* themesh, unsigned int* voxel_table, bool morton_order) { 36 | Timer cpu_voxelization_timer; cpu_voxelization_timer.start(); 37 | 38 | // PREPASS 39 | // Move all vertices to origin (can be done in parallel) 40 | trimesh::vec3 move_min = float3_to_trimesh(info.bbox.min); 41 | #pragma omp parallel for 42 | for (int64_t i = 0; i < (int64_t) themesh->vertices.size(); i++) { 43 | if (i == 0) { printf("[Info] Using %d threads \n", omp_get_num_threads()); } 44 | themesh->vertices[i] = themesh->vertices[i] - move_min; 45 | } 46 | 47 | #ifdef _DEBUG 48 | size_t debug_n_triangles = 0; 49 | size_t debug_n_voxels_tested = 0; 50 | size_t debug_n_voxels_marked = 0; 51 | #endif 52 | 53 | #pragma omp parallel for 54 | for (int64_t i = 0; i < (int64_t) info.n_triangles; i++) { 55 | // Common variables used in the voxelization process 56 | float3 delta_p = make_float3(info.unit.x, info.unit.y, info.unit.z); 57 | float3 c = make_float3(0.0f, 0.0f, 0.0f); // critical point 58 | int3 grid_max = make_int3(info.gridsize.x - 1, info.gridsize.y - 1, info.gridsize.z - 1); // grid max (grid runs from 0 to gridsize-1) 59 | #ifdef _DEBUG 60 | debug_n_triangles++; 61 | #endif 62 | // COMPUTE COMMON TRIANGLE PROPERTIES 63 | float3 v0 = trimesh_to_float3(themesh->vertices[themesh->faces[i][0]]); 64 | float3 v1 = trimesh_to_float3(themesh->vertices[themesh->faces[i][1]]); 65 | float3 v2 = trimesh_to_float3(themesh->vertices[themesh->faces[i][2]]); 66 | 67 | // Edge vectors 68 | float3 e0 = v1-v0; 69 | float3 e1 = v2-v1; 70 | float3 e2 = v0-v2; 71 | // Normal vector pointing up from the triangle 72 | float3 n = normalize(cross(e0, e1)); 73 | 74 | // COMPUTE TRIANGLE BBOX IN GRID 75 | // Triangle bounding box in world coordinates is min(v0,v1,v2) and max(v0,v1,v2) 76 | AABox 
t_bbox_world(fminf(v0, fminf(v1, v2)), fmaxf(v0, fmaxf(v1, v2))); 77 | // Triangle bounding box in voxel grid coordinates is the world bounding box divided by the grid unit vector 78 | AABox t_bbox_grid; 79 | t_bbox_grid.min = clamp(float3_to_int3(t_bbox_world.min / info.unit), make_int3(0, 0, 0), grid_max); 80 | t_bbox_grid.max = clamp(float3_to_int3(t_bbox_world.max / info.unit), make_int3(0, 0, 0), grid_max); 81 | 82 | // PREPARE PLANE TEST PROPERTIES 83 | if (n.x > 0.0f) { c.x = info.unit.x; } 84 | if (n.y > 0.0f) { c.y = info.unit.y; } 85 | if (n.z > 0.0f) { c.z = info.unit.z; } 86 | float d1 = dot(n, (c - v0)); 87 | float d2 = dot(n, ((delta_p - c) - v0)); 88 | 89 | // PREPARE PROJECTION TEST PROPERTIES 90 | // XY plane 91 | float2 n_xy_e0 = make_float2(-1.0f * e0.y, e0.x); 92 | float2 n_xy_e1 = make_float2(-1.0f * e1.y, e1.x); 93 | float2 n_xy_e2 = make_float2(-1.0f * e2.y, e2.x); 94 | if (n.z < 0.0f) { 95 | n_xy_e0 = -n_xy_e0; 96 | n_xy_e1 = -n_xy_e1; 97 | n_xy_e2 = -n_xy_e2; 98 | } 99 | float d_xy_e0 = (-1.0f * dot(n_xy_e0, make_float2(v0.x, v0.y))) + max(0.0f, info.unit.x * n_xy_e0.x) + max(0.0f, info.unit.y * n_xy_e0.y); 100 | float d_xy_e1 = (-1.0f * dot(n_xy_e1, make_float2(v1.x, v1.y))) + max(0.0f, info.unit.x * n_xy_e1.x) + max(0.0f, info.unit.y * n_xy_e1.y); 101 | float d_xy_e2 = (-1.0f * dot(n_xy_e2, make_float2(v2.x, v2.y))) + max(0.0f, info.unit.x * n_xy_e2.x) + max(0.0f, info.unit.y * n_xy_e2.y); 102 | // YZ plane 103 | float2 n_yz_e0 = make_float2(-1.0f * e0.z, e0.y); 104 | float2 n_yz_e1 = make_float2(-1.0f * e1.z, e1.y); 105 | float2 n_yz_e2 = make_float2(-1.0f * e2.z, e2.y); 106 | if (n.x < 0.0f) { 107 | n_yz_e0 = -n_yz_e0; 108 | n_yz_e1 = -n_yz_e1; 109 | n_yz_e2 = -n_yz_e2; 110 | } 111 | float d_yz_e0 = (-1.0f * dot(n_yz_e0, make_float2(v0.y, v0.z))) + max(0.0f, info.unit.y * n_yz_e0.x) + max(0.0f, info.unit.z * n_yz_e0.y); 112 | float d_yz_e1 = (-1.0f * dot(n_yz_e1, make_float2(v1.y, v1.z))) + max(0.0f, info.unit.y * n_yz_e1.x) + max(0.0f, 
info.unit.z * n_yz_e1.y); 113 | float d_yz_e2 = (-1.0f * dot(n_yz_e2, make_float2(v2.y, v2.z))) + max(0.0f, info.unit.y * n_yz_e2.x) + max(0.0f, info.unit.z * n_yz_e2.y); 114 | // ZX plane 115 | float2 n_zx_e0 = make_float2(-1.0f * e0.x, e0.z); 116 | float2 n_zx_e1 = make_float2(-1.0f * e1.x, e1.z); 117 | float2 n_zx_e2 = make_float2(-1.0f * e2.x, e2.z); 118 | if (n.y < 0.0f) { 119 | n_zx_e0 = -n_zx_e0; 120 | n_zx_e1 = -n_zx_e1; 121 | n_zx_e2 = -n_zx_e2; 122 | } 123 | float d_xz_e0 = (-1.0f * dot(n_zx_e0, make_float2(v0.z, v0.x))) + max(0.0f, info.unit.x * n_zx_e0.x) + max(0.0f, info.unit.z * n_zx_e0.y); 124 | float d_xz_e1 = (-1.0f * dot(n_zx_e1, make_float2(v1.z, v1.x))) + max(0.0f, info.unit.x * n_zx_e1.x) + max(0.0f, info.unit.z * n_zx_e1.y); 125 | float d_xz_e2 = (-1.0f * dot(n_zx_e2, make_float2(v2.z, v2.x))) + max(0.0f, info.unit.x * n_zx_e2.x) + max(0.0f, info.unit.z * n_zx_e2.y); 126 | 127 | // test possible grid boxes for overlap 128 | for (int z = t_bbox_grid.min.z; z <= t_bbox_grid.max.z; z++) { 129 | for (int y = t_bbox_grid.min.y; y <= t_bbox_grid.max.y; y++) { 130 | for (int x = t_bbox_grid.min.x; x <= t_bbox_grid.max.x; x++) { 131 | // size_t location = x + (y*info.gridsize) + (z*info.gridsize*info.gridsize); 132 | // if (checkBit(voxel_table, location)){ continue; } 133 | #ifdef _DEBUG 134 | debug_n_voxels_tested++; 135 | #endif 136 | 137 | // TRIANGLE PLANE THROUGH BOX TEST 138 | float3 p = make_float3(x * info.unit.x, y * info.unit.y, z * info.unit.z); 139 | float nDOTp = dot(n, p); 140 | if (((nDOTp + d1) * (nDOTp + d2)) > 0.0f) { continue; } 141 | 142 | // PROJECTION TESTS 143 | // XY 144 | float2 p_xy = make_float2(p.x, p.y); 145 | if ((dot(n_xy_e0, p_xy) + d_xy_e0) < 0.0f) { continue; } 146 | if ((dot(n_xy_e1, p_xy) + d_xy_e1) < 0.0f) { continue; } 147 | if ((dot(n_xy_e2, p_xy) + d_xy_e2) < 0.0f) { continue; } 148 | 149 | // YZ 150 | float2 p_yz = make_float2(p.y, p.z); 151 | if ((dot(n_yz_e0, p_yz) + d_yz_e0) < 0.0f) { continue; } 152 | if 
((dot(n_yz_e1, p_yz) + d_yz_e1) < 0.0f) { continue; } 153 | if ((dot(n_yz_e2, p_yz) + d_yz_e2) < 0.0f) { continue; } 154 | 155 | // XZ 156 | float2 p_zx = make_float2(p.z, p.x); 157 | if ((dot(n_zx_e0, p_zx) + d_xz_e0) < 0.0f) { continue; } 158 | if ((dot(n_zx_e1, p_zx) + d_xz_e1) < 0.0f) { continue; } 159 | if ((dot(n_zx_e2, p_zx) + d_xz_e2) < 0.0f) { continue; } 160 | #ifdef _DEBUG 161 | debug_n_voxels_marked += 1; 162 | #endif 163 | if (morton_order) { 164 | size_t location = mortonEncode_LUT(x, y, z); 165 | setBit(voxel_table, location); 166 | } 167 | else { 168 | size_t location = static_cast(x) + (static_cast(y)* static_cast(info.gridsize.y)) + (static_cast(z)* static_cast(info.gridsize.y)* static_cast(info.gridsize.z)); 169 | //std:: cout << "Voxel found at " << x << " " << y << " " << z << std::endl; 170 | setBit(voxel_table, location); 171 | } 172 | continue; 173 | } 174 | } 175 | } 176 | } 177 | cpu_voxelization_timer.stop(); std::fprintf(stdout, "[Perf] CPU voxelization time: %.1f ms \n", cpu_voxelization_timer.elapsed_time_milliseconds); 178 | #ifdef _DEBUG 179 | printf("[Debug] Processed %llu triangles on the CPU \n", debug_n_triangles); 180 | printf("[Debug] Tested %llu voxels for overlap on CPU \n", debug_n_voxels_tested); 181 | printf("[Debug] Marked %llu voxels as filled (includes duplicates!) 
on CPU \n", debug_n_voxels_marked); 182 | #endif 183 | } 184 | 185 | // use Xor for voxels whose corresponding bits have to flipped 186 | void setBitXor(unsigned int* voxel_table, size_t index) { 187 | size_t int_location = index / size_t(32); 188 | unsigned int bit_pos = size_t(31) - (index % size_t(32)); // we count bit positions RtL, but array indices LtR 189 | unsigned int mask = 1 << bit_pos; 190 | #pragma omp critical 191 | { 192 | voxel_table[int_location] = (voxel_table[int_location] ^ mask); 193 | } 194 | } 195 | 196 | bool TopLeftEdge(float2 v0, float2 v1) { 197 | return ((v1.y < v0.y) || (v1.y == v0.y && v0.x > v1.x)); 198 | } 199 | 200 | //check the triangle is counterclockwise or not 201 | bool checkCCW(float2 v0, float2 v1, float2 v2) { 202 | float2 e0 = v1 - v0; 203 | float2 e1 = v2 - v0; 204 | float result = e0.x * e1.y - e1.x * e0.y; 205 | if (result > 0) 206 | return true; 207 | else 208 | return false; 209 | } 210 | 211 | //find the x coordinate of the voxel 212 | float get_x_coordinate(float3 n, float3 v0, float2 point) { 213 | return (-(n.y * (point.x - v0.y) + n.z * (point.y - v0.z)) / n.x + v0.x); 214 | } 215 | 216 | //check the location with point and triangle 217 | int check_point_triangle(float2 v0, float2 v1, float2 v2, float2 point) { 218 | float2 PA = point - v0; 219 | float2 PB = point - v1; 220 | float2 PC = point - v2; 221 | 222 | float t1 = PA.x * PB.y - PA.y * PB.x; 223 | if (std::fabs(t1) < float_error && PA.x * PB.x <= 0 && PA.y * PB.y <= 0) 224 | return 1; 225 | 226 | float t2 = PB.x * PC.y - PB.y * PC.x; 227 | if (std::fabs(t2) < float_error && PB.x * PC.x <= 0 && PB.y * PC.y <= 0) 228 | return 2; 229 | 230 | float t3 = PC.x * PA.y - PC.y * PA.x; 231 | if (std::fabs(t3) < float_error && PC.x * PA.x <= 0 && PC.y * PA.y <= 0) 232 | return 3; 233 | 234 | if (t1 * t2 > 0 && t1 * t3 > 0) 235 | return 0; 236 | else 237 | return -1; 238 | } 239 | 240 | // Mesh voxelization method 241 | void cpu_voxelize_mesh_solid(voxinfo info, 
trimesh::TriMesh* themesh, unsigned int* voxel_table, bool morton_order) { 242 | Timer cpu_voxelization_timer; cpu_voxelization_timer.start(); 243 | 244 | // PREPASS 245 | // Move all vertices to origin (can be done in parallel) 246 | trimesh::vec3 move_min = float3_to_trimesh(info.bbox.min); 247 | #pragma omp parallel for 248 | for (int64_t i = 0; i < (int64_t) themesh->vertices.size(); i++) { 249 | if (i == 0) { printf("[Info] Using %d threads \n", omp_get_num_threads()); } 250 | themesh->vertices[i] = themesh->vertices[i] - move_min; 251 | } 252 | 253 | #pragma omp parallel for 254 | for (int64_t i = 0; i < (int64_t) info.n_triangles; i++) { 255 | // Triangle vertices 256 | float3 v0 = trimesh_to_float3(themesh->vertices[themesh->faces[i][0]]); 257 | float3 v1 = trimesh_to_float3(themesh->vertices[themesh->faces[i][1]]); 258 | float3 v2 = trimesh_to_float3(themesh->vertices[themesh->faces[i][2]]); 259 | // Edge vectors 260 | float3 e0 = v1 - v0; 261 | float3 e1 = v2 - v1; 262 | float3 e2 = v0 - v2; 263 | // Normal vector pointing up from the triangle 264 | float3 n = normalize(cross(e0, e1)); 265 | if (std::fabs(n.x) < float_error) {continue;} 266 | 267 | // Calculate the projection of three point into yoz plane 268 | float2 v0_yz = make_float2(v0.y, v0.z); 269 | float2 v1_yz = make_float2(v1.y, v1.z); 270 | float2 v2_yz = make_float2(v2.y, v2.z); 271 | 272 | // Set the triangle counterclockwise 273 | if (!checkCCW(v0_yz, v1_yz, v2_yz)) 274 | { 275 | float2 v3 = v1_yz; 276 | v1_yz = v2_yz; 277 | v2_yz = v3; 278 | } 279 | 280 | // COMPUTE TRIANGLE BBOX IN GRID 281 | // Triangle bounding box in world coordinates is min(v0,v1,v2) and max(v0,v1,v2) 282 | float2 bbox_max = fmaxf(v0_yz, fmaxf(v1_yz, v2_yz)); 283 | float2 bbox_min = fminf(v0_yz, fminf(v1_yz, v2_yz)); 284 | 285 | float2 bbox_max_grid = make_float2(floor(bbox_max.x / info.unit.y - 0.5f), floor(bbox_max.y / info.unit.z - 0.5f)); 286 | float2 bbox_min_grid = make_float2(ceil(bbox_min.x / info.unit.y - 
0.5f), ceil(bbox_min.y / info.unit.z - 0.5f)); 287 | 288 | for (int y = static_cast(bbox_min_grid.x); y <= bbox_max_grid.x; y++) 289 | { 290 | for (int z = static_cast(bbox_min_grid.y); z <= bbox_max_grid.y; z++) 291 | { 292 | float2 point = make_float2((y + 0.5f) * info.unit.y, (z + 0.5f) * info.unit.z); 293 | int checknum = check_point_triangle(v0_yz, v1_yz, v2_yz, point); 294 | if ((checknum == 1 && TopLeftEdge(v0_yz, v1_yz)) || (checknum == 2 && TopLeftEdge(v1_yz, v2_yz)) || (checknum == 3 && TopLeftEdge(v2_yz, v0_yz)) || (checknum == 0)) 295 | { 296 | unsigned int xmax = int(get_x_coordinate(n, v0, point) / info.unit.x - 0.5); 297 | for (unsigned int x = 0; x <= xmax; x++) 298 | { 299 | if (morton_order) { 300 | size_t location = mortonEncode_LUT(x, y, z); 301 | setBitXor(voxel_table, location); 302 | } 303 | else { 304 | size_t location = static_cast(x) + (static_cast(y) * static_cast(info.gridsize.y)) + (static_cast(z) * static_cast(info.gridsize.y) * static_cast(info.gridsize.z)); 305 | setBitXor(voxel_table, location); 306 | } 307 | continue; 308 | } 309 | } 310 | } 311 | } 312 | } 313 | cpu_voxelization_timer.stop(); fprintf(stdout, "[Perf] CPU voxelization time: %.1f ms \n", cpu_voxelization_timer.elapsed_time_milliseconds); 314 | } 315 | } -------------------------------------------------------------------------------- /src/cpu_voxelizer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "libs/cuda/helper_math.h" 8 | #include "util.h" 9 | #include "timer.h" 10 | #include "morton_LUTs.h" 11 | 12 | namespace cpu_voxelizer { 13 | void cpu_voxelize_mesh(voxinfo info, trimesh::TriMesh* themesh, unsigned int* voxel_table, bool morton_order); 14 | void cpu_voxelize_mesh_solid(voxinfo info, trimesh::TriMesh* themesh, unsigned int* voxel_table, bool morton_order); 15 | } 
-------------------------------------------------------------------------------- /src/libs/cuda/helper_cuda.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 1993-2017 NVIDIA Corporation. All rights reserved. 3 | * 4 | * Please refer to the NVIDIA end user license agreement (EULA) associated 5 | * with this source code for terms and conditions that govern your use of 6 | * this software. Any use, reproduction, disclosure, or distribution of 7 | * this software and related documentation outside the terms of the EULA 8 | * is strictly prohibited. 9 | * 10 | */ 11 | 12 | //////////////////////////////////////////////////////////////////////////////// 13 | // These are CUDA Helper functions for initialization and error checking 14 | 15 | #ifndef COMMON_HELPER_CUDA_H_ 16 | #define COMMON_HELPER_CUDA_H_ 17 | 18 | #pragma once 19 | 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | #include "helper_string.h" 26 | 27 | #ifndef EXIT_WAIVED 28 | #define EXIT_WAIVED 2 29 | #endif 30 | 31 | // Note, it is required that your SDK sample to include the proper header 32 | // files, please refer the CUDA examples for examples of the needed CUDA 33 | // headers, which may change depending on which CUDA functions are used. 34 | 35 | // CUDA Runtime error messages 36 | #ifdef __DRIVER_TYPES_H__ 37 | static const char *_cudaGetErrorEnum(cudaError_t error) { 38 | return cudaGetErrorName(error); 39 | } 40 | #endif 41 | 42 | #ifdef CUDA_DRIVER_API 43 | // CUDA Driver API errors 44 | static const char *_cudaGetErrorEnum(CUresult error) { 45 | static char unknown[] = ""; 46 | const char *ret = NULL; 47 | cuGetErrorName(error, &ret); 48 | return ret ? 
ret : unknown; 49 | } 50 | #endif 51 | 52 | #ifdef CUBLAS_API_H_ 53 | // cuBLAS API errors 54 | static const char *_cudaGetErrorEnum(cublasStatus_t error) { 55 | switch (error) { 56 | case CUBLAS_STATUS_SUCCESS: 57 | return "CUBLAS_STATUS_SUCCESS"; 58 | 59 | case CUBLAS_STATUS_NOT_INITIALIZED: 60 | return "CUBLAS_STATUS_NOT_INITIALIZED"; 61 | 62 | case CUBLAS_STATUS_ALLOC_FAILED: 63 | return "CUBLAS_STATUS_ALLOC_FAILED"; 64 | 65 | case CUBLAS_STATUS_INVALID_VALUE: 66 | return "CUBLAS_STATUS_INVALID_VALUE"; 67 | 68 | case CUBLAS_STATUS_ARCH_MISMATCH: 69 | return "CUBLAS_STATUS_ARCH_MISMATCH"; 70 | 71 | case CUBLAS_STATUS_MAPPING_ERROR: 72 | return "CUBLAS_STATUS_MAPPING_ERROR"; 73 | 74 | case CUBLAS_STATUS_EXECUTION_FAILED: 75 | return "CUBLAS_STATUS_EXECUTION_FAILED"; 76 | 77 | case CUBLAS_STATUS_INTERNAL_ERROR: 78 | return "CUBLAS_STATUS_INTERNAL_ERROR"; 79 | 80 | case CUBLAS_STATUS_NOT_SUPPORTED: 81 | return "CUBLAS_STATUS_NOT_SUPPORTED"; 82 | 83 | case CUBLAS_STATUS_LICENSE_ERROR: 84 | return "CUBLAS_STATUS_LICENSE_ERROR"; 85 | } 86 | 87 | return ""; 88 | } 89 | #endif 90 | 91 | #ifdef _CUFFT_H_ 92 | // cuFFT API errors 93 | static const char *_cudaGetErrorEnum(cufftResult error) { 94 | switch (error) { 95 | case CUFFT_SUCCESS: 96 | return "CUFFT_SUCCESS"; 97 | 98 | case CUFFT_INVALID_PLAN: 99 | return "CUFFT_INVALID_PLAN"; 100 | 101 | case CUFFT_ALLOC_FAILED: 102 | return "CUFFT_ALLOC_FAILED"; 103 | 104 | case CUFFT_INVALID_TYPE: 105 | return "CUFFT_INVALID_TYPE"; 106 | 107 | case CUFFT_INVALID_VALUE: 108 | return "CUFFT_INVALID_VALUE"; 109 | 110 | case CUFFT_INTERNAL_ERROR: 111 | return "CUFFT_INTERNAL_ERROR"; 112 | 113 | case CUFFT_EXEC_FAILED: 114 | return "CUFFT_EXEC_FAILED"; 115 | 116 | case CUFFT_SETUP_FAILED: 117 | return "CUFFT_SETUP_FAILED"; 118 | 119 | case CUFFT_INVALID_SIZE: 120 | return "CUFFT_INVALID_SIZE"; 121 | 122 | case CUFFT_UNALIGNED_DATA: 123 | return "CUFFT_UNALIGNED_DATA"; 124 | 125 | case CUFFT_INCOMPLETE_PARAMETER_LIST: 126 | return 
"CUFFT_INCOMPLETE_PARAMETER_LIST"; 127 | 128 | case CUFFT_INVALID_DEVICE: 129 | return "CUFFT_INVALID_DEVICE"; 130 | 131 | case CUFFT_PARSE_ERROR: 132 | return "CUFFT_PARSE_ERROR"; 133 | 134 | case CUFFT_NO_WORKSPACE: 135 | return "CUFFT_NO_WORKSPACE"; 136 | 137 | case CUFFT_NOT_IMPLEMENTED: 138 | return "CUFFT_NOT_IMPLEMENTED"; 139 | 140 | case CUFFT_LICENSE_ERROR: 141 | return "CUFFT_LICENSE_ERROR"; 142 | 143 | case CUFFT_NOT_SUPPORTED: 144 | return "CUFFT_NOT_SUPPORTED"; 145 | } 146 | 147 | return ""; 148 | } 149 | #endif 150 | 151 | #ifdef CUSPARSEAPI 152 | // cuSPARSE API errors 153 | static const char *_cudaGetErrorEnum(cusparseStatus_t error) { 154 | switch (error) { 155 | case CUSPARSE_STATUS_SUCCESS: 156 | return "CUSPARSE_STATUS_SUCCESS"; 157 | 158 | case CUSPARSE_STATUS_NOT_INITIALIZED: 159 | return "CUSPARSE_STATUS_NOT_INITIALIZED"; 160 | 161 | case CUSPARSE_STATUS_ALLOC_FAILED: 162 | return "CUSPARSE_STATUS_ALLOC_FAILED"; 163 | 164 | case CUSPARSE_STATUS_INVALID_VALUE: 165 | return "CUSPARSE_STATUS_INVALID_VALUE"; 166 | 167 | case CUSPARSE_STATUS_ARCH_MISMATCH: 168 | return "CUSPARSE_STATUS_ARCH_MISMATCH"; 169 | 170 | case CUSPARSE_STATUS_MAPPING_ERROR: 171 | return "CUSPARSE_STATUS_MAPPING_ERROR"; 172 | 173 | case CUSPARSE_STATUS_EXECUTION_FAILED: 174 | return "CUSPARSE_STATUS_EXECUTION_FAILED"; 175 | 176 | case CUSPARSE_STATUS_INTERNAL_ERROR: 177 | return "CUSPARSE_STATUS_INTERNAL_ERROR"; 178 | 179 | case CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED: 180 | return "CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED"; 181 | } 182 | 183 | return ""; 184 | } 185 | #endif 186 | 187 | #ifdef CUSOLVER_COMMON_H_ 188 | // cuSOLVER API errors 189 | static const char *_cudaGetErrorEnum(cusolverStatus_t error) { 190 | switch (error) { 191 | case CUSOLVER_STATUS_SUCCESS: 192 | return "CUSOLVER_STATUS_SUCCESS"; 193 | case CUSOLVER_STATUS_NOT_INITIALIZED: 194 | return "CUSOLVER_STATUS_NOT_INITIALIZED"; 195 | case CUSOLVER_STATUS_ALLOC_FAILED: 196 | return 
"CUSOLVER_STATUS_ALLOC_FAILED"; 197 | case CUSOLVER_STATUS_INVALID_VALUE: 198 | return "CUSOLVER_STATUS_INVALID_VALUE"; 199 | case CUSOLVER_STATUS_ARCH_MISMATCH: 200 | return "CUSOLVER_STATUS_ARCH_MISMATCH"; 201 | case CUSOLVER_STATUS_MAPPING_ERROR: 202 | return "CUSOLVER_STATUS_MAPPING_ERROR"; 203 | case CUSOLVER_STATUS_EXECUTION_FAILED: 204 | return "CUSOLVER_STATUS_EXECUTION_FAILED"; 205 | case CUSOLVER_STATUS_INTERNAL_ERROR: 206 | return "CUSOLVER_STATUS_INTERNAL_ERROR"; 207 | case CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED: 208 | return "CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED"; 209 | case CUSOLVER_STATUS_NOT_SUPPORTED: 210 | return "CUSOLVER_STATUS_NOT_SUPPORTED "; 211 | case CUSOLVER_STATUS_ZERO_PIVOT: 212 | return "CUSOLVER_STATUS_ZERO_PIVOT"; 213 | case CUSOLVER_STATUS_INVALID_LICENSE: 214 | return "CUSOLVER_STATUS_INVALID_LICENSE"; 215 | } 216 | 217 | return ""; 218 | } 219 | #endif 220 | 221 | #ifdef CURAND_H_ 222 | // cuRAND API errors 223 | static const char *_cudaGetErrorEnum(curandStatus_t error) { 224 | switch (error) { 225 | case CURAND_STATUS_SUCCESS: 226 | return "CURAND_STATUS_SUCCESS"; 227 | 228 | case CURAND_STATUS_VERSION_MISMATCH: 229 | return "CURAND_STATUS_VERSION_MISMATCH"; 230 | 231 | case CURAND_STATUS_NOT_INITIALIZED: 232 | return "CURAND_STATUS_NOT_INITIALIZED"; 233 | 234 | case CURAND_STATUS_ALLOCATION_FAILED: 235 | return "CURAND_STATUS_ALLOCATION_FAILED"; 236 | 237 | case CURAND_STATUS_TYPE_ERROR: 238 | return "CURAND_STATUS_TYPE_ERROR"; 239 | 240 | case CURAND_STATUS_OUT_OF_RANGE: 241 | return "CURAND_STATUS_OUT_OF_RANGE"; 242 | 243 | case CURAND_STATUS_LENGTH_NOT_MULTIPLE: 244 | return "CURAND_STATUS_LENGTH_NOT_MULTIPLE"; 245 | 246 | case CURAND_STATUS_DOUBLE_PRECISION_REQUIRED: 247 | return "CURAND_STATUS_DOUBLE_PRECISION_REQUIRED"; 248 | 249 | case CURAND_STATUS_LAUNCH_FAILURE: 250 | return "CURAND_STATUS_LAUNCH_FAILURE"; 251 | 252 | case CURAND_STATUS_PREEXISTING_FAILURE: 253 | return "CURAND_STATUS_PREEXISTING_FAILURE"; 254 
| 255 | case CURAND_STATUS_INITIALIZATION_FAILED: 256 | return "CURAND_STATUS_INITIALIZATION_FAILED"; 257 | 258 | case CURAND_STATUS_ARCH_MISMATCH: 259 | return "CURAND_STATUS_ARCH_MISMATCH"; 260 | 261 | case CURAND_STATUS_INTERNAL_ERROR: 262 | return "CURAND_STATUS_INTERNAL_ERROR"; 263 | } 264 | 265 | return ""; 266 | } 267 | #endif 268 | 269 | #ifdef NVJPEGAPI 270 | // nvJPEG API errors 271 | static const char *_cudaGetErrorEnum(nvjpegStatus_t error) { 272 | switch (error) { 273 | case NVJPEG_STATUS_SUCCESS: 274 | return "NVJPEG_STATUS_SUCCESS"; 275 | 276 | case NVJPEG_STATUS_NOT_INITIALIZED: 277 | return "NVJPEG_STATUS_NOT_INITIALIZED"; 278 | 279 | case NVJPEG_STATUS_INVALID_PARAMETER: 280 | return "NVJPEG_STATUS_INVALID_PARAMETER"; 281 | 282 | case NVJPEG_STATUS_BAD_JPEG: 283 | return "NVJPEG_STATUS_BAD_JPEG"; 284 | 285 | case NVJPEG_STATUS_JPEG_NOT_SUPPORTED: 286 | return "NVJPEG_STATUS_JPEG_NOT_SUPPORTED"; 287 | 288 | case NVJPEG_STATUS_ALLOCATOR_FAILURE: 289 | return "NVJPEG_STATUS_ALLOCATOR_FAILURE"; 290 | 291 | case NVJPEG_STATUS_EXECUTION_FAILED: 292 | return "NVJPEG_STATUS_EXECUTION_FAILED"; 293 | 294 | case NVJPEG_STATUS_ARCH_MISMATCH: 295 | return "NVJPEG_STATUS_ARCH_MISMATCH"; 296 | 297 | case NVJPEG_STATUS_INTERNAL_ERROR: 298 | return "NVJPEG_STATUS_INTERNAL_ERROR"; 299 | } 300 | 301 | return ""; 302 | } 303 | #endif 304 | 305 | #ifdef NV_NPPIDEFS_H 306 | // NPP API errors 307 | static const char *_cudaGetErrorEnum(NppStatus error) { 308 | switch (error) { 309 | case NPP_NOT_SUPPORTED_MODE_ERROR: 310 | return "NPP_NOT_SUPPORTED_MODE_ERROR"; 311 | 312 | case NPP_ROUND_MODE_NOT_SUPPORTED_ERROR: 313 | return "NPP_ROUND_MODE_NOT_SUPPORTED_ERROR"; 314 | 315 | case NPP_RESIZE_NO_OPERATION_ERROR: 316 | return "NPP_RESIZE_NO_OPERATION_ERROR"; 317 | 318 | case NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY: 319 | return "NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY"; 320 | 321 | #if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000 322 | 323 | case 
NPP_BAD_ARG_ERROR: 324 | return "NPP_BAD_ARGUMENT_ERROR"; 325 | 326 | case NPP_COEFF_ERROR: 327 | return "NPP_COEFFICIENT_ERROR"; 328 | 329 | case NPP_RECT_ERROR: 330 | return "NPP_RECTANGLE_ERROR"; 331 | 332 | case NPP_QUAD_ERROR: 333 | return "NPP_QUADRANGLE_ERROR"; 334 | 335 | case NPP_MEM_ALLOC_ERR: 336 | return "NPP_MEMORY_ALLOCATION_ERROR"; 337 | 338 | case NPP_HISTO_NUMBER_OF_LEVELS_ERROR: 339 | return "NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR"; 340 | 341 | case NPP_INVALID_INPUT: 342 | return "NPP_INVALID_INPUT"; 343 | 344 | case NPP_POINTER_ERROR: 345 | return "NPP_POINTER_ERROR"; 346 | 347 | case NPP_WARNING: 348 | return "NPP_WARNING"; 349 | 350 | case NPP_ODD_ROI_WARNING: 351 | return "NPP_ODD_ROI_WARNING"; 352 | #else 353 | 354 | // These are for CUDA 5.5 or higher 355 | case NPP_BAD_ARGUMENT_ERROR: 356 | return "NPP_BAD_ARGUMENT_ERROR"; 357 | 358 | case NPP_COEFFICIENT_ERROR: 359 | return "NPP_COEFFICIENT_ERROR"; 360 | 361 | case NPP_RECTANGLE_ERROR: 362 | return "NPP_RECTANGLE_ERROR"; 363 | 364 | case NPP_QUADRANGLE_ERROR: 365 | return "NPP_QUADRANGLE_ERROR"; 366 | 367 | case NPP_MEMORY_ALLOCATION_ERR: 368 | return "NPP_MEMORY_ALLOCATION_ERROR"; 369 | 370 | case NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR: 371 | return "NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR"; 372 | 373 | case NPP_INVALID_HOST_POINTER_ERROR: 374 | return "NPP_INVALID_HOST_POINTER_ERROR"; 375 | 376 | case NPP_INVALID_DEVICE_POINTER_ERROR: 377 | return "NPP_INVALID_DEVICE_POINTER_ERROR"; 378 | #endif 379 | 380 | case NPP_LUT_NUMBER_OF_LEVELS_ERROR: 381 | return "NPP_LUT_NUMBER_OF_LEVELS_ERROR"; 382 | 383 | case NPP_TEXTURE_BIND_ERROR: 384 | return "NPP_TEXTURE_BIND_ERROR"; 385 | 386 | case NPP_WRONG_INTERSECTION_ROI_ERROR: 387 | return "NPP_WRONG_INTERSECTION_ROI_ERROR"; 388 | 389 | case NPP_NOT_EVEN_STEP_ERROR: 390 | return "NPP_NOT_EVEN_STEP_ERROR"; 391 | 392 | case NPP_INTERPOLATION_ERROR: 393 | return "NPP_INTERPOLATION_ERROR"; 394 | 395 | case NPP_RESIZE_FACTOR_ERROR: 396 | return 
"NPP_RESIZE_FACTOR_ERROR"; 397 | 398 | case NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR: 399 | return "NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR"; 400 | 401 | #if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000 402 | 403 | case NPP_MEMFREE_ERR: 404 | return "NPP_MEMFREE_ERR"; 405 | 406 | case NPP_MEMSET_ERR: 407 | return "NPP_MEMSET_ERR"; 408 | 409 | case NPP_MEMCPY_ERR: 410 | return "NPP_MEMCPY_ERROR"; 411 | 412 | case NPP_MIRROR_FLIP_ERR: 413 | return "NPP_MIRROR_FLIP_ERR"; 414 | #else 415 | 416 | case NPP_MEMFREE_ERROR: 417 | return "NPP_MEMFREE_ERROR"; 418 | 419 | case NPP_MEMSET_ERROR: 420 | return "NPP_MEMSET_ERROR"; 421 | 422 | case NPP_MEMCPY_ERROR: 423 | return "NPP_MEMCPY_ERROR"; 424 | 425 | case NPP_MIRROR_FLIP_ERROR: 426 | return "NPP_MIRROR_FLIP_ERROR"; 427 | #endif 428 | 429 | case NPP_ALIGNMENT_ERROR: 430 | return "NPP_ALIGNMENT_ERROR"; 431 | 432 | case NPP_STEP_ERROR: 433 | return "NPP_STEP_ERROR"; 434 | 435 | case NPP_SIZE_ERROR: 436 | return "NPP_SIZE_ERROR"; 437 | 438 | case NPP_NULL_POINTER_ERROR: 439 | return "NPP_NULL_POINTER_ERROR"; 440 | 441 | case NPP_CUDA_KERNEL_EXECUTION_ERROR: 442 | return "NPP_CUDA_KERNEL_EXECUTION_ERROR"; 443 | 444 | case NPP_NOT_IMPLEMENTED_ERROR: 445 | return "NPP_NOT_IMPLEMENTED_ERROR"; 446 | 447 | case NPP_ERROR: 448 | return "NPP_ERROR"; 449 | 450 | case NPP_SUCCESS: 451 | return "NPP_SUCCESS"; 452 | 453 | case NPP_WRONG_INTERSECTION_QUAD_WARNING: 454 | return "NPP_WRONG_INTERSECTION_QUAD_WARNING"; 455 | 456 | case NPP_MISALIGNED_DST_ROI_WARNING: 457 | return "NPP_MISALIGNED_DST_ROI_WARNING"; 458 | 459 | case NPP_AFFINE_QUAD_INCORRECT_WARNING: 460 | return "NPP_AFFINE_QUAD_INCORRECT_WARNING"; 461 | 462 | case NPP_DOUBLE_SIZE_WARNING: 463 | return "NPP_DOUBLE_SIZE_WARNING"; 464 | 465 | case NPP_WRONG_INTERSECTION_ROI_WARNING: 466 | return "NPP_WRONG_INTERSECTION_ROI_WARNING"; 467 | 468 | #if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) >= 0x6000 469 | /* These are 6.0 or higher */ 470 | case 
NPP_LUT_PALETTE_BITSIZE_ERROR: 471 | return "NPP_LUT_PALETTE_BITSIZE_ERROR"; 472 | 473 | case NPP_ZC_MODE_NOT_SUPPORTED_ERROR: 474 | return "NPP_ZC_MODE_NOT_SUPPORTED_ERROR"; 475 | 476 | case NPP_QUALITY_INDEX_ERROR: 477 | return "NPP_QUALITY_INDEX_ERROR"; 478 | 479 | case NPP_CHANNEL_ORDER_ERROR: 480 | return "NPP_CHANNEL_ORDER_ERROR"; 481 | 482 | case NPP_ZERO_MASK_VALUE_ERROR: 483 | return "NPP_ZERO_MASK_VALUE_ERROR"; 484 | 485 | case NPP_NUMBER_OF_CHANNELS_ERROR: 486 | return "NPP_NUMBER_OF_CHANNELS_ERROR"; 487 | 488 | case NPP_COI_ERROR: 489 | return "NPP_COI_ERROR"; 490 | 491 | case NPP_DIVISOR_ERROR: 492 | return "NPP_DIVISOR_ERROR"; 493 | 494 | case NPP_CHANNEL_ERROR: 495 | return "NPP_CHANNEL_ERROR"; 496 | 497 | case NPP_STRIDE_ERROR: 498 | return "NPP_STRIDE_ERROR"; 499 | 500 | case NPP_ANCHOR_ERROR: 501 | return "NPP_ANCHOR_ERROR"; 502 | 503 | case NPP_MASK_SIZE_ERROR: 504 | return "NPP_MASK_SIZE_ERROR"; 505 | 506 | case NPP_MOMENT_00_ZERO_ERROR: 507 | return "NPP_MOMENT_00_ZERO_ERROR"; 508 | 509 | case NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR: 510 | return "NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR"; 511 | 512 | case NPP_THRESHOLD_ERROR: 513 | return "NPP_THRESHOLD_ERROR"; 514 | 515 | case NPP_CONTEXT_MATCH_ERROR: 516 | return "NPP_CONTEXT_MATCH_ERROR"; 517 | 518 | case NPP_FFT_FLAG_ERROR: 519 | return "NPP_FFT_FLAG_ERROR"; 520 | 521 | case NPP_FFT_ORDER_ERROR: 522 | return "NPP_FFT_ORDER_ERROR"; 523 | 524 | case NPP_SCALE_RANGE_ERROR: 525 | return "NPP_SCALE_RANGE_ERROR"; 526 | 527 | case NPP_DATA_TYPE_ERROR: 528 | return "NPP_DATA_TYPE_ERROR"; 529 | 530 | case NPP_OUT_OFF_RANGE_ERROR: 531 | return "NPP_OUT_OFF_RANGE_ERROR"; 532 | 533 | case NPP_DIVIDE_BY_ZERO_ERROR: 534 | return "NPP_DIVIDE_BY_ZERO_ERROR"; 535 | 536 | case NPP_RANGE_ERROR: 537 | return "NPP_RANGE_ERROR"; 538 | 539 | case NPP_NO_MEMORY_ERROR: 540 | return "NPP_NO_MEMORY_ERROR"; 541 | 542 | case NPP_ERROR_RESERVED: 543 | return "NPP_ERROR_RESERVED"; 544 | 545 | case NPP_NO_OPERATION_WARNING: 546 | 
return "NPP_NO_OPERATION_WARNING"; 547 | 548 | case NPP_DIVIDE_BY_ZERO_WARNING: 549 | return "NPP_DIVIDE_BY_ZERO_WARNING"; 550 | #endif 551 | 552 | #if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) >= 0x7000 553 | /* These are 7.0 or higher */ 554 | case NPP_OVERFLOW_ERROR: 555 | return "NPP_OVERFLOW_ERROR"; 556 | 557 | case NPP_CORRUPTED_DATA_ERROR: 558 | return "NPP_CORRUPTED_DATA_ERROR"; 559 | #endif 560 | } 561 | 562 | return ""; 563 | } 564 | #endif 565 | 566 | template 567 | void check(T result, char const *const func, const char *const file, 568 | int const line) { 569 | if (result) { 570 | fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \"%s\" \n", file, line, 571 | static_cast(result), _cudaGetErrorEnum(result), func); 572 | exit(EXIT_FAILURE); 573 | } 574 | } 575 | 576 | #ifdef __DRIVER_TYPES_H__ 577 | // This will output the proper CUDA error strings in the event 578 | // that a CUDA host call returns an error 579 | #define checkCudaErrors(val) check((val), #val, __FILE__, __LINE__) 580 | 581 | // This will output the proper error string when calling cudaGetLastError 582 | #define getLastCudaError(msg) __getLastCudaError(msg, __FILE__, __LINE__) 583 | 584 | inline void __getLastCudaError(const char *errorMessage, const char *file, 585 | const int line) { 586 | cudaError_t err = cudaGetLastError(); 587 | 588 | if (cudaSuccess != err) { 589 | fprintf(stderr, 590 | "%s(%i) : getLastCudaError() CUDA error :" 591 | " %s : (%d) %s.\n", 592 | file, line, errorMessage, static_cast(err), 593 | cudaGetErrorString(err)); 594 | exit(EXIT_FAILURE); 595 | } 596 | } 597 | 598 | // This will only print the proper error string when calling cudaGetLastError 599 | // but not exit program incase error detected. 
600 | #define printLastCudaError(msg) __printLastCudaError(msg, __FILE__, __LINE__) 601 | 602 | inline void __printLastCudaError(const char *errorMessage, const char *file, 603 | const int line) { 604 | cudaError_t err = cudaGetLastError(); 605 | 606 | if (cudaSuccess != err) { 607 | fprintf(stderr, 608 | "%s(%i) : getLastCudaError() CUDA error :" 609 | " %s : (%d) %s.\n", 610 | file, line, errorMessage, static_cast(err), 611 | cudaGetErrorString(err)); 612 | } 613 | } 614 | #endif 615 | 616 | #ifndef MAX 617 | #define MAX(a, b) (a > b ? a : b) 618 | #endif 619 | 620 | // Float To Int conversion 621 | inline int ftoi(float value) { 622 | return (value >= 0 ? static_cast(value + 0.5) 623 | : static_cast(value - 0.5)); 624 | } 625 | 626 | // Beginning of GPU Architecture definitions 627 | inline int _ConvertSMVer2Cores(int major, int minor) { 628 | // Defines for GPU Architecture types (using the SM version to determine 629 | // the # of cores per SM 630 | typedef struct { 631 | int SM; // 0xMm (hexidecimal notation), M = SM Major version, 632 | // and m = SM minor version 633 | int Cores; 634 | } sSMtoCores; 635 | 636 | sSMtoCores nGpuArchCoresPerSM[] = { 637 | {0x30, 192}, 638 | {0x32, 192}, 639 | {0x35, 192}, 640 | {0x37, 192}, 641 | {0x50, 128}, 642 | {0x52, 128}, 643 | {0x53, 128}, 644 | {0x60, 64}, 645 | {0x61, 128}, 646 | {0x62, 128}, 647 | {0x70, 64}, 648 | {0x72, 64}, 649 | {0x75, 64}, 650 | {0x80, 64}, 651 | {0x86, 128}, 652 | {0x87, 128}, 653 | {-1, -1}}; 654 | 655 | int index = 0; 656 | 657 | while (nGpuArchCoresPerSM[index].SM != -1) { 658 | if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor)) { 659 | return nGpuArchCoresPerSM[index].Cores; 660 | } 661 | 662 | index++; 663 | } 664 | 665 | // If we don't find the values, we default use the previous one 666 | // to run properly 667 | printf( 668 | "MapSMtoCores for SM %d.%d is undefined." 
669 | " Default to use %d Cores/SM\n", 670 | major, minor, nGpuArchCoresPerSM[index - 1].Cores); 671 | return nGpuArchCoresPerSM[index - 1].Cores; 672 | } 673 | 674 | inline const char* _ConvertSMVer2ArchName(int major, int minor) { 675 | // Defines for GPU Architecture types (using the SM version to determine 676 | // the GPU Arch name) 677 | typedef struct { 678 | int SM; // 0xMm (hexidecimal notation), M = SM Major version, 679 | // and m = SM minor version 680 | const char* name; 681 | } sSMtoArchName; 682 | 683 | sSMtoArchName nGpuArchNameSM[] = { 684 | {0x30, "Kepler"}, 685 | {0x32, "Kepler"}, 686 | {0x35, "Kepler"}, 687 | {0x37, "Kepler"}, 688 | {0x50, "Maxwell"}, 689 | {0x52, "Maxwell"}, 690 | {0x53, "Maxwell"}, 691 | {0x60, "Pascal"}, 692 | {0x61, "Pascal"}, 693 | {0x62, "Pascal"}, 694 | {0x70, "Volta"}, 695 | {0x72, "Xavier"}, 696 | {0x75, "Turing"}, 697 | {0x80, "Ampere"}, 698 | {0x86, "Ampere"}, 699 | {0x87, "Ampere"}, 700 | {-1, "Graphics Device"}}; 701 | 702 | int index = 0; 703 | 704 | while (nGpuArchNameSM[index].SM != -1) { 705 | if (nGpuArchNameSM[index].SM == ((major << 4) + minor)) { 706 | return nGpuArchNameSM[index].name; 707 | } 708 | 709 | index++; 710 | } 711 | 712 | // If we don't find the values, we default use the previous one 713 | // to run properly 714 | printf( 715 | "MapSMtoArchName for SM %d.%d is undefined." 
716 | " Default to use %s\n", 717 | major, minor, nGpuArchNameSM[index - 1].name); 718 | return nGpuArchNameSM[index - 1].name; 719 | } 720 | // end of GPU Architecture definitions 721 | 722 | #ifdef __CUDA_RUNTIME_H__ 723 | // General GPU Device CUDA Initialization 724 | inline int gpuDeviceInit(int devID) { 725 | int device_count; 726 | checkCudaErrors(cudaGetDeviceCount(&device_count)); 727 | 728 | if (device_count == 0) { 729 | fprintf(stderr, 730 | "gpuDeviceInit() CUDA error: " 731 | "no devices supporting CUDA.\n"); 732 | exit(EXIT_FAILURE); 733 | } 734 | 735 | if (devID < 0) { 736 | devID = 0; 737 | } 738 | 739 | if (devID > device_count - 1) { 740 | fprintf(stderr, "\n"); 741 | fprintf(stderr, ">> %d CUDA capable GPU device(s) detected. <<\n", 742 | device_count); 743 | fprintf(stderr, 744 | ">> gpuDeviceInit (-device=%d) is not a valid" 745 | " GPU device. <<\n", 746 | devID); 747 | fprintf(stderr, "\n"); 748 | return -devID; 749 | } 750 | 751 | int computeMode = -1, major = 0, minor = 0; 752 | checkCudaErrors(cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode, devID)); 753 | checkCudaErrors(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, devID)); 754 | checkCudaErrors(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, devID)); 755 | if (computeMode == cudaComputeModeProhibited) { 756 | fprintf(stderr, 757 | "Error: device is running in , no threads can use cudaSetDevice().\n"); 759 | return -1; 760 | } 761 | 762 | if (major < 1) { 763 | fprintf(stderr, "gpuDeviceInit(): GPU device does not support CUDA.\n"); 764 | exit(EXIT_FAILURE); 765 | } 766 | 767 | checkCudaErrors(cudaSetDevice(devID)); 768 | printf("gpuDeviceInit() CUDA Device [%d]: \"%s\n", devID, _ConvertSMVer2ArchName(major, minor)); 769 | 770 | return devID; 771 | } 772 | 773 | // This function returns the best GPU (with maximum GFLOPS) 774 | inline int gpuGetMaxGflopsDeviceId() { 775 | int current_device = 0, sm_per_multiproc = 0; 776 | int 
max_perf_device = 0; 777 | int device_count = 0; 778 | int devices_prohibited = 0; 779 | 780 | uint64_t max_compute_perf = 0; 781 | checkCudaErrors(cudaGetDeviceCount(&device_count)); 782 | 783 | if (device_count == 0) { 784 | fprintf(stderr, 785 | "gpuGetMaxGflopsDeviceId() CUDA error:" 786 | " no devices supporting CUDA.\n"); 787 | exit(EXIT_FAILURE); 788 | } 789 | 790 | // Find the best CUDA capable GPU device 791 | current_device = 0; 792 | 793 | while (current_device < device_count) { 794 | int computeMode = -1, major = 0, minor = 0; 795 | checkCudaErrors(cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode, current_device)); 796 | checkCudaErrors(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, current_device)); 797 | checkCudaErrors(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, current_device)); 798 | 799 | // If this GPU is not running on Compute Mode prohibited, 800 | // then we can add it to the list 801 | if (computeMode != cudaComputeModeProhibited) { 802 | if (major == 9999 && minor == 9999) { 803 | sm_per_multiproc = 1; 804 | } else { 805 | sm_per_multiproc = 806 | _ConvertSMVer2Cores(major, minor); 807 | } 808 | int multiProcessorCount = 0, clockRate = 0; 809 | checkCudaErrors(cudaDeviceGetAttribute(&multiProcessorCount, cudaDevAttrMultiProcessorCount, current_device)); 810 | cudaError_t result = cudaDeviceGetAttribute(&clockRate, cudaDevAttrClockRate, current_device); 811 | if (result != cudaSuccess) { 812 | // If cudaDevAttrClockRate attribute is not supported we 813 | // set clockRate as 1, to consider GPU with most SMs and CUDA Cores. 
814 | if(result == cudaErrorInvalidValue) { 815 | clockRate = 1; 816 | } 817 | else { 818 | fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \n", __FILE__, __LINE__, 819 | static_cast(result), _cudaGetErrorEnum(result)); 820 | exit(EXIT_FAILURE); 821 | } 822 | } 823 | uint64_t compute_perf = (uint64_t)multiProcessorCount * sm_per_multiproc * clockRate; 824 | 825 | if (compute_perf > max_compute_perf) { 826 | max_compute_perf = compute_perf; 827 | max_perf_device = current_device; 828 | } 829 | } else { 830 | devices_prohibited++; 831 | } 832 | 833 | ++current_device; 834 | } 835 | 836 | if (devices_prohibited == device_count) { 837 | fprintf(stderr, 838 | "gpuGetMaxGflopsDeviceId() CUDA error:" 839 | " all devices have compute mode prohibited.\n"); 840 | exit(EXIT_FAILURE); 841 | } 842 | 843 | return max_perf_device; 844 | } 845 | 846 | // Initialization code to find the best CUDA Device 847 | inline int findCudaDevice(int argc, const char **argv) { 848 | int devID = 0; 849 | 850 | // If the command-line has a device number specified, use it 851 | if (checkCmdLineFlag(argc, argv, "device")) { 852 | devID = getCmdLineArgumentInt(argc, argv, "device="); 853 | 854 | if (devID < 0) { 855 | printf("Invalid command line parameter\n "); 856 | exit(EXIT_FAILURE); 857 | } else { 858 | devID = gpuDeviceInit(devID); 859 | 860 | if (devID < 0) { 861 | printf("exiting...\n"); 862 | exit(EXIT_FAILURE); 863 | } 864 | } 865 | } else { 866 | // Otherwise pick the device with highest Gflops/s 867 | devID = gpuGetMaxGflopsDeviceId(); 868 | checkCudaErrors(cudaSetDevice(devID)); 869 | int major = 0, minor = 0; 870 | checkCudaErrors(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, devID)); 871 | checkCudaErrors(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, devID)); 872 | printf("GPU Device %d: \"%s\" with compute capability %d.%d\n", 873 | devID, _ConvertSMVer2ArchName(major, minor), major, minor); 874 | 875 | } 876 | 877 | return devID; 878 | } 879 
| 880 | inline int findIntegratedGPU() { 881 | int current_device = 0; 882 | int device_count = 0; 883 | int devices_prohibited = 0; 884 | 885 | checkCudaErrors(cudaGetDeviceCount(&device_count)); 886 | 887 | if (device_count == 0) { 888 | fprintf(stderr, "CUDA error: no devices supporting CUDA.\n"); 889 | exit(EXIT_FAILURE); 890 | } 891 | 892 | // Find the integrated GPU which is compute capable 893 | while (current_device < device_count) { 894 | int computeMode = -1, integrated = -1; 895 | checkCudaErrors(cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode, current_device)); 896 | checkCudaErrors(cudaDeviceGetAttribute(&integrated, cudaDevAttrIntegrated, current_device)); 897 | // If GPU is integrated and is not running on Compute Mode prohibited, 898 | // then cuda can map to GLES resource 899 | if (integrated && (computeMode != cudaComputeModeProhibited)) { 900 | checkCudaErrors(cudaSetDevice(current_device)); 901 | 902 | int major = 0, minor = 0; 903 | checkCudaErrors(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, current_device)); 904 | checkCudaErrors(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, current_device)); 905 | printf("GPU Device %d: \"%s\" with compute capability %d.%d\n\n", 906 | current_device, _ConvertSMVer2ArchName(major, minor), major, minor); 907 | 908 | return current_device; 909 | } else { 910 | devices_prohibited++; 911 | } 912 | 913 | current_device++; 914 | } 915 | 916 | if (devices_prohibited == device_count) { 917 | fprintf(stderr, 918 | "CUDA error:" 919 | " No GLES-CUDA Interop capable GPU found.\n"); 920 | exit(EXIT_FAILURE); 921 | } 922 | 923 | return -1; 924 | } 925 | 926 | // General check for CUDA GPU SM Capabilities 927 | inline bool checkCudaCapabilities(int major_version, int minor_version) { 928 | int dev; 929 | int major = 0, minor = 0; 930 | 931 | checkCudaErrors(cudaGetDevice(&dev)); 932 | checkCudaErrors(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, 
dev)); 933 | checkCudaErrors(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, dev)); 934 | 935 | if ((major > major_version) || 936 | (major == major_version && 937 | minor >= minor_version)) { 938 | printf(" Device %d: <%16s >, Compute SM %d.%d detected\n", dev, 939 | _ConvertSMVer2ArchName(major, minor), major, minor); 940 | return true; 941 | } else { 942 | printf( 943 | " No GPU device was found that can support " 944 | "CUDA compute capability %d.%d.\n", 945 | major_version, minor_version); 946 | return false; 947 | } 948 | } 949 | #endif 950 | 951 | // end of CUDA Helper Functions 952 | 953 | #endif // COMMON_HELPER_CUDA_H_ 954 | -------------------------------------------------------------------------------- /src/libs/cuda/helper_string.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 1993-2013 NVIDIA Corporation. All rights reserved. 3 | * 4 | * Please refer to the NVIDIA end user license agreement (EULA) associated 5 | * with this source code for terms and conditions that govern your use of 6 | * this software. Any use, reproduction, disclosure, or distribution of 7 | * this software and related documentation outside the terms of the EULA 8 | * is strictly prohibited. 
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fstream>
#include <string>

#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
#ifndef _CRT_SECURE_NO_DEPRECATE
#define _CRT_SECURE_NO_DEPRECATE
#endif
#ifndef STRCASECMP
#define STRCASECMP _stricmp
#endif
#ifndef STRNCASECMP
#define STRNCASECMP _strnicmp
#endif
#ifndef STRCPY
#define STRCPY(sFilePath, nLength, sPath) strcpy_s(sFilePath, nLength, sPath)
#endif

#ifndef FOPEN
#define FOPEN(fHandle, filename, mode) fopen_s(&fHandle, filename, mode)
#endif
#ifndef FOPEN_FAIL
#define FOPEN_FAIL(result) (result != 0)
#endif
#ifndef SSCANF
#define SSCANF sscanf_s
#endif
#ifndef SPRINTF
#define SPRINTF sprintf_s
#endif
#else  // Linux Includes
#include <string.h>
#include <strings.h>

#ifndef STRCASECMP
#define STRCASECMP strcasecmp
#endif
#ifndef STRNCASECMP
#define STRNCASECMP strncasecmp
#endif
#ifndef STRCPY
#define STRCPY(sFilePath, nLength, sPath) strcpy(sFilePath, sPath)
#endif

#ifndef FOPEN
#define FOPEN(fHandle, filename, mode) (fHandle = fopen(filename, mode))
#endif
#ifndef FOPEN_FAIL
#define FOPEN_FAIL(result) (result == NULL)
#endif
#ifndef SSCANF
#define SSCANF sscanf
#endif
#ifndef SPRINTF
#define SPRINTF sprintf
#endif
#endif

#ifndef EXIT_WAIVED
#define EXIT_WAIVED 2
#endif

// CUDA Utility Helper Functions

// Returns the number of leading `delimiter` characters in `string`.
//
// Returns 0 when fewer than two characters would remain after the delimiters,
// which covers the empty string, a string made only of delimiters, and a
// delimiter run followed by a single character such as "-a".
// NOTE(review): the single-character case means one-letter flags are never
// stripped; kept as is because existing callers see that behaviour today.
inline int stringRemoveDelimiter(char delimiter, const char *string) {
  int string_start = 0;

  while (string[string_start] == delimiter) {
    string_start++;
  }

  // Compared in int so an empty string does not go through the unsigned
  // wrap-around of strlen(string) - 1.
  const int length = static_cast<int>(strlen(string));
  if (string_start + 1 >= length) {
    return 0;
  }

  return string_start;
}

// Locates the extension of `filename`, i.e. the text after its last '.'.
//
//   filename   NUL-terminated name to inspect (not modified).
//   extension  receives a pointer into `filename` just past the last '.', or
//              NULL when there is no extension.
//
// Returns the index of the first extension character, or 0 when there is no
// '.' or the only '.' is the first character. A trailing '.' yields an empty
// extension. Names such as "a.b" and the empty string are handled; the
// previous backwards scan returned NULL for the former and read before the
// buffer for the latter.
inline int getFileExtension(char *filename, char **extension) {
  const char *last_dot = strrchr(filename, '.');

  if (last_dot == NULL || last_dot == filename) {
    *extension = NULL;
    return 0;
  }

  const int extension_start = static_cast<int>(last_dot - filename) + 1;
  *extension = &filename[extension_start];
  return extension_start;
}

// Returns true when any argument in argv[1..argc-1], after its leading '-'
// characters and any "=value" suffix are removed, equals `string_ref`
// (compared case-insensitively).
inline bool checkCmdLineFlag(const int argc, const char **argv,
                             const char *string_ref) {
  const int length = static_cast<int>(strlen(string_ref));

  for (int i = 1; i < argc; i++) {
    int string_start = stringRemoveDelimiter('-', argv[i]);
    const char *string_argv = &argv[i][string_start];

    // Only the part before '=' is the flag name.
    const char *equal_pos = strchr(string_argv, '=');
    int argv_length = static_cast<int>(
        equal_pos == 0 ? strlen(string_argv) : equal_pos - string_argv);

    if (length == argv_length &&
        !STRNCASECMP(string_argv, string_ref, length)) {
      return true;
    }
  }

  return false;
}

// Parses the integer value of the first argument whose name starts with
// `string_ref` and stores it, converted to T, in `*value`.
//
// Returns true when a matching argument exists. `*value` is left untouched
// when the match carries no text after the name. The text is parsed with
// atoi(), so non-numeric text yields 0.
template <class T>
inline bool getCmdLineArgumentValue(const int argc, const char **argv,
                                    const char *string_ref, T *value) {
  const int length = static_cast<int>(strlen(string_ref));

  for (int i = 1; i < argc; i++) {
    int string_start = stringRemoveDelimiter('-', argv[i]);
    const char *string_argv = &argv[i][string_start];

    if (!STRNCASECMP(string_argv, string_ref, length)) {
      if (length + 1 <= static_cast<int>(strlen(string_argv))) {
        int auto_inc = (string_argv[length] == '=') ? 1 : 0;
        *value = (T)atoi(&string_argv[length + auto_inc]);
      }

      // First match wins.
      return true;
    }
  }

  return false;
}

// Returns the integer value of the last argument whose name starts with
// `string_ref`, or 0 when no argument matches or the match has no value.
inline int getCmdLineArgumentInt(const int argc, const char **argv,
                                 const char *string_ref) {
  const int length = static_cast<int>(strlen(string_ref));
  int value = 0;

  for (int i = 1; i < argc; i++) {
    int string_start = stringRemoveDelimiter('-', argv[i]);
    const char *string_argv = &argv[i][string_start];

    if (!STRNCASECMP(string_argv, string_ref, length)) {
      if (length + 1 <= static_cast<int>(strlen(string_argv))) {
        int auto_inc = (string_argv[length] == '=') ? 1 : 0;
        value = atoi(&string_argv[length + auto_inc]);
      } else {
        value = 0;
      }
      // Keep scanning: a later occurrence overrides an earlier one.
    }
  }

  return value;
}

// Returns the floating-point value of the last argument whose name starts
// with `string_ref`, or 0 when no argument matches or the match has no value.
inline float getCmdLineArgumentFloat(const int argc, const char **argv,
                                     const char *string_ref) {
  const int length = static_cast<int>(strlen(string_ref));
  float value = 0.f;

  for (int i = 1; i < argc; i++) {
    int string_start = stringRemoveDelimiter('-', argv[i]);
    const char *string_argv = &argv[i][string_start];

    if (!STRNCASECMP(string_argv, string_ref, length)) {
      if (length + 1 <= static_cast<int>(strlen(string_argv))) {
        int auto_inc = (string_argv[length] == '=') ? 1 : 0;
        value = static_cast<float>(atof(&string_argv[length + auto_inc]));
      } else {
        value = 0.f;
      }
      // Keep scanning: a later occurrence overrides an earlier one.
    }
  }

  return value;
}

// Points `*string_retval` at the value of the last argument whose name starts
// with `string_ref`.
//
// The value is the text following the one separator character after the name
// (normally '='). When the argument ends right after the name the result is
// an empty string; previously the pointer was advanced past the terminator.
// `*string_retval` points into argv and must not be freed. It is set to NULL
// and false is returned when no argument matches.
inline bool getCmdLineArgumentString(const int argc, const char **argv,
                                     const char *string_ref,
                                     char **string_retval) {
  bool bFound = false;
  const int length = static_cast<int>(strlen(string_ref));

  for (int i = 1; i < argc; i++) {
    int string_start = stringRemoveDelimiter('-', argv[i]);
    char *string_argv = const_cast<char *>(&argv[i][string_start]);

    if (!STRNCASECMP(string_argv, string_ref, length)) {
      char *after_name = &string_argv[length];
      // Skip the separator only when there is one to skip.
      *string_retval = (*after_name == '\0') ? after_name : after_name + 1;
      bFound = true;
    }
  }

  if (!bFound) {
    *string_retval = NULL;
  }

  return bFound;
}
sample 268 | // input data, or JIT source files) The origin for the relative search may be 269 | // the .exe file, a .bat file launching an .exe, a browser .exe launching the 270 | // .exe or .bat, etc 271 | const char *searchPath[] = { 272 | "./", // same dir 273 | "./_data_files/", 274 | "./common/", // "/common/" subdir 275 | "./common/data/", // "/common/data/" subdir 276 | "./data/", // "/data/" subdir 277 | "./src/", // "/src/" subdir 278 | "./src//data/", // "/src//data/" subdir 279 | "./inc/", // "/inc/" subdir 280 | "./0_Simple/", // "/0_Simple/" subdir 281 | "./1_Utilities/", // "/1_Utilities/" subdir 282 | "./2_Graphics/", // "/2_Graphics/" subdir 283 | "./3_Imaging/", // "/3_Imaging/" subdir 284 | "./4_Finance/", // "/4_Finance/" subdir 285 | "./5_Simulations/", // "/5_Simulations/" subdir 286 | "./6_Advanced/", // "/6_Advanced/" subdir 287 | "./7_CUDALibraries/", // "/7_CUDALibraries/" subdir 288 | "./8_Android/", // "/8_Android/" subdir 289 | "./samples/", // "/samples/" subdir 290 | 291 | "./0_Simple//data/", // "/0_Simple//data/" 292 | // subdir 293 | "./1_Utilities//data/", // "/1_Utilities//data/" 294 | // subdir 295 | "./2_Graphics//data/", // "/2_Graphics//data/" 296 | // subdir 297 | "./3_Imaging//data/", // "/3_Imaging//data/" 298 | // subdir 299 | "./4_Finance//data/", // "/4_Finance//data/" 300 | // subdir 301 | "./5_Simulations//data/", // "/5_Simulations//data/" 302 | // subdir 303 | "./6_Advanced//data/", // "/6_Advanced//data/" 304 | // subdir 305 | "./7_CUDALibraries//", // "/7_CUDALibraries//" 306 | // subdir 307 | "./7_CUDALibraries//data/", // "/7_CUDALibraries//data/" 308 | // subdir 309 | 310 | "../", // up 1 in tree 311 | "../common/", // up 1 in tree, "/common/" subdir 312 | "../common/data/", // up 1 in tree, "/common/data/" subdir 313 | "../data/", // up 1 in tree, "/data/" subdir 314 | "../src/", // up 1 in tree, "/src/" subdir 315 | "../inc/", // up 1 in tree, "/inc/" subdir 316 | 317 | "../0_Simple//data/", // up 1 in tree, 
318 | // "/0_Simple//" 319 | // subdir 320 | "../1_Utilities//data/", // up 1 in tree, 321 | // "/1_Utilities//" 322 | // subdir 323 | "../2_Graphics//data/", // up 1 in tree, 324 | // "/2_Graphics//" 325 | // subdir 326 | "../3_Imaging//data/", // up 1 in tree, 327 | // "/3_Imaging//" 328 | // subdir 329 | "../4_Finance//data/", // up 1 in tree, 330 | // "/4_Finance//" 331 | // subdir 332 | "../5_Simulations//data/", // up 1 in tree, 333 | // "/5_Simulations//" 334 | // subdir 335 | "../6_Advanced//data/", // up 1 in tree, 336 | // "/6_Advanced//" 337 | // subdir 338 | "../7_CUDALibraries//data/", // up 1 in tree, 339 | // "/7_CUDALibraries//" 340 | // subdir 341 | "../8_Android//data/", // up 1 in tree, 342 | // "/8_Android//" 343 | // subdir 344 | "../samples//data/", // up 1 in tree, 345 | // "/samples//" 346 | // subdir 347 | "../../", // up 2 in tree 348 | "../../common/", // up 2 in tree, "/common/" subdir 349 | "../../common/data/", // up 2 in tree, "/common/data/" subdir 350 | "../../data/", // up 2 in tree, "/data/" subdir 351 | "../../src/", // up 2 in tree, "/src/" subdir 352 | "../../inc/", // up 2 in tree, "/inc/" subdir 353 | "../../sandbox//data/", // up 2 in tree, 354 | // "/sandbox//" 355 | // subdir 356 | "../../0_Simple//data/", // up 2 in tree, 357 | // "/0_Simple//" 358 | // subdir 359 | "../../1_Utilities//data/", // up 2 in tree, 360 | // "/1_Utilities//" 361 | // subdir 362 | "../../2_Graphics//data/", // up 2 in tree, 363 | // "/2_Graphics//" 364 | // subdir 365 | "../../3_Imaging//data/", // up 2 in tree, 366 | // "/3_Imaging//" 367 | // subdir 368 | "../../4_Finance//data/", // up 2 in tree, 369 | // "/4_Finance//" 370 | // subdir 371 | "../../5_Simulations//data/", // up 2 in tree, 372 | // "/5_Simulations//" 373 | // subdir 374 | "../../6_Advanced//data/", // up 2 in tree, 375 | // "/6_Advanced//" 376 | // subdir 377 | "../../7_CUDALibraries//data/", // up 2 in tree, 378 | // "/7_CUDALibraries//" 379 | // subdir 380 | 
"../../8_Android//data/", // up 2 in tree, 381 | // "/8_Android//" 382 | // subdir 383 | "../../samples//data/", // up 2 in tree, 384 | // "/samples//" 385 | // subdir 386 | "../../../", // up 3 in tree 387 | "../../../src//", // up 3 in tree, 388 | // "/src//" subdir 389 | "../../../src//data/", // up 3 in tree, 390 | // "/src//data/" 391 | // subdir 392 | "../../../src//src/", // up 3 in tree, 393 | // "/src//src/" 394 | // subdir 395 | "../../../src//inc/", // up 3 in tree, 396 | // "/src//inc/" 397 | // subdir 398 | "../../../sandbox//", // up 3 in tree, 399 | // "/sandbox//" 400 | // subdir 401 | "../../../sandbox//data/", // up 3 in tree, 402 | // "/sandbox//data/" 403 | // subdir 404 | "../../../sandbox//src/", // up 3 in tree, 405 | // "/sandbox//src/" 406 | // subdir 407 | "../../../sandbox//inc/", // up 3 in tree, 408 | // "/sandbox//inc/" 409 | // subdir 410 | "../../../0_Simple//data/", // up 3 in tree, 411 | // "/0_Simple//" 412 | // subdir 413 | "../../../1_Utilities//data/", // up 3 in tree, 414 | // "/1_Utilities//" 415 | // subdir 416 | "../../../2_Graphics//data/", // up 3 in tree, 417 | // "/2_Graphics//" 418 | // subdir 419 | "../../../3_Imaging//data/", // up 3 in tree, 420 | // "/3_Imaging//" 421 | // subdir 422 | "../../../4_Finance//data/", // up 3 in tree, 423 | // "/4_Finance//" 424 | // subdir 425 | "../../../5_Simulations//data/", // up 3 in tree, 426 | // "/5_Simulations//" 427 | // subdir 428 | "../../../6_Advanced//data/", // up 3 in tree, 429 | // "/6_Advanced//" 430 | // subdir 431 | "../../../7_CUDALibraries//data/", // up 3 in tree, 432 | // "/7_CUDALibraries//" 433 | // subdir 434 | "../../../8_Android//data/", // up 3 in tree, 435 | // "/8_Android//" 436 | // subdir 437 | "../../../0_Simple//", // up 3 in tree, 438 | // "/0_Simple//" 439 | // subdir 440 | "../../../1_Utilities//", // up 3 in tree, 441 | // "/1_Utilities//" 442 | // subdir 443 | "../../../2_Graphics//", // up 3 in tree, 444 | // "/2_Graphics//" 445 | // subdir 
446 | "../../../3_Imaging//", // up 3 in tree, 447 | // "/3_Imaging//" 448 | // subdir 449 | "../../../4_Finance//", // up 3 in tree, 450 | // "/4_Finance//" 451 | // subdir 452 | "../../../5_Simulations//", // up 3 in tree, 453 | // "/5_Simulations//" 454 | // subdir 455 | "../../../6_Advanced//", // up 3 in tree, 456 | // "/6_Advanced//" 457 | // subdir 458 | "../../../7_CUDALibraries//", // up 3 in tree, 459 | // "/7_CUDALibraries//" 460 | // subdir 461 | "../../../8_Android//", // up 3 in tree, 462 | // "/8_Android//" 463 | // subdir 464 | "../../../samples//data/", // up 3 in tree, 465 | // "/samples//" 466 | // subdir 467 | "../../../common/", // up 3 in tree, "../../../common/" subdir 468 | "../../../common/data/", // up 3 in tree, "../../../common/data/" subdir 469 | "../../../data/", // up 3 in tree, "../../../data/" subdir 470 | "../../../../", // up 4 in tree 471 | "../../../../src//", // up 4 in tree, 472 | // "/src//" subdir 473 | "../../../../src//data/", // up 4 in tree, 474 | // "/src//data/" 475 | // subdir 476 | "../../../../src//src/", // up 4 in tree, 477 | // "/src//src/" 478 | // subdir 479 | "../../../../src//inc/", // up 4 in tree, 480 | // "/src//inc/" 481 | // subdir 482 | "../../../../sandbox//", // up 4 in tree, 483 | // "/sandbox//" 484 | // subdir 485 | "../../../../sandbox//data/", // up 4 in tree, 486 | // "/sandbox//data/" 487 | // subdir 488 | "../../../../sandbox//src/", // up 4 in tree, 489 | // "/sandbox//src/" 490 | // subdir 491 | "../../../../sandbox//inc/", // up 4 in tree, 492 | // "/sandbox//inc/" 493 | // subdir 494 | "../../../../0_Simple//data/", // up 4 in tree, 495 | // "/0_Simple//" 496 | // subdir 497 | "../../../../1_Utilities//data/", // up 4 in tree, 498 | // "/1_Utilities//" 499 | // subdir 500 | "../../../../2_Graphics//data/", // up 4 in tree, 501 | // "/2_Graphics//" 502 | // subdir 503 | "../../../../3_Imaging//data/", // up 4 in tree, 504 | // "/3_Imaging//" 505 | // subdir 506 | 
"../../../../4_Finance//data/", // up 4 in tree, 507 | // "/4_Finance//" 508 | // subdir 509 | "../../../../5_Simulations//data/", // up 4 in tree, 510 | // "/5_Simulations//" 511 | // subdir 512 | "../../../../6_Advanced//data/", // up 4 in tree, 513 | // "/6_Advanced//" 514 | // subdir 515 | "../../../../7_CUDALibraries//data/", // up 4 in tree, 516 | // "/7_CUDALibraries//" 517 | // subdir 518 | "../../../../8_Android//data/", // up 4 in tree, 519 | // "/8_Android//" 520 | // subdir 521 | "../../../../0_Simple//", // up 4 in tree, 522 | // "/0_Simple//" 523 | // subdir 524 | "../../../../1_Utilities//", // up 4 in tree, 525 | // "/1_Utilities//" 526 | // subdir 527 | "../../../../2_Graphics//", // up 4 in tree, 528 | // "/2_Graphics//" 529 | // subdir 530 | "../../../../3_Imaging//", // up 4 in tree, 531 | // "/3_Imaging//" 532 | // subdir 533 | "../../../../4_Finance//", // up 4 in tree, 534 | // "/4_Finance//" 535 | // subdir 536 | "../../../../5_Simulations//", // up 4 in tree, 537 | // "/5_Simulations//" 538 | // subdir 539 | "../../../../6_Advanced//", // up 4 in tree, 540 | // "/6_Advanced//" 541 | // subdir 542 | "../../../../7_CUDALibraries//", // up 4 in tree, 543 | // "/7_CUDALibraries//" 544 | // subdir 545 | "../../../../8_Android//", // up 4 in tree, 546 | // "/8_Android//" 547 | // subdir 548 | "../../../../samples//data/", // up 4 in tree, 549 | // "/samples//" 550 | // subdir 551 | "../../../../common/", // up 4 in tree, "../../../common/" subdir 552 | "../../../../common/data/", // up 4 in tree, "../../../common/data/" 553 | // subdir 554 | "../../../../data/", // up 4 in tree, "../../../data/" subdir 555 | "../../../../../", // up 5 in tree 556 | "../../../../../src//", // up 5 in tree, 557 | // "/src//" 558 | // subdir 559 | "../../../../../src//data/", // up 5 in tree, 560 | // "/src//data/" 561 | // subdir 562 | "../../../../../src//src/", // up 5 in tree, 563 | // "/src//src/" 564 | // subdir 565 | "../../../../../src//inc/", // up 5 in 
tree, 566 | // "/src//inc/" 567 | // subdir 568 | "../../../../../sandbox//", // up 5 in tree, 569 | // "/sandbox//" 570 | // subdir 571 | "../../../../../sandbox//data/", // up 5 in tree, 572 | // "/sandbox//data/" 573 | // subdir 574 | "../../../../../sandbox//src/", // up 5 in tree, 575 | // "/sandbox//src/" 576 | // subdir 577 | "../../../../../sandbox//inc/", // up 5 in tree, 578 | // "/sandbox//inc/" 579 | // subdir 580 | "../../../../../0_Simple//data/", // up 5 in tree, 581 | // "/0_Simple//" 582 | // subdir 583 | "../../../../../1_Utilities//data/", // up 5 in tree, 584 | // "/1_Utilities//" 585 | // subdir 586 | "../../../../../2_Graphics//data/", // up 5 in tree, 587 | // "/2_Graphics//" 588 | // subdir 589 | "../../../../../3_Imaging//data/", // up 5 in tree, 590 | // "/3_Imaging//" 591 | // subdir 592 | "../../../../../4_Finance//data/", // up 5 in tree, 593 | // "/4_Finance//" 594 | // subdir 595 | "../../../../../5_Simulations//data/", // up 5 in tree, 596 | // "/5_Simulations//" 597 | // subdir 598 | "../../../../../6_Advanced//data/", // up 5 in tree, 599 | // "/6_Advanced//" 600 | // subdir 601 | "../../../../../7_CUDALibraries//data/", // up 5 in 602 | // tree, 603 | // "/7_CUDALibraries//" 604 | // subdir 605 | "../../../../../8_Android//data/", // up 5 in tree, 606 | // "/8_Android//" 607 | // subdir 608 | "../../../../../samples//data/", // up 5 in tree, 609 | // "/samples//" 610 | // subdir 611 | "../../../../../common/", // up 5 in tree, "../../../common/" subdir 612 | "../../../../../common/data/", // up 5 in tree, "../../../common/data/" 613 | // subdir 614 | }; 615 | 616 | // Extract the executable name 617 | std::string executable_name; 618 | 619 | if (executable_path != 0) { 620 | executable_name = std::string(executable_path); 621 | 622 | #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) 623 | // Windows path delimiter 624 | size_t delimiter_pos = executable_name.find_last_of('\\'); 625 | 
executable_name.erase(0, delimiter_pos + 1); 626 | 627 | if (executable_name.rfind(".exe") != std::string::npos) { 628 | // we strip .exe, only if the .exe is found 629 | executable_name.resize(executable_name.size() - 4); 630 | } 631 | 632 | #else 633 | // Linux & OSX path delimiter 634 | size_t delimiter_pos = executable_name.find_last_of('/'); 635 | executable_name.erase(0, delimiter_pos + 1); 636 | #endif 637 | } 638 | 639 | // Loop over all search paths and return the first hit 640 | for (unsigned int i = 0; i < sizeof(searchPath) / sizeof(char *); ++i) { 641 | std::string path(searchPath[i]); 642 | size_t executable_name_pos = path.find(""); 643 | 644 | // If there is executable_name variable in the searchPath 645 | // replace it with the value 646 | if (executable_name_pos != std::string::npos) { 647 | if (executable_path != 0) { 648 | path.replace(executable_name_pos, strlen(""), 649 | executable_name); 650 | } else { 651 | // Skip this path entry if no executable argument is given 652 | continue; 653 | } 654 | } 655 | 656 | #ifdef _DEBUG 657 | printf("sdkFindFilePath <%s> in %s\n", filename, path.c_str()); 658 | #endif 659 | 660 | // Test if the file exists 661 | path.append(filename); 662 | FILE *fp; 663 | FOPEN(fp, path.c_str(), "rb"); 664 | 665 | if (fp != NULL) { 666 | fclose(fp); 667 | // File found 668 | // returning an allocated array here for backwards compatibility reasons 669 | char *file_path = reinterpret_cast(malloc(path.length() + 1)); 670 | STRCPY(file_path, path.length() + 1, path.c_str()); 671 | return file_path; 672 | } 673 | 674 | if (fp) { 675 | fclose(fp); 676 | } 677 | } 678 | 679 | // File not found 680 | return 0; 681 | } 682 | 683 | #endif // COMMON_HELPER_STRING_H_ 684 | -------------------------------------------------------------------------------- /src/libs/magicavoxel_file_writer/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Aiekick 4 | 5 | 
Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/libs/magicavoxel_file_writer/VoxWriter.cpp: -------------------------------------------------------------------------------- 1 | // This is an independent project of an individual developer. Dear PVS-Studio, please check it. 
2 | // PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com 3 | 4 | // Copyright 2018 Stephane Cuillerdier @Aiekick 5 | 6 | // Permission is hereby granted, free of charge, to any person obtaining a 7 | // copy of this software and associated documentation files (the "Software"), 8 | // to deal in the Software without restriction, including without 9 | // limitation the rights to use, copy, modify, merge, publish, distribute, 10 | // sublicense, and/or sell copies of the Software, and to permit persons to 11 | // whom the Software is furnished to do so, subject to the following conditions: 12 | 13 | // The above copyright notice and this permission notice shall be included 14 | // in all copies or substantial portions of the Software. 15 | 16 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 18 | // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | // IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 20 | // DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
22 | 23 | // This File is a helper for write a vox file after 0.99 release to support 24 | // the world mode editor 25 | // just add all color with the color Index with AddColor 26 | // And add all voxels with the method AddVoxel with the voxel in world position, and finally save the model 27 | // that's all, the file was initially created for my Proecedural soft 28 | // "SdfMesher" cf :https://twitter.com/hashtag/sdfmesher?src=hash 29 | // it support just my needs for the moment, but i put here because its a basis for more i thinck 30 | 31 | #include "VoxWriter.h" 32 | #include 33 | #include 34 | 35 | // #define VERBOSE 36 | 37 | namespace vox { 38 | DICTstring::DICTstring() { bufferSize = 0; } 39 | 40 | void DICTstring::write(FILE* fp) { 41 | bufferSize = (int32_t)buffer.size(); 42 | fwrite(&bufferSize, sizeof(int32_t), 1, fp); 43 | fwrite(buffer.data(), sizeof(char), bufferSize, fp); 44 | } 45 | 46 | size_t DICTstring::getSize() { 47 | bufferSize = (int32_t)buffer.size(); 48 | return sizeof(int32_t) + sizeof(char) * bufferSize; 49 | } 50 | 51 | ////////////////////////////////////////////////////////////////// 52 | 53 | DICTitem::DICTitem() {} 54 | 55 | DICTitem::DICTitem(std::string vKey, std::string vValue) { 56 | key.buffer = vKey; 57 | value.buffer = vValue; 58 | } 59 | 60 | void DICTitem::write(FILE* fp) { 61 | key.write(fp); 62 | value.write(fp); 63 | } 64 | 65 | size_t DICTitem::getSize() { return key.getSize() + value.getSize(); } 66 | 67 | ////////////////////////////////////////////////////////////////// 68 | 69 | DICT::DICT() { count = 0; } 70 | 71 | void DICT::write(FILE* fp) { 72 | count = (int32_t)keys.size(); 73 | fwrite(&count, sizeof(int32_t), 1, fp); 74 | for (int i = 0; i < count; i++) 75 | keys[i].write(fp); 76 | } 77 | 78 | size_t DICT::getSize() { 79 | count = (int32_t)keys.size(); 80 | size_t s = sizeof(int32_t); 81 | for (int i = 0; i < count; i++) 82 | s += keys[i].getSize(); 83 | return s; 84 | } 85 | 86 | void DICT::Add(std::string 
vKey, std::string vValue) { keys.push_back(DICTitem(vKey, vValue)); } 87 | 88 | ////////////////////////////////////////////////////////////////// 89 | 90 | nTRN::nTRN(int32_t countFrames) { 91 | nodeId = 0; 92 | reservedId = -1; 93 | childNodeId = 0; 94 | numFrames = 1; 95 | layerId = -1; 96 | numFrames = countFrames; 97 | while ((int32_t)frames.size() < numFrames) 98 | frames.push_back(DICT()); 99 | } 100 | 101 | void nTRN::write(FILE* fp) { 102 | // chunk header 103 | int32_t id = GetMVID('n', 'T', 'R', 'N'); 104 | fwrite(&id, sizeof(int32_t), 1, fp); 105 | size_t contentSize = getSize(); 106 | fwrite(&contentSize, sizeof(int32_t), 1, fp); 107 | size_t childSize = 0; 108 | fwrite(&childSize, sizeof(int32_t), 1, fp); 109 | 110 | // datas's 111 | fwrite(&nodeId, sizeof(int32_t), 1, fp); 112 | nodeAttribs.write(fp); 113 | fwrite(&childNodeId, sizeof(int32_t), 1, fp); 114 | fwrite(&reservedId, sizeof(int32_t), 1, fp); 115 | fwrite(&layerId, sizeof(int32_t), 1, fp); 116 | fwrite(&numFrames, sizeof(int32_t), 1, fp); 117 | for (int i = 0; i < numFrames; i++) 118 | frames[i].write(fp); 119 | } 120 | 121 | size_t nTRN::getSize() { 122 | size_t s = sizeof(int32_t) * 5 + nodeAttribs.getSize(); 123 | for (int i = 0; i < numFrames; i++) 124 | s += frames[i].getSize(); 125 | return s; 126 | } 127 | 128 | ////////////////////////////////////////////////////////////////// 129 | 130 | nGRP::nGRP(int32_t vCount) { 131 | nodeId = 0; 132 | nodeChildrenNodes = vCount; 133 | while ((int32_t)childNodes.size() < nodeChildrenNodes) 134 | childNodes.push_back(0); 135 | } 136 | 137 | void nGRP::write(FILE* fp) { 138 | // chunk header 139 | int32_t id = GetMVID('n', 'G', 'R', 'P'); 140 | fwrite(&id, sizeof(int32_t), 1, fp); 141 | size_t contentSize = getSize(); 142 | fwrite(&contentSize, sizeof(int32_t), 1, fp); 143 | size_t childSize = 0; 144 | fwrite(&childSize, sizeof(int32_t), 1, fp); 145 | 146 | // datas's 147 | fwrite(&nodeId, sizeof(int32_t), 1, fp); 148 | nodeAttribs.write(fp); 149 
| fwrite(&nodeChildrenNodes, sizeof(int32_t), 1, fp); 150 | fwrite(childNodes.data(), sizeof(int32_t), nodeChildrenNodes, fp); 151 | } 152 | 153 | size_t nGRP::getSize() { return sizeof(int32_t) * (2 + nodeChildrenNodes) + nodeAttribs.getSize(); } 154 | 155 | ////////////////////////////////////////////////////////////////// 156 | 157 | MODEL::MODEL() { modelId = 0; } 158 | 159 | void MODEL::write(FILE* fp) { 160 | fwrite(&modelId, sizeof(int32_t), 1, fp); 161 | modelAttribs.write(fp); 162 | } 163 | 164 | size_t MODEL::getSize() { return sizeof(int32_t) + modelAttribs.getSize(); } 165 | 166 | ////////////////////////////////////////////////////////////////// 167 | 168 | nSHP::nSHP(int32_t vCount) { 169 | nodeId = 0; 170 | numModels = vCount; 171 | models.resize(numModels); 172 | } 173 | 174 | void nSHP::write(FILE* fp) { 175 | // chunk header 176 | int32_t id = GetMVID('n', 'S', 'H', 'P'); 177 | fwrite(&id, sizeof(int32_t), 1, fp); 178 | size_t contentSize = getSize(); 179 | fwrite(&contentSize, sizeof(int32_t), 1, fp); 180 | size_t childSize = 0; 181 | fwrite(&childSize, sizeof(int32_t), 1, fp); 182 | 183 | // datas's 184 | fwrite(&nodeId, sizeof(int32_t), 1, fp); 185 | nodeAttribs.write(fp); 186 | fwrite(&numModels, sizeof(int32_t), 1, fp); 187 | for (int i = 0; i < numModels; i++) 188 | models[i].write(fp); 189 | } 190 | 191 | size_t nSHP::getSize() { 192 | size_t s = sizeof(int32_t) * 2 + nodeAttribs.getSize(); 193 | for (int i = 0; i < numModels; i++) 194 | s += models[i].getSize(); 195 | return s; 196 | } 197 | 198 | ////////////////////////////////////////////////////////////////// 199 | 200 | LAYR::LAYR() { 201 | nodeId = 0; 202 | reservedId = -1; 203 | } 204 | 205 | void LAYR::write(FILE* fp) { 206 | // chunk header 207 | int32_t id = GetMVID('L', 'A', 'Y', 'R'); 208 | fwrite(&id, sizeof(int32_t), 1, fp); 209 | size_t contentSize = getSize(); 210 | fwrite(&contentSize, sizeof(int32_t), 1, fp); 211 | size_t childSize = 0; 212 | fwrite(&childSize, 
sizeof(int32_t), 1, fp); 213 | 214 | // datas's 215 | fwrite(&nodeId, sizeof(int32_t), 1, fp); 216 | nodeAttribs.write(fp); 217 | fwrite(&reservedId, sizeof(int32_t), 1, fp); 218 | } 219 | 220 | size_t LAYR::getSize() { return sizeof(int32_t) * 2 + nodeAttribs.getSize(); } 221 | 222 | ////////////////////////////////////////////////////////////////// 223 | 224 | SIZE::SIZE() { 225 | sizex = 0; 226 | sizey = 0; 227 | sizez = 0; 228 | } 229 | 230 | void SIZE::write(FILE* fp) { 231 | // chunk header 232 | int32_t id = GetMVID('S', 'I', 'Z', 'E'); 233 | fwrite(&id, sizeof(int32_t), 1, fp); 234 | size_t contentSize = getSize(); 235 | fwrite(&contentSize, sizeof(int32_t), 1, fp); 236 | size_t childSize = 0; 237 | fwrite(&childSize, sizeof(int32_t), 1, fp); 238 | 239 | // datas's 240 | fwrite(&sizex, sizeof(int32_t), 1, fp); 241 | fwrite(&sizey, sizeof(int32_t), 1, fp); 242 | fwrite(&sizez, sizeof(int32_t), 1, fp); 243 | } 244 | 245 | size_t SIZE::getSize() { return sizeof(int32_t) * 3; } 246 | 247 | ////////////////////////////////////////////////////////////////// 248 | 249 | XYZI::XYZI() { numVoxels = 0; } 250 | 251 | void XYZI::write(FILE* fp) { 252 | // chunk header 253 | int32_t id = GetMVID('X', 'Y', 'Z', 'I'); 254 | fwrite(&id, sizeof(int32_t), 1, fp); 255 | size_t contentSize = getSize(); 256 | fwrite(&contentSize, sizeof(int32_t), 1, fp); 257 | size_t childSize = 0; 258 | fwrite(&childSize, sizeof(int32_t), 1, fp); 259 | 260 | // datas's 261 | fwrite(&numVoxels, sizeof(int32_t), 1, fp); 262 | fwrite(voxels.data(), sizeof(uint8_t), voxels.size(), fp); 263 | } 264 | 265 | size_t XYZI::getSize() { 266 | numVoxels = (int32_t)voxels.size() / 4; 267 | return sizeof(int32_t) * (1 + numVoxels); 268 | } 269 | 270 | ////////////////////////////////////////////////////////////////// 271 | 272 | RGBA::RGBA() {} 273 | 274 | void RGBA::write(FILE* fp) { 275 | // chunk header 276 | int32_t id = GetMVID('R', 'G', 'B', 'A'); 277 | fwrite(&id, sizeof(int32_t), 1, fp); 278 | 
size_t contentSize = getSize(); 279 | fwrite(&contentSize, sizeof(int32_t), 1, fp); 280 | size_t childSize = 0; 281 | fwrite(&childSize, sizeof(int32_t), 1, fp); 282 | 283 | // datas's 284 | fwrite(colors, sizeof(uint8_t), contentSize, fp); 285 | } 286 | 287 | size_t RGBA::getSize() { return sizeof(uint8_t) * 4 * 256; } 288 | 289 | ////////////////////////////////////////////////////////////////// 290 | 291 | VoxCube::VoxCube() { 292 | id = 0; 293 | tx = 0; 294 | ty = 0; 295 | tz = 0; 296 | } 297 | 298 | void VoxCube::write(FILE* fp) { 299 | for (auto& xyzi : xyzis) { 300 | size.write(fp); 301 | xyzi.second.write(fp); 302 | } 303 | } 304 | 305 | ////////////////////////////////////////////////////////////////// 306 | 307 | VoxWriter* VoxWriter::Create(const std::string& vFilePathName, const uint32_t& vLimitX, const uint32_t& vLimitY, const uint32_t& vLimitZ, int32_t* vError) { 308 | VoxWriter* vox = new VoxWriter(vLimitX, vLimitY, vLimitZ); 309 | 310 | *vError = vox->IsOk(vFilePathName); 311 | 312 | if (*vError == 0) { 313 | return vox; 314 | } else { 315 | printf("Vox file creation failed, err : %s", GetErrnoMsg(*vError).c_str()); 316 | 317 | SAFE_DELETE(vox); 318 | } 319 | 320 | return vox; 321 | } 322 | 323 | std::string VoxWriter::GetErrnoMsg(const int32_t& vError) { 324 | std::string res; 325 | 326 | switch (vError) { 327 | case 1: res = "Operation not permitted"; break; 328 | case 2: res = "No such file or directory"; break; 329 | case 3: res = "No such process"; break; 330 | case 4: res = "Interrupted function"; break; 331 | case 5: res = "I / O error"; break; 332 | case 6: res = "No such device or address"; break; 333 | case 7: res = "Argument list too long"; break; 334 | case 8: res = "Exec format error"; break; 335 | case 9: res = "Bad file number"; break; 336 | case 10: res = "No spawned processes"; break; 337 | case 11: res = "No more processes or not enough memory or maximum nesting level reached"; break; 338 | case 12: res = "Not enough memory"; 
break; 339 | case 13: res = "Permission denied"; break; 340 | case 14: res = "Bad address"; break; 341 | case 16: res = "Device or resource busy"; break; 342 | case 17: res = "File exists"; break; 343 | case 18: res = "Cross - device link"; break; 344 | case 19: res = "No such device"; break; 345 | case 20: res = "Not a director"; break; 346 | case 21: res = "Is a directory"; break; 347 | case 22: res = "Invalid argument"; break; 348 | case 23: res = "Too many files open in system"; break; 349 | case 24: res = "Too many open files"; break; 350 | case 25: res = "Inappropriate I / O control operation"; break; 351 | case 27: res = "File too large"; break; 352 | case 28: res = "No space left on device"; break; 353 | case 29: res = "Invalid seek"; break; 354 | case 30: res = "Read - only file system"; break; 355 | case 31: res = "Too many links"; break; 356 | case 32: res = "Broken pipe"; break; 357 | case 33: res = "Math argument"; break; 358 | case 34: res = "Result too large"; break; 359 | case 36: res = "Resource deadlock would occur"; break; 360 | case 38: res = "Filename too long"; break; 361 | case 39: res = "No locks available"; break; 362 | case 40: res = "Function not supported"; break; 363 | case 41: res = "Directory not empty"; break; 364 | case 42: res = "Illegal byte sequence"; break; 365 | case 80: res = "String was truncated"; break; 366 | } 367 | 368 | return res; 369 | } 370 | 371 | 372 | ////////////////////////////////////////////////////////////////// 373 | // the limit of magicavoxel is 127 for one cube, is 127 voxels (indexs : 0 -> 126) 374 | // vMaxVoxelPerCubeX,Y,Z define the limit of one cube 375 | VoxWriter::VoxWriter(const VoxelX& vMaxVoxelPerCubeX, const VoxelY& vMaxVoxelPerCubeY, const VoxelZ& vMaxVoxelPerCubeZ) { 376 | // the limit of magicavoxel is 127 because the first voxel is 1 not 0 377 | // so this is 0 to 126 378 | // index limit, size is 127 379 | m_MaxVoxelPerCubeX = ct::clamp(vMaxVoxelPerCubeX, 0, 126); 380 | m_MaxVoxelPerCubeY = 
ct::clamp(vMaxVoxelPerCubeY, 0, 126); 381 | m_MaxVoxelPerCubeZ = ct::clamp(vMaxVoxelPerCubeZ, 0, 126); 382 | } 383 | 384 | VoxWriter::~VoxWriter() {} 385 | 386 | int32_t VoxWriter::IsOk(const std::string& vFilePathName) { 387 | if (m_OpenFileForWriting(vFilePathName)) { 388 | m_CloseFile(); 389 | } 390 | return lastError; 391 | } 392 | 393 | void VoxWriter::ClearVoxels() { 394 | cubes.clear(); 395 | cubesId.clear(); 396 | voxelId.clear(); 397 | } 398 | 399 | void VoxWriter::ClearColors() { colors.clear(); } 400 | 401 | void VoxWriter::StartTimeLogging() { 402 | m_TimeLoggingEnabled = true; 403 | m_StartTime = std::chrono::steady_clock::now(); 404 | m_LastKeyFrameTime = m_StartTime; 405 | }; 406 | 407 | void VoxWriter::StopTimeLogging() { 408 | if (m_TimeLoggingEnabled) { 409 | const auto now = std::chrono::steady_clock::now(); 410 | m_FrameTimes[m_KeyFrame] = std::chrono::duration_cast(now - m_LastKeyFrameTime).count() * 1e-3; 411 | if (m_KeyFrameTimeLoggingFunctor) { 412 | m_KeyFrameTimeLoggingFunctor(m_KeyFrame, m_FrameTimes.at(m_KeyFrame)); 413 | } 414 | m_TotalTime = std::chrono::duration_cast(now - m_StartTime).count() * 1e-3; 415 | m_TimeLoggingEnabled = false; 416 | } 417 | } 418 | 419 | void VoxWriter::SetKeyFrameTimeLoggingFunctor(const KeyFrameTimeLoggingFunctor& vKeyFrameTimeLoggingFunctor) { 420 | m_KeyFrameTimeLoggingFunctor = vKeyFrameTimeLoggingFunctor; 421 | } 422 | 423 | void VoxWriter::SetKeyFrame(uint32_t vKeyFrame) { 424 | if (m_KeyFrame != vKeyFrame) { 425 | if (m_TimeLoggingEnabled) { 426 | const auto now = std::chrono::steady_clock::now(); 427 | const auto elapsed = now - m_LastKeyFrameTime; 428 | m_FrameTimes[m_KeyFrame] = std::chrono::duration_cast(elapsed).count() * 1e-3; 429 | if (m_KeyFrameTimeLoggingFunctor) { 430 | m_KeyFrameTimeLoggingFunctor(m_KeyFrame, m_FrameTimes.at(m_KeyFrame)); 431 | } 432 | m_LastKeyFrameTime = now; 433 | } 434 | m_KeyFrame = vKeyFrame; 435 | } 436 | } 437 | 438 | void VoxWriter::AddColor(const uint8_t& r, 
const uint8_t& g, const uint8_t& b, const uint8_t& a, const uint8_t& index) { 439 | while (colors.size() <= index) 440 | colors.push_back(0); 441 | colors[index] = GetID(r, g, b, a); 442 | } 443 | 444 | void VoxWriter::AddVoxel(const size_t& vX, const size_t& vY, const size_t& vZ, const uint8_t& vColorIndex) { 445 | // cube pos 446 | size_t ox = (size_t)std::floor((double)vX / (double)m_MaxVoxelPerCubeX); 447 | size_t oy = (size_t)std::floor((double)vY / (double)m_MaxVoxelPerCubeY); 448 | size_t oz = (size_t)std::floor((double)vZ / (double)m_MaxVoxelPerCubeZ); 449 | 450 | minCubeX = ct::mini(minCubeX, ox); 451 | minCubeY = ct::mini(minCubeX, oy); 452 | minCubeZ = ct::mini(minCubeX, oz); 453 | 454 | auto cube = m_GetCube(ox, oy, oz); 455 | 456 | m_MergeVoxelInCube(vX, vY, vZ, vColorIndex, cube); 457 | } 458 | 459 | void VoxWriter::SaveToFile(const std::string& vFilePathName) { 460 | if (m_OpenFileForWriting(vFilePathName)) { 461 | int32_t zero = 0; 462 | 463 | fwrite(&ID_VOX, sizeof(int32_t), 1, m_File); 464 | fwrite(&MV_VERSION, sizeof(int32_t), 1, m_File); 465 | 466 | // MAIN CHUNCK 467 | fwrite(&ID_MAIN, sizeof(int32_t), 1, m_File); 468 | fwrite(&zero, sizeof(int32_t), 1, m_File); 469 | 470 | long numBytesMainChunkPos = m_GetFilePos(); 471 | fwrite(&zero, sizeof(int32_t), 1, m_File); 472 | 473 | long headerSize = m_GetFilePos(); 474 | 475 | int count = (int)cubes.size(); 476 | 477 | int nodeIds = 0; 478 | nTRN rootTransform(1); 479 | rootTransform.nodeId = nodeIds; 480 | rootTransform.childNodeId = ++nodeIds; 481 | 482 | nGRP rootGroup(count); 483 | rootGroup.nodeId = nodeIds; // 484 | rootGroup.nodeChildrenNodes = count; 485 | 486 | std::vector shapes; 487 | std::vector shapeTransforms; 488 | size_t cube_idx = 0U; 489 | int32_t model_id = 0U; 490 | for (auto& cube : cubes) { 491 | cube.write(m_File); 492 | 493 | // trans 494 | nTRN trans(1);// not a trans anim so ony one frame 495 | trans.nodeId = ++nodeIds; // 496 | rootGroup.childNodes[cube_idx] = nodeIds; 497 
| trans.childNodeId = ++nodeIds; 498 | trans.layerId = 0; 499 | cube.tx = (int)std::floor((cube.tx - minCubeX + 0.5f) * m_MaxVoxelPerCubeX - maxVolume.lowerBound.x - maxVolume.Size().x * 0.5); 500 | cube.ty = (int)std::floor((cube.ty - minCubeY + 0.5f) * m_MaxVoxelPerCubeY - maxVolume.lowerBound.y - maxVolume.Size().y * 0.5); 501 | cube.tz = (int)std::floor((cube.tz - minCubeZ + 0.5f) * m_MaxVoxelPerCubeZ); 502 | trans.frames[0].Add("_t", ct::toStr(cube.tx) + " " + ct::toStr(cube.ty) + " " + ct::toStr(cube.tz)); 503 | shapeTransforms.push_back(trans); 504 | 505 | // shape 506 | nSHP shape((int32_t)cube.xyzis.size()); 507 | shape.nodeId = nodeIds; 508 | size_t model_array_id = 0U; 509 | for (const auto& xyzi : cube.xyzis) { 510 | shape.models[model_array_id].modelId = model_id; 511 | shape.models[model_array_id].modelAttribs.Add("_f", ct::toStr(xyzi.first)); 512 | ++model_array_id; 513 | ++model_id; 514 | } 515 | shapes.push_back(shape); 516 | 517 | ++cube_idx; 518 | } 519 | 520 | rootTransform.write(m_File); 521 | rootGroup.write(m_File); 522 | 523 | // trn & shp 524 | for (int i = 0; i < count; i++) { 525 | shapeTransforms[i].write(m_File); 526 | shapes[i].write(m_File); 527 | } 528 | 529 | // no layr in my cases 530 | 531 | // layr 532 | /*for (int i = 0; i < 8; i++) 533 | { 534 | LAYR layr; 535 | layr.nodeId = i; 536 | layr.nodeAttribs.Add("_name", ct::toStr(i)); 537 | layr.write(m_File); 538 | }*/ 539 | 540 | // RGBA Palette 541 | if (colors.size() > 0) { 542 | RGBA palette; 543 | for (int32_t i = 0; i < 255; i++) { 544 | if (i < (int32_t)colors.size()) { 545 | palette.colors[i] = colors[i]; 546 | } else { 547 | palette.colors[i] = 0; 548 | } 549 | } 550 | 551 | palette.write(m_File); 552 | } 553 | 554 | const long mainChildChunkSize = m_GetFilePos() - headerSize; 555 | m_SetFilePos(numBytesMainChunkPos); 556 | uint32_t size = (uint32_t)mainChildChunkSize; 557 | fwrite(&size, sizeof(uint32_t), 1, m_File); 558 | 559 | m_CloseFile(); 560 | } 561 | } 562 | 563 | 
const size_t VoxWriter::GetVoxelsCount(const KeyFrame& vKeyFrame) const { 564 | size_t voxel_count = 0U; 565 | for (const auto& cube : cubes) { 566 | if (cube.xyzis.find(vKeyFrame) != cube.xyzis.end()) { 567 | voxel_count += cube.xyzis.at(vKeyFrame).numVoxels; 568 | } 569 | } 570 | return voxel_count; 571 | } 572 | 573 | const size_t VoxWriter::GetVoxelsCount() const { 574 | size_t voxel_count = 0U; 575 | for (const auto& cube : cubes) { 576 | for (auto& key_xyzi : cube.xyzis) { 577 | voxel_count += key_xyzi.second.numVoxels; 578 | } 579 | } 580 | return voxel_count; 581 | } 582 | 583 | void VoxWriter::PrintStats() const { 584 | std::cout << "---- Stats ------------------------------" << std::endl; 585 | std::cout << "Volume : " << maxVolume.Size().x << " x " << maxVolume.Size().y << " x " << maxVolume.Size().z << std::endl; 586 | std::cout << "count cubes : " << cubes.size() << std::endl; 587 | std::map frame_counts; 588 | for (const auto& cube : cubes) { 589 | for (auto& key_xyzi : cube.xyzis) { 590 | frame_counts[key_xyzi.first] += key_xyzi.second.numVoxels; 591 | } 592 | } 593 | size_t voxels_total = 0U; 594 | if (frame_counts.size() > 1U) { 595 | std::cout << "count key frames : " << frame_counts.size() << std::endl; 596 | std::cout << "-----------------------------------------" << std::endl; 597 | for (const auto& frame_count : frame_counts) { 598 | std::cout << " o--\\-> key frame : " << frame_count.first << std::endl; 599 | std::cout << " \\-> voxels count : " << frame_count.second << std::endl; 600 | if (m_FrameTimes.find(frame_count.first) != m_FrameTimes.end()) { 601 | std::cout << " \\-> elapsed time : " << m_FrameTimes.at(frame_count.first) << " secs" << std::endl; 602 | } 603 | voxels_total += frame_count.second; 604 | } 605 | std::cout << "-----------------------------------------" << std::endl; 606 | } else if (!frame_counts.empty()) { 607 | voxels_total = frame_counts.begin()->second; 608 | } 609 | std::cout << "voxels total : " << voxels_total << 
std::endl; 610 | std::cout << "total elapsed time : " << m_TotalTime << " secs" << std::endl; 611 | std::cout << "-----------------------------------------" << std::endl; 612 | } 613 | 614 | bool VoxWriter::m_OpenFileForWriting(const std::string& vFilePathName) { 615 | #if _MSC_VER 616 | lastError = fopen_s(&m_File, vFilePathName.c_str(), "wb"); 617 | #else 618 | m_File = fopen(vFilePathName.c_str(), "wb"); 619 | lastError = m_File ? 0 : errno; 620 | #endif 621 | if (lastError != 0) 622 | return false; 623 | return true; 624 | } 625 | 626 | void VoxWriter::m_CloseFile() { fclose(m_File); } 627 | 628 | long VoxWriter::m_GetFilePos() const { return ftell(m_File); } 629 | 630 | void VoxWriter::m_SetFilePos(const long& vPos) { 631 | // SEEK_SET Beginning of file 632 | // SEEK_CUR Current position of the file pointer 633 | // SEEK_END End of file 634 | fseek(m_File, vPos, SEEK_SET); 635 | } 636 | 637 | const size_t VoxWriter::m_GetCubeId(const VoxelX& vX, const VoxelY& vY, const VoxelZ& vZ) { 638 | if (cubesId.find(vX) != cubesId.end()) { 639 | if (cubesId[vX].find(vY) != cubesId[vX].end()) { 640 | if (cubesId[vX][vY].find(vZ) != cubesId[vX][vY].end()) { 641 | return cubesId[vX][vY][vZ]; 642 | } 643 | } 644 | } 645 | 646 | cubesId[vX][vY][vZ] = maxCubeId++; 647 | 648 | return cubesId[vX][vY][vZ]; 649 | } 650 | 651 | // Wrap a position inside a particular cube dimension 652 | inline uint8_t Wrap(size_t v, size_t lim) { 653 | v = v % lim; 654 | if (v < 0) { 655 | v += lim; 656 | } 657 | return (uint8_t)v; 658 | } 659 | 660 | void VoxWriter::m_MergeVoxelInCube(const VoxelX& vX, const VoxelY& vY, const VoxelZ& vZ, const uint8_t& vColorIndex, VoxCube* vCube) { 661 | maxVolume.Combine(ct::dvec3((double)vX, (double)vY, (double)vZ)); 662 | 663 | bool exist = false; 664 | if (voxelId.find(m_KeyFrame) != voxelId.end()) { 665 | auto& vidk = voxelId.at(m_KeyFrame); 666 | if (vidk.find(vX) != vidk.end()) { 667 | auto& vidkx = vidk.at(vX); 668 | if (vidkx.find(vY) != vidkx.end()) { 
669 | auto& vidkxy = vidkx.at(vY); 670 | if (vidkxy.find(vZ) != vidkxy.end()) { 671 | exist = true; 672 | } 673 | } 674 | } 675 | } 676 | 677 | if (!exist) { 678 | auto& xyzi = vCube->xyzis[m_KeyFrame]; 679 | xyzi.voxels.push_back(Wrap(vX, m_MaxVoxelPerCubeX)); // x 680 | xyzi.voxels.push_back(Wrap(vY, m_MaxVoxelPerCubeY)); // y 681 | xyzi.voxels.push_back(Wrap(vZ, m_MaxVoxelPerCubeZ)); // z 682 | 683 | // correspond a la loc de la couleur du voxel en question 684 | voxelId[m_KeyFrame][vX][vY][vZ] = (int)xyzi.voxels.size(); 685 | 686 | xyzi.voxels.push_back(vColorIndex); // color index 687 | } 688 | } 689 | 690 | VoxCube* VoxWriter::m_GetCube(const VoxelX& vX, const VoxelY& vY, const VoxelZ& vZ) { 691 | const auto& id = m_GetCubeId(vX, vY, vZ); 692 | 693 | if (id == cubes.size()) { 694 | VoxCube c; 695 | 696 | c.id = (int32_t)id; 697 | 698 | c.tx = (int32_t)vX; 699 | c.ty = (int32_t)vY; 700 | c.tz = (int32_t)vZ; 701 | 702 | c.size.sizex = (int32_t)m_MaxVoxelPerCubeX; 703 | c.size.sizey = (int32_t)m_MaxVoxelPerCubeY; 704 | c.size.sizez = (int32_t)m_MaxVoxelPerCubeZ; 705 | 706 | cubes.push_back(c); 707 | } 708 | 709 | if (id < cubes.size()) { 710 | return &cubes[id]; 711 | } 712 | 713 | return nullptr; 714 | } 715 | 716 | } // namespace vox 717 | -------------------------------------------------------------------------------- /src/libs/magicavoxel_file_writer/VoxWriter.h: -------------------------------------------------------------------------------- 1 | // Copyright 2018 Stephane Cuillerdier @Aiekick 2 | 3 | // Permission is hereby granted, free of charge, to any person obtaining a 4 | // copy of this software and associated documentation files (the "Software"), 5 | // to deal in the Software without restriction, including without 6 | // limitation the rights to use, copy, modify, merge, publish, distribute, 7 | // sublicense, and/or sell copies of the Software, and to permit persons to 8 | // whom the Software is furnished to do so, subject to the following 
conditions: 9 | 10 | // The above copyright notice and this permission notice shall be included 11 | // in all copies or substantial portions of the Software. 12 | 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 14 | // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 15 | // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 16 | // IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 17 | // DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 18 | // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 19 | 20 | // This File is a helper for write a vox file after 0.99 release to support 21 | // the world mode editor 22 | // just add all color with the color Index with AddColor 23 | // And add all voxels with the method AddVoxel with the voxel in world position, and finally save the model 24 | // that's all, the file was initially created for my Proecedural soft 25 | // "SdfMesher" cf :https://twitter.com/hashtag/sdfmesher?src=hash 26 | // it support just my needs for the moment, but i put here because its a basis for more i thinck 27 | #ifndef __VOX_WRITER_H__ 28 | #define __VOX_WRITER_H__ 29 | 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | 39 | // extracted and adapted from https://github.com/aiekick/cTools (LICENSE MIT) 40 | // for make VoxWriter lib free 41 | #define SAFE_DELETE(a) \ 42 | if (a != 0) \ 43 | delete a, a = 0 44 | 45 | namespace ct { 46 | template 47 | ::std::string toStr(const T& DOUBLE) { 48 | ::std::ostringstream os; 49 | os << DOUBLE; 50 | return os.str(); 51 | } 52 | template 53 | inline T mini(const T& a, T& b) { 54 | return a < b ? a : b; 55 | } 56 | template 57 | inline T maxi(const T& a, T& b) { 58 | return a > b ? 
a : b; 59 | } 60 | template 61 | inline T clamp(const T& n) { 62 | return n >= T(0) && n <= T(1) ? n : T(n > T(0)); 63 | } // clamp n => 0 to 1 64 | template 65 | inline T clamp(const T& n, const T& b) { 66 | return n >= T(0) && n <= b ? n : T(n > T(0)) * b; 67 | } // clamp n => 0 to b 68 | template 69 | inline T clamp(const T& n, const T& a, const T& b) { 70 | return n >= a && n <= b ? n : n < a ? a : b; 71 | } // clamp n => a to b 72 | 73 | // specialized 74 | struct dvec3 { 75 | double x, y, z; 76 | dvec3() { x = 0.0, y = 0.0, z = 0.0; } 77 | dvec3(const double& vxyz) { x = vxyz, y = vxyz, z = vxyz; } 78 | dvec3(const double& vx, const double& vy, const double& vz) { x = vx, y = vy, z = vz; } 79 | void operator+=(const double v) { 80 | x += v; 81 | y += v; 82 | z += v; 83 | } 84 | void operator-=(const double v) { 85 | x -= v; 86 | y -= v; 87 | z -= v; 88 | } 89 | void operator+=(const dvec3 v) { 90 | x += v.x; 91 | y += v.y; 92 | z += v.z; 93 | } 94 | void operator-=(const dvec3 v) { 95 | x -= v.x; 96 | y -= v.y; 97 | z -= v.z; 98 | } 99 | void operator*=(double v) { 100 | x *= v; 101 | y *= v; 102 | z *= v; 103 | } 104 | void operator/=(double v) { 105 | x /= v; 106 | y /= v; 107 | z /= v; 108 | } 109 | void operator*=(dvec3 v) { 110 | x *= v.x; 111 | y *= v.y; 112 | z *= v.z; 113 | } 114 | void operator/=(dvec3 v) { 115 | x /= v.x; 116 | y /= v.y; 117 | z /= v.z; 118 | } 119 | }; 120 | inline dvec3 operator+(const dvec3& v, const double& f) { return dvec3(v.x + f, v.y + f, v.z + f); } 121 | inline dvec3 operator+(const dvec3& v, dvec3 f) { return dvec3(v.x + f.x, v.y + f.y, v.z + f.z); } 122 | inline dvec3 operator-(const dvec3& v, const double& f) { return dvec3(v.x - f, v.y - f, v.z - f); } 123 | inline dvec3 operator-(const dvec3& v, dvec3 f) { return dvec3(v.x - f.x, v.y - f.y, v.z - f.z); } 124 | inline dvec3 operator*(const dvec3& v, const double& f) { return dvec3(v.x * f, v.y * f, v.z * f); } 125 | inline dvec3 operator*(const dvec3& v, dvec3 f) { 
return dvec3(v.x * f.x, v.y * f.y, v.z * f.z); } 126 | inline dvec3 operator/(const dvec3& v, const double& f) { return dvec3(v.x / f, v.y / f, v.z / f); } 127 | inline dvec3 operator/(dvec3& v, const double& f) { return dvec3(v.x / f, v.y / f, v.z / f); } 128 | inline dvec3 operator/(const double& f, dvec3& v) { return dvec3(f / v.x, f / v.y, f / v.z); } 129 | inline dvec3 operator/(const dvec3& v, dvec3 f) { return dvec3(v.x / f.x, v.y / f.y, v.z / f.z); } 130 | 131 | // specialized 132 | struct dAABBCC // copy of b2AABB struct 133 | { 134 | dvec3 lowerBound; ///< the lower left vertex 135 | dvec3 upperBound; ///< the upper right vertex 136 | 137 | dAABBCC() : lowerBound(0.0), upperBound(0.0) {} 138 | dAABBCC(dvec3 vlowerBound, dvec3 vUpperBound) { 139 | lowerBound = vlowerBound; 140 | upperBound = vUpperBound; 141 | } 142 | /// Add a vector to this vector. 143 | void operator+=(const dvec3& v) { 144 | lowerBound += v; 145 | upperBound += v; 146 | } 147 | 148 | /// Subtract a vector from this vector. 149 | void operator-=(const dvec3& v) { 150 | lowerBound -= v; 151 | upperBound -= v; 152 | } 153 | 154 | /// Multiply this vector by a scalar. 155 | void operator*=(double a) { 156 | lowerBound *= a; 157 | upperBound *= a; 158 | } 159 | 160 | /// Divide this vector by a scalar. 161 | void operator/=(double a) { 162 | lowerBound /= a; 163 | upperBound /= a; 164 | } 165 | 166 | /// Get the center of the AABB. 167 | const dvec3 GetCenter() const { return (lowerBound + upperBound) * 0.5; } 168 | 169 | /// Get the extents of the AABB (half-widths). 170 | const dvec3 GetExtents() const { return (upperBound - lowerBound) * 0.5; } 171 | 172 | /// Get the perimeter length 173 | double GetPerimeter() const { 174 | double wx = upperBound.x - lowerBound.x; 175 | double wy = upperBound.y - lowerBound.y; 176 | double wz = upperBound.z - lowerBound.z; 177 | return 2.0 * (wx + wy + wz); 178 | } 179 | 180 | /// Combine a point into this one. 
181 | void Combine(dvec3 pt) { 182 | lowerBound.x = mini(lowerBound.x, pt.x); 183 | lowerBound.y = mini(lowerBound.y, pt.y); 184 | lowerBound.z = mini(lowerBound.z, pt.z); 185 | upperBound.x = maxi(upperBound.x, pt.x); 186 | upperBound.y = maxi(upperBound.y, pt.y); 187 | upperBound.z = maxi(upperBound.z, pt.z); 188 | } 189 | 190 | /// Does this aabb contain the provided vec2. 191 | bool ContainsPoint(const dvec3& pt) const { 192 | bool result = true; 193 | result = result && lowerBound.x <= pt.x; 194 | result = result && lowerBound.y <= pt.y; 195 | result = result && lowerBound.z <= pt.z; 196 | result = result && pt.x <= upperBound.x; 197 | result = result && pt.y <= upperBound.y; 198 | result = result && pt.z <= upperBound.z; 199 | return result; 200 | } 201 | 202 | bool Intersects(const dAABBCC& other) { 203 | bool result = true; 204 | result = result || lowerBound.x <= other.lowerBound.x; 205 | result = result || lowerBound.y <= other.lowerBound.y; 206 | result = result || lowerBound.z <= other.lowerBound.z; 207 | result = result || other.upperBound.x <= upperBound.x; 208 | result = result || other.upperBound.y <= upperBound.y; 209 | result = result || other.upperBound.z <= upperBound.z; 210 | return result; 211 | } 212 | 213 | const dvec3 Size() const { return dvec3(upperBound - lowerBound); } 214 | }; 215 | 216 | /// Add a float to a dAABBCC. 217 | inline dAABBCC operator+(const dAABBCC& v, float f) { return dAABBCC(v.lowerBound + f, v.upperBound + f); } 218 | 219 | /// Add a dAABBCC to a dAABBCC. 220 | inline dAABBCC operator+(const dAABBCC& v, dAABBCC f) { return dAABBCC(v.lowerBound + f.lowerBound, v.upperBound + f.upperBound); } 221 | 222 | /// Substract a float from a dAABBCC. 223 | inline dAABBCC operator-(const dAABBCC& v, float f) { return dAABBCC(v.lowerBound - f, v.upperBound - f); } 224 | 225 | /// Substract a dAABBCC to a dAABBCC. 
226 | inline dAABBCC operator-(const dAABBCC& v, dAABBCC f) { return dAABBCC(v.lowerBound - f.lowerBound, v.upperBound - f.upperBound); } 227 | 228 | /// Multiply a float with a dAABBCC. 229 | inline dAABBCC operator*(const dAABBCC& v, float f) { return dAABBCC(v.lowerBound * f, v.upperBound * f); } 230 | 231 | /// Multiply a dAABBCC with a dAABBCC. 232 | inline dAABBCC operator*(const dAABBCC& v, dAABBCC f) { return dAABBCC(v.lowerBound * f.lowerBound, v.upperBound * f.upperBound); } 233 | 234 | /// Divide a dAABBCC by a float. 235 | inline dAABBCC operator/(const dAABBCC& v, float f) { return dAABBCC(v.lowerBound / f, v.upperBound / f); } 236 | 237 | /// Divide a dAABBCC by a float. 238 | inline dAABBCC operator/(dAABBCC& v, float f) { return dAABBCC(v.lowerBound / f, v.upperBound / f); } 239 | 240 | /// Divide a dAABBCC by a dAABBCC. 241 | inline dAABBCC operator/(const dAABBCC& v, dAABBCC f) { return dAABBCC(v.lowerBound / f.lowerBound, v.upperBound / f.upperBound); } 242 | } // namespace ct 243 | 244 | namespace vox { 245 | 246 | typedef uint32_t KeyFrame; 247 | 248 | typedef size_t CubeX; 249 | typedef size_t CubeY; 250 | typedef size_t CubeZ; 251 | typedef size_t CubeID; 252 | typedef size_t VoxelX; 253 | typedef size_t VoxelY; 254 | typedef size_t VoxelZ; 255 | typedef size_t VoxelID; 256 | typedef int32_t TagID; 257 | typedef int32_t Version; 258 | typedef int32_t ColorID; 259 | 260 | typedef ct::dAABBCC Volume; 261 | 262 | typedef std::function KeyFrameTimeLoggingFunctor; 263 | 264 | inline uint32_t GetMVID(uint8_t a, uint8_t b, uint8_t c, uint8_t d) { return (a) | (b << 8) | (c << 16) | (d << 24); } 265 | 266 | struct DICTstring { 267 | int32_t bufferSize; 268 | std::string buffer; 269 | 270 | DICTstring(); 271 | 272 | void write(FILE* fp); 273 | size_t getSize(); 274 | }; 275 | 276 | struct DICTitem { 277 | DICTstring key; 278 | DICTstring value; 279 | 280 | DICTitem(); 281 | DICTitem(std::string vKey, std::string vValue); 282 | 283 | void write(FILE* 
fp); 284 | size_t getSize(); 285 | }; 286 | 287 | struct DICT { 288 | int32_t count; 289 | std::vector keys; 290 | 291 | DICT(); 292 | void write(FILE* fp); 293 | size_t getSize(); 294 | void Add(std::string vKey, std::string vValue); 295 | }; 296 | 297 | struct nTRN { 298 | int32_t nodeId; 299 | DICT nodeAttribs; 300 | int32_t childNodeId; 301 | int32_t reservedId; 302 | int32_t layerId; 303 | int32_t numFrames; 304 | std::vector frames; 305 | 306 | nTRN(int32_t countFrames); 307 | 308 | void write(FILE* fp); 309 | size_t getSize(); 310 | }; 311 | 312 | struct nGRP { 313 | int32_t nodeId; 314 | DICT nodeAttribs; 315 | int32_t nodeChildrenNodes; 316 | std::vector childNodes; 317 | 318 | nGRP(int32_t vCount); 319 | 320 | void write(FILE* fp); 321 | size_t getSize(); 322 | }; 323 | 324 | struct MODEL { 325 | int32_t modelId; 326 | DICT modelAttribs; 327 | 328 | MODEL(); 329 | 330 | void write(FILE* fp); 331 | size_t getSize(); 332 | }; 333 | 334 | struct nSHP { 335 | int32_t nodeId; 336 | DICT nodeAttribs; 337 | int32_t numModels; 338 | std::vector models; 339 | 340 | nSHP(int32_t vCount); 341 | 342 | void write(FILE* fp); 343 | size_t getSize(); 344 | }; 345 | 346 | struct LAYR { 347 | int32_t nodeId; 348 | DICT nodeAttribs; 349 | int32_t reservedId; 350 | 351 | LAYR(); 352 | void write(FILE* fp); 353 | size_t getSize(); 354 | }; 355 | 356 | struct SIZE { 357 | int32_t sizex; 358 | int32_t sizey; 359 | int32_t sizez; 360 | 361 | SIZE(); 362 | 363 | void write(FILE* fp); 364 | size_t getSize(); 365 | }; 366 | 367 | struct XYZI { 368 | int32_t numVoxels; 369 | std::vector voxels; 370 | 371 | XYZI(); 372 | void write(FILE* fp); 373 | size_t getSize(); 374 | }; 375 | 376 | struct RGBA { 377 | int32_t colors[256]; 378 | 379 | RGBA(); 380 | void write(FILE* fp); 381 | size_t getSize(); 382 | }; 383 | 384 | struct VoxCube { 385 | int id; 386 | 387 | // translate 388 | int tx; 389 | int ty; 390 | int tz; 391 | 392 | SIZE size; 393 | std::map xyzis; 394 | 395 | VoxCube(); 
396 | 397 | void write(FILE* fp); 398 | }; 399 | 400 | 401 | class VoxWriter { 402 | public: 403 | static VoxWriter* Create(const std::string& vFilePathName, const uint32_t& vLimitX, const uint32_t& vLimitY, const uint32_t& vLimitZ, int32_t* vError); 404 | static std::string GetErrnoMsg(const int32_t& vError); 405 | 406 | private: 407 | static const uint32_t GetID(const uint8_t& a, const uint8_t& b, const uint8_t& c, const uint8_t& d) { return (a) | (b << 8) | (c << 16) | (d << 24); } 408 | 409 | private: 410 | Version MV_VERSION = 150; // the old version of MV not open another file than if version is 150 (answer by @ephtracy) 411 | 412 | TagID ID_VOX = GetID('V', 'O', 'X', ' '); 413 | TagID ID_PACK = GetID('P', 'A', 'C', 'K'); 414 | TagID ID_MAIN = GetID('M', 'A', 'I', 'N'); 415 | TagID ID_SIZE = GetID('S', 'I', 'Z', 'E'); 416 | TagID ID_XYZI = GetID('X', 'Y', 'Z', 'I'); 417 | TagID ID_RGBA = GetID('R', 'G', 'B', 'A'); 418 | TagID ID_NTRN = GetID('n', 'T', 'R', 'N'); 419 | TagID ID_NGRP = GetID('n', 'G', 'R', 'P'); 420 | TagID ID_NSHP = GetID('n', 'S', 'H', 'P'); 421 | 422 | VoxelX m_MaxVoxelPerCubeX = 0; 423 | VoxelY m_MaxVoxelPerCubeY = 0; 424 | VoxelZ m_MaxVoxelPerCubeZ = 0; 425 | 426 | CubeID maxCubeId = 0; 427 | CubeX minCubeX = (CubeX)1e7; 428 | CubeY minCubeY = (CubeY)1e7; 429 | CubeZ minCubeZ = (CubeZ)1e7; 430 | 431 | FILE* m_File = nullptr; 432 | 433 | Volume maxVolume = Volume(1e7, -1e7); 434 | 435 | KeyFrame m_KeyFrame = 0; 436 | 437 | std::vector colors; 438 | 439 | std::vector cubes; 440 | 441 | std::map>> cubesId; 442 | std::map>>> voxelId; 443 | 444 | int32_t lastError = 0; 445 | 446 | bool m_TimeLoggingEnabled = false; // for log elapsed time between key frames and total 447 | 448 | std::chrono::steady_clock::time_point m_StartTime; 449 | std::chrono::steady_clock::time_point m_LastKeyFrameTime; 450 | std::map m_FrameTimes; 451 | double m_TotalTime; 452 | 453 | KeyFrameTimeLoggingFunctor m_KeyFrameTimeLoggingFunctor; 454 | 455 | public: 456 | 
VoxWriter(const VoxelX& vMaxVoxelPerCubeX = 126, const VoxelY& vMaxVoxelPerCubeY = 126, const VoxelZ& vMaxVoxelPerCubeZ = 126); 457 | ~VoxWriter(); 458 | 459 | int32_t IsOk(const std::string& vFilePathName); 460 | 461 | void ClearVoxels(); 462 | void ClearColors(); 463 | 464 | void StartTimeLogging(); 465 | void StopTimeLogging(); 466 | void SetKeyFrameTimeLoggingFunctor(const KeyFrameTimeLoggingFunctor& vKeyFrameTimeLoggingFunctor); 467 | void SetKeyFrame(uint32_t vKeyFrame); 468 | void AddColor(const uint8_t& r, const uint8_t& g, const uint8_t& b, const uint8_t& a, const uint8_t& index); 469 | void AddVoxel(const VoxelX& vX, const VoxelY& vY, const VoxelZ& vZ, const uint8_t& vColorIndex); 470 | void SaveToFile(const std::string& vFilePathName); 471 | 472 | const size_t GetVoxelsCount(const KeyFrame& vKeyFrame) const; 473 | const size_t GetVoxelsCount() const; 474 | void PrintStats() const; 475 | 476 | private: 477 | bool m_OpenFileForWriting(const std::string& vFilePathName); 478 | void m_CloseFile(); 479 | long m_GetFilePos() const; 480 | void m_SetFilePos(const long& vPos); 481 | const size_t m_GetCubeId(const VoxelX& vX, const VoxelY& vY, const VoxelZ& vZ); 482 | VoxCube* m_GetCube(const VoxelX& vX, const VoxelY& vY, const VoxelZ& vZ); 483 | void m_MergeVoxelInCube(const VoxelX& vX, const VoxelY& vY, const VoxelZ& vZ, const uint8_t& vColorIndex, VoxCube* vCube); 484 | }; 485 | } // namespace vox 486 | #endif //__VOX_WRITER_H__ 487 | -------------------------------------------------------------------------------- /src/main.cpp: -------------------------------------------------------------------------------- 1 | #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) 2 | #define WINDOWS_LEAN_AND_MEAN // Please, not too much windows shenanigans 3 | #endif 4 | 5 | // Standard libs 6 | #include 7 | #include 8 | 9 | // Trimesh for model importing 10 | #include "TriMesh.h" 11 | // Util 12 | #include "util.h" 13 | #include "util_io.h" 14 | #include 
"util_cuda.h" 15 | #include "timer.h" 16 | // CPU voxelizer fallback 17 | #include "cpu_voxelizer.h" 18 | 19 | using namespace std; 20 | string version_number = "v0.6"; 21 | 22 | // Forward declaration of CUDA functions 23 | void voxelize(const voxinfo & v, float* triangle_data, unsigned int* vtable, bool morton_code); 24 | void voxelize_solid(const voxinfo& v, float* triangle_data, unsigned int* vtable, bool morton_code); 25 | 26 | // Output formats 27 | enum class OutputFormat { output_binvox = 0, output_morton = 1, output_obj_points = 2, output_obj_cubes = 3, output_vox = 4}; 28 | char *OutputFormats[] = { "binvox file", "morton encoded blob", "obj file (pointcloud)", "obj file (cubes)", "magicavoxel file"}; 29 | 30 | // Default options 31 | string filename = ""; 32 | string filename_base = ""; 33 | OutputFormat outputformat = OutputFormat::output_vox; 34 | unsigned int gridsize = 256; 35 | bool forceCPU = false; 36 | bool solidVoxelization = false; 37 | 38 | void printHeader(){ 39 | fprintf(stdout, "## CUDA VOXELIZER \n"); 40 | cout << "CUDA Voxelizer " << version_number << " by Jeroen Baert" << endl; 41 | cout << "https://github.com/Forceflow/cuda_voxelizer - mail (at) jeroen-baert (dot) be" << endl; 42 | } 43 | 44 | void printExample() { 45 | cout << "Example: cuda_voxelizer -f /home/jeroen/bunny.ply -s 512" << endl; 46 | } 47 | 48 | void printHelp(){ 49 | fprintf(stdout, "\n## HELP \n"); 50 | cout << "Program options: " << endl << endl; 51 | cout << " -f (required)" << endl; 52 | cout << " -s 512, 1024, ... 
(default: 256)>" << endl; 53 | cout << " -o " << endl; 54 | cout << " -cpu : Force CPU-based voxelization (slow, but works if no compatible GPU can be found)" << endl; 55 | cout << " -solid : Force solid voxelization (experimental, needs watertight model)" << endl << endl; 56 | printExample(); 57 | cout << endl; 58 | } 59 | 60 | // METHOD 1: Helper function to transfer triangles to automatically managed CUDA memory ( > CUDA 7.x) 61 | float* meshToGPU_managed(const trimesh::TriMesh *mesh) { 62 | Timer t; t.start(); 63 | size_t n_floats = sizeof(float) * 9 * (mesh->faces.size()); 64 | float* device_triangles = 0; 65 | fprintf(stdout, "[Mesh] Allocating %s of CUDA-managed UNIFIED memory for triangle data \n", (readableSize(n_floats)).c_str()); 66 | checkCudaErrors(cudaMallocManaged((void**) &device_triangles, n_floats)); // managed memory 67 | fprintf(stdout, "[Mesh] Copy %llu triangles to CUDA-managed UNIFIED memory \n", (size_t)(mesh->faces.size())); 68 | for (size_t i = 0; i < mesh->faces.size(); i++) { 69 | float3 v0 = trimesh_to_float3(mesh->vertices[mesh->faces[i][0]]); 70 | float3 v1 = trimesh_to_float3(mesh->vertices[mesh->faces[i][1]]); 71 | float3 v2 = trimesh_to_float3(mesh->vertices[mesh->faces[i][2]]); 72 | size_t j = i * 9; 73 | // Memcpy assuming the floats are laid out next to eachother 74 | memcpy((device_triangles)+j, &v0.x, 3*sizeof(float)); 75 | memcpy((device_triangles)+j+3, &v1.x, 3*sizeof(float)); 76 | memcpy((device_triangles)+j+6, &v2.x, 3*sizeof(float)); 77 | } 78 | t.stop();fprintf(stdout, "[Perf] Mesh transfer time to GPU: %.1f ms \n", t.elapsed_time_milliseconds); 79 | return device_triangles; 80 | } 81 | 82 | // METHOD 2: Helper function to transfer triangles to old-style, self-managed CUDA memory ( < CUDA 7.x ) 83 | // Leaving this here for reference, the function above should be faster and better managed on all versions CUDA 7+ 84 | // 85 | //float* meshToGPU(const trimesh::TriMesh *mesh){ 86 | // size_t n_floats = sizeof(float) * 9 * 
(mesh->faces.size()); 87 | // float* pagelocktriangles; 88 | // fprintf(stdout, "Allocating %llu kb of page-locked HOST memory \n", (size_t)(n_floats / 1024.0f)); 89 | // checkCudaErrors(cudaHostAlloc((void**)&pagelocktriangles, n_floats, cudaHostAllocDefault)); // pinned memory to easily copy from 90 | // fprintf(stdout, "Copy %llu triangles to page-locked HOST memory \n", (size_t)(mesh->faces.size())); 91 | // for (size_t i = 0; i < mesh->faces.size(); i++){ 92 | // glm::vec3 v0 = trimesh_to_glm(mesh->vertices[mesh->faces[i][0]]); 93 | // glm::vec3 v1 = trimesh_to_glm(mesh->vertices[mesh->faces[i][1]]); 94 | // glm::vec3 v2 = trimesh_to_glm(mesh->vertices[mesh->faces[i][2]]); 95 | // size_t j = i * 9; 96 | // memcpy((pagelocktriangles)+j, glm::value_ptr(v0), sizeof(glm::vec3)); 97 | // memcpy((pagelocktriangles)+j+3, glm::value_ptr(v1), sizeof(glm::vec3)); 98 | // memcpy((pagelocktriangles)+j+6, glm::value_ptr(v2), sizeof(glm::vec3)); 99 | // } 100 | // float* device_triangles; 101 | // fprintf(stdout, "Allocating %llu kb of DEVICE memory \n", (size_t)(n_floats / 1024.0f)); 102 | // checkCudaErrors(cudaMalloc((void **) &device_triangles, n_floats)); 103 | // fprintf(stdout, "Copy %llu triangles from page-locked HOST memory to DEVICE memory \n", (size_t)(mesh->faces.size())); 104 | // checkCudaErrors(cudaMemcpy((void *) device_triangles, (void*) pagelocktriangles, n_floats, cudaMemcpyDefault)); 105 | // return device_triangles; 106 | //} 107 | 108 | // Parse the program parameters and set them as global variables 109 | void parseProgramParameters(int argc, char* argv[]){ 110 | if(argc<2){ // not enough arguments 111 | fprintf(stdout, "Not enough program parameters. 
\n \n"); 112 | printHelp(); 113 | exit(0); 114 | } 115 | bool filegiven = false; 116 | for (int i = 1; i < argc; i++) { 117 | if (string(argv[i]) == "-f") { 118 | filename = argv[i + 1]; 119 | filename_base = filename.substr(0, filename.find_last_of(".")); 120 | filegiven = true; 121 | if (!file_exists(filename)) {fprintf(stdout, "[Err] File does not exist / cannot access: %s \n", filename.c_str());exit(1);} 122 | i++; 123 | } 124 | else if (string(argv[i]) == "-s") { 125 | gridsize = atoi(argv[i + 1]); 126 | i++; 127 | } else if (string(argv[i]) == "-h") { 128 | printHelp(); exit(0); 129 | } else if (string(argv[i]) == "-o") { 130 | string output = (argv[i + 1]); 131 | transform(output.begin(), output.end(), output.begin(), ::tolower); // to lowercase 132 | if (output == "binvox"){outputformat = OutputFormat::output_binvox;} 133 | else if (output == "morton"){outputformat = OutputFormat::output_morton;} 134 | else if (output == "obj"){outputformat = OutputFormat::output_obj_cubes;} 135 | else if (output == "obj_points") { outputformat = OutputFormat::output_obj_points; } 136 | else if (output == "vox") { outputformat = OutputFormat::output_vox; } 137 | else {fprintf(stdout, "[Err] Unrecognized output format: %s, valid options are binvox (default), morton, obj or obj_points \n", output.c_str());exit(1);} 138 | } 139 | else if (string(argv[i]) == "-cpu") { 140 | forceCPU = true; 141 | } 142 | else if (string(argv[i])=="-solid"){ 143 | solidVoxelization = true; 144 | } 145 | } 146 | if (!filegiven) { 147 | fprintf(stdout, "[Err] You didn't specify a file using -f (path). This is required. Exiting. \n"); 148 | printExample(); exit(1); 149 | } 150 | fprintf(stdout, "[Info] Filename: %s \n", filename.c_str()); 151 | fprintf(stdout, "[Info] Grid size: %i \n", gridsize); 152 | fprintf(stdout, "[Info] Output format: %s \n", OutputFormats[int(outputformat)]); 153 | fprintf(stdout, "[Info] Using CPU-based voxelization: %s (default: No)\n", forceCPU ? 
"Yes" : "No"); 154 | fprintf(stdout, "[Info] Using Solid Voxelization: %s (default: No)\n", solidVoxelization ? "Yes" : "No"); 155 | } 156 | 157 | int main(int argc, char* argv[]) { 158 | // PRINT PROGRAM INFO 159 | Timer t; t.start(); 160 | printHeader(); 161 | 162 | // PARSE PROGRAM PARAMETERS 163 | fprintf(stdout, "\n## PROGRAM PARAMETERS \n"); 164 | parseProgramParameters(argc, argv); 165 | fflush(stdout); 166 | trimesh::TriMesh::set_verbose(false); 167 | 168 | // READ THE MESH 169 | fprintf(stdout, "\n## READ MESH \n"); 170 | #ifdef _DEBUG 171 | trimesh::TriMesh::set_verbose(true); 172 | #endif 173 | fprintf(stdout, "[I/O] Reading mesh from %s \n", filename.c_str()); 174 | trimesh::TriMesh* themesh = trimesh::TriMesh::read(filename.c_str()); 175 | themesh->need_faces(); // Trimesh: Unpack (possible) triangle strips so we have faces for sure 176 | fprintf(stdout, "[Mesh] Number of triangles: %zu \n", themesh->faces.size()); 177 | fprintf(stdout, "[Mesh] Number of vertices: %zu \n", themesh->vertices.size()); 178 | fprintf(stdout, "[Mesh] Computing bbox \n"); 179 | themesh->need_bbox(); // Trimesh: Compute the bounding box (in model coordinates) 180 | 181 | // COMPUTE BOUNDING BOX AND VOXELISATION PARAMETERS 182 | fprintf(stdout, "\n## VOXELISATION SETUP \n"); 183 | // Initialize our own AABox, pad it so it's a cube 184 | AABox bbox_mesh_cubed = createMeshBBCube(AABox(trimesh_to_float3(themesh->bbox.min), trimesh_to_float3(themesh->bbox.max))); 185 | // Create voxinfo struct and print all info 186 | voxinfo voxelization_info(bbox_mesh_cubed, make_uint3(gridsize, gridsize, gridsize), themesh->faces.size()); 187 | voxelization_info.print(); 188 | // Compute space needed to hold voxel table (1 voxel / bit) 189 | unsigned int* vtable = 0; // Both voxelization paths (GPU and CPU) need this 190 | size_t vtable_size = static_cast(ceil(static_cast(voxelization_info.gridsize.x) * static_cast(voxelization_info.gridsize.y) * static_cast(voxelization_info.gridsize.z) / 
32.0f) * 4); 191 | 192 | // CUDA initialization 193 | bool cuda_ok = false; 194 | if (!forceCPU) 195 | { 196 | // SECTION: Try to figure out if we have a CUDA-enabled GPU 197 | fprintf(stdout, "\n## CUDA INIT \n"); 198 | cuda_ok = initCuda(); 199 | if (! cuda_ok ) fprintf(stdout, "[Info] CUDA GPU not found\n"); 200 | } 201 | 202 | // SECTION: The actual voxelization 203 | if (cuda_ok && !forceCPU) { 204 | // GPU voxelization 205 | fprintf(stdout, "\n## TRIANGLES TO GPU TRANSFER \n"); 206 | 207 | float* device_triangles; 208 | 209 | // Transfer triangle data to GPU 210 | device_triangles = meshToGPU_managed(themesh); 211 | 212 | // Allocate memory for voxel grid 213 | fprintf(stdout, "[Voxel Grid] Allocating %s of CUDA-managed UNIFIED memory for Voxel Grid\n", readableSize(vtable_size).c_str()); 214 | checkCudaErrors(cudaMallocManaged((void**)&vtable, vtable_size)); 215 | 216 | fprintf(stdout, "\n## GPU VOXELISATION \n"); 217 | if (solidVoxelization){ 218 | voxelize_solid(voxelization_info, device_triangles, vtable, (outputformat == OutputFormat::output_morton)); 219 | } 220 | else{ 221 | voxelize(voxelization_info, device_triangles, vtable, (outputformat == OutputFormat::output_morton)); 222 | } 223 | } else { 224 | // CPU VOXELIZATION FALLBACK 225 | fprintf(stdout, "\n## CPU VOXELISATION \n"); 226 | if (!forceCPU) { fprintf(stdout, "[Info] No suitable CUDA GPU was found: Falling back to CPU voxelization\n"); } 227 | else { fprintf(stdout, "[Info] Doing CPU voxelization (forced using command-line switch -cpu)\n"); } 228 | // allocate zero-filled array 229 | vtable = (unsigned int*) calloc(1, vtable_size); 230 | if (!solidVoxelization) { 231 | cpu_voxelizer::cpu_voxelize_mesh(voxelization_info, themesh, vtable, (outputformat == OutputFormat::output_morton)); 232 | } 233 | else { 234 | cpu_voxelizer::cpu_voxelize_mesh_solid(voxelization_info, themesh, vtable, (outputformat == OutputFormat::output_morton)); 235 | } 236 | } 237 | 238 | //// DEBUG: print vtable 239 | 
//for (int i = 0; i < vtable_size; i++) { 240 | // char* vtable_p = (char*)vtable; 241 | // cout << (int) vtable_p[i] << endl; 242 | //} 243 | 244 | fprintf(stdout, "\n## FILE OUTPUT \n"); 245 | if (outputformat == OutputFormat::output_morton){ 246 | write_binary(vtable, vtable_size, filename); 247 | } else if (outputformat == OutputFormat::output_binvox){ 248 | write_binvox(vtable, voxelization_info, filename); 249 | } 250 | else if (outputformat == OutputFormat::output_obj_points) { 251 | write_obj_pointcloud(vtable, voxelization_info, filename); 252 | } 253 | else if (outputformat == OutputFormat::output_obj_cubes) { 254 | write_obj_cubes(vtable, voxelization_info, filename); 255 | } 256 | else if (outputformat == OutputFormat::output_vox) { 257 | write_vox(vtable, voxelization_info, filename); 258 | } 259 | 260 | fprintf(stdout, "\n## STATS \n"); 261 | t.stop(); fprintf(stdout, "[Perf] Total runtime: %.1f ms \n", t.elapsed_time_milliseconds); 262 | } -------------------------------------------------------------------------------- /src/morton_LUTs.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | // LUT tables to copy to GPU memory for quick morton decode / encode 5 | static const uint32_t host_morton256_x[256] = 6 | { 7 | 0x00000000, 8 | 0x00000001, 0x00000008, 0x00000009, 0x00000040, 0x00000041, 0x00000048, 0x00000049, 0x00000200, 9 | 0x00000201, 0x00000208, 0x00000209, 0x00000240, 0x00000241, 0x00000248, 0x00000249, 0x00001000, 10 | 0x00001001, 0x00001008, 0x00001009, 0x00001040, 0x00001041, 0x00001048, 0x00001049, 0x00001200, 11 | 0x00001201, 0x00001208, 0x00001209, 0x00001240, 0x00001241, 0x00001248, 0x00001249, 0x00008000, 12 | 0x00008001, 0x00008008, 0x00008009, 0x00008040, 0x00008041, 0x00008048, 0x00008049, 0x00008200, 13 | 0x00008201, 0x00008208, 0x00008209, 0x00008240, 0x00008241, 0x00008248, 0x00008249, 0x00009000, 14 | 0x00009001, 0x00009008, 0x00009009, 0x00009040, 0x00009041, 
0x00009048, 0x00009049, 0x00009200, 15 | 0x00009201, 0x00009208, 0x00009209, 0x00009240, 0x00009241, 0x00009248, 0x00009249, 0x00040000, 16 | 0x00040001, 0x00040008, 0x00040009, 0x00040040, 0x00040041, 0x00040048, 0x00040049, 0x00040200, 17 | 0x00040201, 0x00040208, 0x00040209, 0x00040240, 0x00040241, 0x00040248, 0x00040249, 0x00041000, 18 | 0x00041001, 0x00041008, 0x00041009, 0x00041040, 0x00041041, 0x00041048, 0x00041049, 0x00041200, 19 | 0x00041201, 0x00041208, 0x00041209, 0x00041240, 0x00041241, 0x00041248, 0x00041249, 0x00048000, 20 | 0x00048001, 0x00048008, 0x00048009, 0x00048040, 0x00048041, 0x00048048, 0x00048049, 0x00048200, 21 | 0x00048201, 0x00048208, 0x00048209, 0x00048240, 0x00048241, 0x00048248, 0x00048249, 0x00049000, 22 | 0x00049001, 0x00049008, 0x00049009, 0x00049040, 0x00049041, 0x00049048, 0x00049049, 0x00049200, 23 | 0x00049201, 0x00049208, 0x00049209, 0x00049240, 0x00049241, 0x00049248, 0x00049249, 0x00200000, 24 | 0x00200001, 0x00200008, 0x00200009, 0x00200040, 0x00200041, 0x00200048, 0x00200049, 0x00200200, 25 | 0x00200201, 0x00200208, 0x00200209, 0x00200240, 0x00200241, 0x00200248, 0x00200249, 0x00201000, 26 | 0x00201001, 0x00201008, 0x00201009, 0x00201040, 0x00201041, 0x00201048, 0x00201049, 0x00201200, 27 | 0x00201201, 0x00201208, 0x00201209, 0x00201240, 0x00201241, 0x00201248, 0x00201249, 0x00208000, 28 | 0x00208001, 0x00208008, 0x00208009, 0x00208040, 0x00208041, 0x00208048, 0x00208049, 0x00208200, 29 | 0x00208201, 0x00208208, 0x00208209, 0x00208240, 0x00208241, 0x00208248, 0x00208249, 0x00209000, 30 | 0x00209001, 0x00209008, 0x00209009, 0x00209040, 0x00209041, 0x00209048, 0x00209049, 0x00209200, 31 | 0x00209201, 0x00209208, 0x00209209, 0x00209240, 0x00209241, 0x00209248, 0x00209249, 0x00240000, 32 | 0x00240001, 0x00240008, 0x00240009, 0x00240040, 0x00240041, 0x00240048, 0x00240049, 0x00240200, 33 | 0x00240201, 0x00240208, 0x00240209, 0x00240240, 0x00240241, 0x00240248, 0x00240249, 0x00241000, 34 | 0x00241001, 0x00241008, 0x00241009, 
0x00241040, 0x00241041, 0x00241048, 0x00241049, 0x00241200, 35 | 0x00241201, 0x00241208, 0x00241209, 0x00241240, 0x00241241, 0x00241248, 0x00241249, 0x00248000, 36 | 0x00248001, 0x00248008, 0x00248009, 0x00248040, 0x00248041, 0x00248048, 0x00248049, 0x00248200, 37 | 0x00248201, 0x00248208, 0x00248209, 0x00248240, 0x00248241, 0x00248248, 0x00248249, 0x00249000, 38 | 0x00249001, 0x00249008, 0x00249009, 0x00249040, 0x00249041, 0x00249048, 0x00249049, 0x00249200, 39 | 0x00249201, 0x00249208, 0x00249209, 0x00249240, 0x00249241, 0x00249248, 0x00249249 40 | }; 41 | 42 | static const uint32_t host_morton256_y[256] = { 43 | 0x00000000, 44 | 0x00000002, 0x00000010, 0x00000012, 0x00000080, 0x00000082, 0x00000090, 0x00000092, 0x00000400, 45 | 0x00000402, 0x00000410, 0x00000412, 0x00000480, 0x00000482, 0x00000490, 0x00000492, 0x00002000, 46 | 0x00002002, 0x00002010, 0x00002012, 0x00002080, 0x00002082, 0x00002090, 0x00002092, 0x00002400, 47 | 0x00002402, 0x00002410, 0x00002412, 0x00002480, 0x00002482, 0x00002490, 0x00002492, 0x00010000, 48 | 0x00010002, 0x00010010, 0x00010012, 0x00010080, 0x00010082, 0x00010090, 0x00010092, 0x00010400, 49 | 0x00010402, 0x00010410, 0x00010412, 0x00010480, 0x00010482, 0x00010490, 0x00010492, 0x00012000, 50 | 0x00012002, 0x00012010, 0x00012012, 0x00012080, 0x00012082, 0x00012090, 0x00012092, 0x00012400, 51 | 0x00012402, 0x00012410, 0x00012412, 0x00012480, 0x00012482, 0x00012490, 0x00012492, 0x00080000, 52 | 0x00080002, 0x00080010, 0x00080012, 0x00080080, 0x00080082, 0x00080090, 0x00080092, 0x00080400, 53 | 0x00080402, 0x00080410, 0x00080412, 0x00080480, 0x00080482, 0x00080490, 0x00080492, 0x00082000, 54 | 0x00082002, 0x00082010, 0x00082012, 0x00082080, 0x00082082, 0x00082090, 0x00082092, 0x00082400, 55 | 0x00082402, 0x00082410, 0x00082412, 0x00082480, 0x00082482, 0x00082490, 0x00082492, 0x00090000, 56 | 0x00090002, 0x00090010, 0x00090012, 0x00090080, 0x00090082, 0x00090090, 0x00090092, 0x00090400, 57 | 0x00090402, 0x00090410, 0x00090412, 
0x00090480, 0x00090482, 0x00090490, 0x00090492, 0x00092000, 58 | 0x00092002, 0x00092010, 0x00092012, 0x00092080, 0x00092082, 0x00092090, 0x00092092, 0x00092400, 59 | 0x00092402, 0x00092410, 0x00092412, 0x00092480, 0x00092482, 0x00092490, 0x00092492, 0x00400000, 60 | 0x00400002, 0x00400010, 0x00400012, 0x00400080, 0x00400082, 0x00400090, 0x00400092, 0x00400400, 61 | 0x00400402, 0x00400410, 0x00400412, 0x00400480, 0x00400482, 0x00400490, 0x00400492, 0x00402000, 62 | 0x00402002, 0x00402010, 0x00402012, 0x00402080, 0x00402082, 0x00402090, 0x00402092, 0x00402400, 63 | 0x00402402, 0x00402410, 0x00402412, 0x00402480, 0x00402482, 0x00402490, 0x00402492, 0x00410000, 64 | 0x00410002, 0x00410010, 0x00410012, 0x00410080, 0x00410082, 0x00410090, 0x00410092, 0x00410400, 65 | 0x00410402, 0x00410410, 0x00410412, 0x00410480, 0x00410482, 0x00410490, 0x00410492, 0x00412000, 66 | 0x00412002, 0x00412010, 0x00412012, 0x00412080, 0x00412082, 0x00412090, 0x00412092, 0x00412400, 67 | 0x00412402, 0x00412410, 0x00412412, 0x00412480, 0x00412482, 0x00412490, 0x00412492, 0x00480000, 68 | 0x00480002, 0x00480010, 0x00480012, 0x00480080, 0x00480082, 0x00480090, 0x00480092, 0x00480400, 69 | 0x00480402, 0x00480410, 0x00480412, 0x00480480, 0x00480482, 0x00480490, 0x00480492, 0x00482000, 70 | 0x00482002, 0x00482010, 0x00482012, 0x00482080, 0x00482082, 0x00482090, 0x00482092, 0x00482400, 71 | 0x00482402, 0x00482410, 0x00482412, 0x00482480, 0x00482482, 0x00482490, 0x00482492, 0x00490000, 72 | 0x00490002, 0x00490010, 0x00490012, 0x00490080, 0x00490082, 0x00490090, 0x00490092, 0x00490400, 73 | 0x00490402, 0x00490410, 0x00490412, 0x00490480, 0x00490482, 0x00490490, 0x00490492, 0x00492000, 74 | 0x00492002, 0x00492010, 0x00492012, 0x00492080, 0x00492082, 0x00492090, 0x00492092, 0x00492400, 75 | 0x00492402, 0x00492410, 0x00492412, 0x00492480, 0x00492482, 0x00492490, 0x00492492 76 | }; 77 | 78 | static const uint32_t host_morton256_z[256] = { 79 | 0x00000000, 80 | 0x00000004, 0x00000020, 0x00000024, 
0x00000100, 0x00000104, 0x00000120, 0x00000124, 0x00000800, 81 | 0x00000804, 0x00000820, 0x00000824, 0x00000900, 0x00000904, 0x00000920, 0x00000924, 0x00004000, 82 | 0x00004004, 0x00004020, 0x00004024, 0x00004100, 0x00004104, 0x00004120, 0x00004124, 0x00004800, 83 | 0x00004804, 0x00004820, 0x00004824, 0x00004900, 0x00004904, 0x00004920, 0x00004924, 0x00020000, 84 | 0x00020004, 0x00020020, 0x00020024, 0x00020100, 0x00020104, 0x00020120, 0x00020124, 0x00020800, 85 | 0x00020804, 0x00020820, 0x00020824, 0x00020900, 0x00020904, 0x00020920, 0x00020924, 0x00024000, 86 | 0x00024004, 0x00024020, 0x00024024, 0x00024100, 0x00024104, 0x00024120, 0x00024124, 0x00024800, 87 | 0x00024804, 0x00024820, 0x00024824, 0x00024900, 0x00024904, 0x00024920, 0x00024924, 0x00100000, 88 | 0x00100004, 0x00100020, 0x00100024, 0x00100100, 0x00100104, 0x00100120, 0x00100124, 0x00100800, 89 | 0x00100804, 0x00100820, 0x00100824, 0x00100900, 0x00100904, 0x00100920, 0x00100924, 0x00104000, 90 | 0x00104004, 0x00104020, 0x00104024, 0x00104100, 0x00104104, 0x00104120, 0x00104124, 0x00104800, 91 | 0x00104804, 0x00104820, 0x00104824, 0x00104900, 0x00104904, 0x00104920, 0x00104924, 0x00120000, 92 | 0x00120004, 0x00120020, 0x00120024, 0x00120100, 0x00120104, 0x00120120, 0x00120124, 0x00120800, 93 | 0x00120804, 0x00120820, 0x00120824, 0x00120900, 0x00120904, 0x00120920, 0x00120924, 0x00124000, 94 | 0x00124004, 0x00124020, 0x00124024, 0x00124100, 0x00124104, 0x00124120, 0x00124124, 0x00124800, 95 | 0x00124804, 0x00124820, 0x00124824, 0x00124900, 0x00124904, 0x00124920, 0x00124924, 0x00800000, 96 | 0x00800004, 0x00800020, 0x00800024, 0x00800100, 0x00800104, 0x00800120, 0x00800124, 0x00800800, 97 | 0x00800804, 0x00800820, 0x00800824, 0x00800900, 0x00800904, 0x00800920, 0x00800924, 0x00804000, 98 | 0x00804004, 0x00804020, 0x00804024, 0x00804100, 0x00804104, 0x00804120, 0x00804124, 0x00804800, 99 | 0x00804804, 0x00804820, 0x00804824, 0x00804900, 0x00804904, 0x00804920, 0x00804924, 0x00820000, 100 | 0x00820004, 
0x00820020, 0x00820024, 0x00820100, 0x00820104, 0x00820120, 0x00820124, 0x00820800, 101 | 0x00820804, 0x00820820, 0x00820824, 0x00820900, 0x00820904, 0x00820920, 0x00820924, 0x00824000, 102 | 0x00824004, 0x00824020, 0x00824024, 0x00824100, 0x00824104, 0x00824120, 0x00824124, 0x00824800, 103 | 0x00824804, 0x00824820, 0x00824824, 0x00824900, 0x00824904, 0x00824920, 0x00824924, 0x00900000, 104 | 0x00900004, 0x00900020, 0x00900024, 0x00900100, 0x00900104, 0x00900120, 0x00900124, 0x00900800, 105 | 0x00900804, 0x00900820, 0x00900824, 0x00900900, 0x00900904, 0x00900920, 0x00900924, 0x00904000, 106 | 0x00904004, 0x00904020, 0x00904024, 0x00904100, 0x00904104, 0x00904120, 0x00904124, 0x00904800, 107 | 0x00904804, 0x00904820, 0x00904824, 0x00904900, 0x00904904, 0x00904920, 0x00904924, 0x00920000, 108 | 0x00920004, 0x00920020, 0x00920024, 0x00920100, 0x00920104, 0x00920120, 0x00920124, 0x00920800, 109 | 0x00920804, 0x00920820, 0x00920824, 0x00920900, 0x00920904, 0x00920920, 0x00920924, 0x00924000, 110 | 0x00924004, 0x00924020, 0x00924024, 0x00924100, 0x00924104, 0x00924120, 0x00924124, 0x00924800, 111 | 0x00924804, 0x00924820, 0x00924824, 0x00924900, 0x00924904, 0x00924920, 0x00924924 112 | }; -------------------------------------------------------------------------------- /src/timer.h: -------------------------------------------------------------------------------- 1 | // Portable high-precision timer 2 | // Using QueryPerformanceCounter for Win32/Win64 3 | // And POSIX get_clock() for other platforms 4 | 5 | #pragma once 6 | 7 | #if _MSC_VER 8 | #include 9 | #elif __GNUC__ 10 | #include "time.h" 11 | #endif 12 | 13 | using namespace std; 14 | 15 | #if _MSC_VER 16 | struct Timer { // High performance Win64 timer using QPC events 17 | double pc_frequency = 0.0; 18 | double elapsed_time_milliseconds = 0.0; 19 | LARGE_INTEGER start_time = { 0 }; 20 | LARGE_INTEGER end_time = { 0 }; 21 | 22 | inline Timer() { 23 | LARGE_INTEGER li; 24 | QueryPerformanceFrequency(&li); 25 | 
pc_frequency = static_cast(li.QuadPart) / 1000.0; 26 | } 27 | 28 | inline void reset() { 29 | elapsed_time_milliseconds = 0.0; 30 | } 31 | 32 | inline void start() { 33 | QueryPerformanceCounter(&start_time); 34 | } 35 | 36 | inline void stop() { 37 | QueryPerformanceCounter(&end_time); 38 | elapsed_time_milliseconds += static_cast((end_time.QuadPart - start_time.QuadPart) / pc_frequency); 39 | } 40 | }; 41 | #else 42 | 43 | #define MILLION 1000000.0f 44 | 45 | struct Timer { // High performance timer using standard c++11 chrono 46 | double elapsed_time_milliseconds = 0; 47 | timespec t1; 48 | timespec t2; 49 | 50 | inline Timer() { 51 | } 52 | 53 | inline void start() { 54 | clock_gettime(CLOCK_REALTIME, &t1); 55 | } 56 | 57 | inline void stop() { 58 | clock_gettime(CLOCK_REALTIME, &t2); 59 | elapsed_time_milliseconds += (t2.tv_sec - t1.tv_sec) * 1000.0f; 60 | elapsed_time_milliseconds += ((float)(t2.tv_nsec - t1.tv_nsec)) / MILLION; 61 | } 62 | }; 63 | #endif 64 | -------------------------------------------------------------------------------- /src/todo.txt: -------------------------------------------------------------------------------- 1 | Readme.md 2 | - Performance (using schwarz algorithm, against binvox) 3 | - Switch to cuda libs for helper 4 | 5 | - estimate block/grid size 6 | - optimize, output to magicavoxel? Png? interact with polyvox? 7 | - is magicbits faster (less memory lookup) 8 | 9 | ### VOXELISATION 10 | - Cleanup writeout to binvox, use original file name, append gridsize (see ooc_svo_builder) 11 | 12 | OPTIMIZATIONS 13 | - model bbox can be computed in GPU pass 14 | - buffered bitset (no speedup) 15 | 16 | VOXEL TABLE 17 | - Variable size queue voor gevonden voxels? (hoe werkt dat in CUDA?) 
(zie stack overflow question) 18 | - Implementeren als hashmap+ telkens memory bij alloceren bij element toevoegen aan lijst, spinlock 19 | 20 | -------------------------------------------------------------------------------- /src/util.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | // This file contains various utility functions that are used throughout the program and didn't really belong in their own header 3 | 4 | #include 5 | #include "TriMesh.h" 6 | #include "cuda.h" 7 | #include "cuda_runtime.h" 8 | #include 9 | #include 10 | 11 | template 12 | inline float3 trimesh_to_float3(const trimeshtype a) { 13 | return make_float3(a.x, a.y, a.z); 14 | } 15 | template 16 | inline trimeshtype float3_to_trimesh(const float3 a) { 17 | return trimeshtype(a.x, a.y, a.z); 18 | } 19 | 20 | __host__ __device__ inline int3 float3_to_int3(const float3 a) { 21 | return make_int3(static_cast(a.x), static_cast(a.y), static_cast(a.z)); 22 | } 23 | 24 | // Check if a voxel in the voxel table is set 25 | __host__ __device__ inline bool checkVoxel(size_t x, size_t y, size_t z, const uint3 gridsize, const unsigned int* vtable){ 26 | size_t location = x + (y*gridsize.x) + (z*gridsize.x*gridsize.y); 27 | size_t int_location = location / size_t(32); 28 | /*size_t max_index = (gridsize*gridsize*gridsize) / __int64(32); 29 | if (int_location >= max_index){ 30 | fprintf(stdout, "Requested index too big: %llu \n", int_location); 31 | fprintf(stdout, "X %llu Y %llu Z %llu \n", int_location); 32 | }*/ 33 | unsigned int bit_pos = size_t(31) - (location % size_t(32)); // we count bit positions RtL, but array indices LtR 34 | if ((vtable[int_location]) & (1 << bit_pos)){ 35 | return true; 36 | } 37 | return false; 38 | } 39 | 40 | // An Axis Aligned Box (AAB) of a certain type - to be initialized with a min and max 41 | template 42 | struct AABox { 43 | T min; 44 | T max; 45 | __device__ __host__ AABox() : min(T()), max(T()) {} 46 | 
__device__ __host__ AABox(T min, T max) : min(min), max(max) {} 47 | }; 48 | 49 | // Voxelisation info (global parameters for the voxelization process) 50 | struct voxinfo { 51 | AABox bbox; 52 | uint3 gridsize; 53 | size_t n_triangles; 54 | float3 unit; 55 | 56 | voxinfo(const AABox bbox, const uint3 gridsize, const size_t n_triangles) 57 | : gridsize(gridsize), bbox(bbox), n_triangles(n_triangles) { 58 | unit.x = (bbox.max.x - bbox.min.x) / float(gridsize.x); 59 | unit.y = (bbox.max.y - bbox.min.y) / float(gridsize.y); 60 | unit.z = (bbox.max.z - bbox.min.z) / float(gridsize.z); 61 | } 62 | 63 | void print() { 64 | fprintf(stdout, "[Voxelization] Bounding Box: (%f,%f,%f)-(%f,%f,%f) \n", bbox.min.x, bbox.min.y, bbox.min.z, bbox.max.x, bbox.max.y, bbox.max.z); 65 | fprintf(stdout, "[Voxelization] Grid size: %i %i %i \n", gridsize.x, gridsize.y, gridsize.z); 66 | fprintf(stdout, "[Voxelization] Triangles: %zu \n", n_triangles); 67 | fprintf(stdout, "[Voxelization] Unit length: x: %f y: %f z: %f\n", unit.x, unit.y, unit.z); 68 | } 69 | }; 70 | 71 | // Create mesh BBOX _cube_, using the maximum length between bbox min and bbox max 72 | // We want to end up with a cube that is this max length. 
73 | // So we pad the directions in which this length is not reached 74 | // 75 | // Example: (1,2,3) to (4,4,4) becomes: 76 | // Max distance is 3 77 | // 78 | // (1, 1.5, 2) to (4,4.5,5), which is a cube with side 3 79 | // 80 | template 81 | inline AABox createMeshBBCube(AABox box) { 82 | AABox answer(box.min, box.max); // initialize answer 83 | float3 lengths = box.max - box.min; // check length of given bbox in every direction 84 | float max_length = std::max(lengths.x, std::max(lengths.y, lengths.z)); // find max length 85 | 86 | if (max_length != lengths.x) { 87 | float delta = max_length - lengths.x; // compute difference between largest length and current (X,Y or Z) length 88 | answer.min.x = box.min.x - (delta / 2.0f); // pad with half the difference before current min 89 | answer.max.x = box.max.x + (delta / 2.0f); // pad with half the difference behind current max 90 | } 91 | if (max_length != lengths.y) { 92 | float delta = max_length - lengths.y; // compute difference between largest length and current (X,Y or Z) length 93 | answer.min.y = box.min.y - (delta / 2.0f); // pad with half the difference before current min 94 | answer.max.y = box.max.y + (delta / 2.0f); // pad with half the difference behind current max 95 | } 96 | if (max_length != lengths.z) { 97 | float delta = max_length - lengths.z; // compute difference between largest length and current (X,Y or Z) length 98 | answer.min.z = box.min.z - (delta / 2.0f); // pad with half the difference before current min 99 | answer.max.z = box.max.z + (delta / 2.0f); // pad with half the difference behind current max 100 | } 101 | 102 | // Next snippet adresses the problem reported here: https://github.com/Forceflow/cuda_voxelizer/issues/7 103 | // Suspected cause: If a triangle is axis-aligned and lies perfectly on a voxel edge, it sometimes gets counted / not counted 104 | // Probably due to a numerical instability (division by zero?) 
105 | // Ugly fix: we pad the bounding box on all sides by 1/10001th of its total length, bringing all triangles ever so slightly off-grid 106 | float3 epsilon = (answer.max - answer.min) / 10001.0f; 107 | answer.min -= epsilon; 108 | answer.max += epsilon; 109 | return answer; 110 | } 111 | 112 | // Helper method to print bits 113 | void inline printBits(size_t const size, void const * const ptr) { 114 | unsigned char *b = (unsigned char*)ptr; 115 | unsigned char byte; 116 | int i, j; 117 | for (i = static_cast(size) - 1; i >= 0; i--) { 118 | for (j = 7; j >= 0; j--) { 119 | byte = b[i] & (1 << j); 120 | byte >>= j; 121 | if (byte) { 122 | printf("X"); 123 | } 124 | else { 125 | printf("."); 126 | } 127 | //printf("%u", byte); 128 | } 129 | } 130 | puts(""); 131 | } 132 | 133 | // readablesizestrings 134 | inline std::string readableSize(size_t bytes) { 135 | double bytes_d = static_cast(bytes); 136 | std::string r; 137 | if (bytes_d <= 0) r = "0 Bytes"; 138 | else if (bytes_d >= 1099511627776.0) r = std::to_string(static_cast(bytes_d / 1099511627776.0)) + " TB"; 139 | else if (bytes_d >= 1073741824.0) r = std::to_string(static_cast(bytes_d / 1073741824.0)) + " GB"; 140 | else if (bytes_d >= 1048576.0) r = std::to_string(static_cast(bytes_d / 1048576.0)) + " MB"; 141 | else if (bytes_d >= 1024.0) r = std::to_string(static_cast(bytes_d / 1024.0)) + " KB"; 142 | else r = std::to_string(static_cast(bytes_d)) + " bytes"; 143 | return r; 144 | }; 145 | 146 | // check if file exists 147 | inline bool file_exists(const std::string& name) { 148 | std::ifstream f(name.c_str()); 149 | bool exists = f.good(); 150 | f.close(); 151 | return exists; 152 | } 153 | -------------------------------------------------------------------------------- /src/util_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include "util_cuda.h" 2 | 3 | // Check if CUDA requirements are met 4 | bool initCuda(){ 5 | 6 | int device_count = 0; 7 | // Check 
if CUDA runtime calls work at all 8 | cudaError t = cudaGetDeviceCount(&device_count); 9 | if (t != cudaSuccess) { 10 | fprintf(stderr, "[CUDA] First call to CUDA Runtime API failed. Are the drivers installed? \n"); 11 | return false; 12 | } 13 | 14 | // Is there a CUDA device at all? 15 | checkCudaErrors(cudaGetDeviceCount(&device_count)); 16 | if(device_count < 1){ 17 | fprintf(stderr, "[CUDA] No CUDA devices found. Make sure CUDA device is powered, connected and available. \n \n"); 18 | fprintf(stderr, "[CUDA] On laptops: disable powersave/battery mode. \n"); 19 | fprintf(stderr, "[CUDA] Exiting... \n"); 20 | return false; 21 | } 22 | 23 | fprintf(stderr, "[CUDA] CUDA device(s) found, picking best one \n"); 24 | fprintf(stdout, "[CUDA] "); 25 | // We have at least 1 CUDA device, so now select the fastest (method from Nvidia helper library) 26 | int device = findCudaDevice(0, 0); 27 | 28 | // Print available device memory 29 | cudaDeviceProp properties; 30 | checkCudaErrors(cudaGetDeviceProperties(&properties,device)); 31 | fprintf(stdout, "[CUDA] Best device: %s \n", properties.name); 32 | size_t free, total; 33 | checkCudaErrors(cudaMemGetInfo(&free, &total)); 34 | fprintf(stdout,"[CUDA] Available device memory: %llu of %llu MB \n", (free >> 20), (total >> 20)); 35 | 36 | // Check compute capability 37 | if (properties.major < 2){ 38 | fprintf(stderr, "[CUDA] Your cuda device has compute capability %i.%i. We need at least 2.0 for atomic operations. 
\n", properties.major, properties.minor); 39 | return false; 40 | } 41 | return true; 42 | } -------------------------------------------------------------------------------- /src/util_cuda.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // Standard libs 4 | #include 5 | #include 6 | // Cuda 7 | #include "cuda_runtime.h" 8 | #include "libs/cuda/helper_cuda.h" 9 | #include "libs/cuda/helper_math.h" 10 | 11 | // Function to check cuda requirements 12 | bool initCuda(); -------------------------------------------------------------------------------- /src/util_io.cpp: -------------------------------------------------------------------------------- 1 | #include "util_io.h" 2 | 3 | using namespace std; 4 | 5 | // helper function to get file length (in number of ASCII characters) 6 | size_t get_file_length(const std::string base_filename){ 7 | // open file at the end 8 | std::ifstream input(base_filename.c_str(), ios_base::ate | ios_base::binary); 9 | assert(input); 10 | size_t length = input.tellg(); 11 | input.close(); 12 | return length; // get file length 13 | } 14 | 15 | // Read `length` raw bytes from the start of the file `base_filename` into the buffer `data` (which must hold at least `length` bytes) 16 | void read_binary(void* data, const size_t length, const std::string base_filename){ 17 | // open file 18 | std::ifstream input(base_filename.c_str(), ios_base::in | ios_base::binary); 19 | assert(input); 20 | #ifndef SILENT 21 | fprintf(stdout, "[I/O] Reading %zu kb of binary data from file %s \n", size_t(length / 1024.0f), base_filename.c_str()); fflush(stdout); 22 | #endif 23 | input.seekg(0, input.beg); 24 | input.read((char*) data, static_cast<std::streamsize>(length)); // read the requested number of bytes (was hard-coded to 8) 25 | input.close(); 26 | return; 27 | } 28 | 29 | // Helper function to write single vertex normal to OBJ file 30 | static void write_vertex_normal(ofstream& output, const int3& v) { 31 | output << "vn " << v.x << " " << v.y << " " << v.z << endl; 32 | } 33 | 34 | // Helper function to write single vertex to OBJ file 35 | static void write_vertex(ofstream& output, const 
int3& v) { 36 | output << "v " << v.x << " " << v.y << " " << v.z << endl; 37 | } 38 | 39 | // Helper function to write single vertex 40 | static void write_face(ofstream& output, const int3& v) { 41 | output << "f " << v.x << " " << v.y << " " << v.z << endl; 42 | } 43 | 44 | // Helper function to write full cube (using relative vertex positions in the OBJ file - support for this should be widespread by now) 45 | void write_cube(const int x, const int y, const int z, ofstream& output) { 46 | // 2-------1 47 | // /| /| 48 | // / | / | 49 | // 7--|----8 | 50 | // | 4----|--3 51 | // | / | / 52 | // 5-------6 53 | // Create vertices 54 | int3 v1 = make_int3(x+1, y+1, z + 1); 55 | int3 v2 = make_int3(x, y+1, z + 1); 56 | int3 v3 = make_int3(x+1, y, z + 1); 57 | int3 v4 = make_int3(x, y, z + 1); 58 | int3 v5 = make_int3(x, y, z); 59 | int3 v6 = make_int3(x+1, y, z); 60 | int3 v7 = make_int3(x, y+1, z); 61 | int3 v8 = make_int3(x+1, y+1, z); 62 | // write them in reverse order, so relative position is -i for v_i 63 | write_vertex(output, v8); 64 | write_vertex(output, v7); 65 | write_vertex(output, v6); 66 | write_vertex(output, v5); 67 | write_vertex(output, v4); 68 | write_vertex(output, v3); 69 | write_vertex(output, v2); 70 | write_vertex(output, v1); 71 | // create faces 72 | // back 73 | write_face(output, make_int3(-1, -3, -4)); 74 | write_face(output, make_int3(-1, -4, -2)); 75 | // bottom 76 | write_face(output, make_int3(-4, -3, -6)); 77 | write_face(output, make_int3(-4, -6, -5)); 78 | // right 79 | write_face(output, make_int3(-3, -1, -8)); 80 | write_face(output, make_int3(-3, -8, -6)); 81 | // top 82 | write_face(output, make_int3(-1, -2, -7)); 83 | write_face(output, make_int3(-1, -7, -8)); 84 | // left 85 | write_face(output, make_int3(-2, -4, -5)); 86 | write_face(output, make_int3(-2, -5, -7)); 87 | // front 88 | write_face(output, make_int3(-5, -6, -8)); 89 | write_face(output, make_int3(-5, -8, -7)); 90 | } 91 | 92 | void write_obj_cubes(const 
unsigned int* vtable, const voxinfo v_info, const std::string base_filename) { 93 | string filename_output = base_filename + string("_") + to_string(v_info.gridsize.x) + string("_voxels.obj"); 94 | ofstream output(filename_output.c_str(), ios::out); 95 | 96 | #ifndef SILENT 97 | fprintf(stdout, "[I/O] Writing data in obj voxels format to file %s \n", filename_output.c_str()); 98 | // Write stats 99 | size_t voxels_seen = 0; 100 | const size_t write_stats_25 = (size_t(v_info.gridsize.x) * size_t(v_info.gridsize.y) * size_t(v_info.gridsize.z)) / 4.0f; 101 | fprintf(stdout, "[I/O] Writing to file: 0%%..."); 102 | #endif 103 | 104 | 105 | // Write vertex normals once 106 | //write_vertex_normal(output, glm::ivec3(0, 0, -1)); // forward = 1 107 | //write_vertex_normal(output, glm::ivec3(0, 0, 1)); // backward = 2 108 | //write_vertex_normal(output, glm::ivec3(-1, 0, 0)); // left = 3 109 | //write_vertex_normal(output, glm::ivec3(1, 0, 0)); // right = 4 110 | //write_vertex_normal(output, glm::ivec3(0, -1, 0)); // bottom = 5 111 | //write_vertex_normal(output, glm::ivec3(0, 1, 0)); // top = 6 112 | //size_t voxels_written = 0; 113 | 114 | assert(output); 115 | for (size_t x = 0; x < v_info.gridsize.x; x++) { 116 | for (size_t y = 0; y < v_info.gridsize.y; y++) { 117 | for (size_t z = 0; z < v_info.gridsize.z; z++) { 118 | #ifndef SILENT 119 | voxels_seen++; 120 | if (voxels_seen == write_stats_25) {fprintf(stdout, "25%%...");} 121 | else if (voxels_seen == write_stats_25 * size_t(2)) {fprintf(stdout, "50%%...");} 122 | else if (voxels_seen == write_stats_25 * size_t(3)) {fprintf(stdout, "75%%...");} 123 | #endif 124 | if (checkVoxel(x, y, z, v_info.gridsize, vtable)) { 125 | //voxels_written += 1; 126 | write_cube(static_cast(x), static_cast(y), static_cast(z), output); 127 | } 128 | } 129 | } 130 | } 131 | #ifndef SILENT 132 | fprintf(stdout, "100%% \n"); 133 | #endif 134 | // std::cout << "written " << voxels_written << std::endl; 135 | 136 | #ifndef SILENT 137 | 
fprintf(stdout, "[I/O] Reordering / Optimizing mesh with Trimesh2 \n"); 138 | #endif 139 | // Load the file using TriMesh2 140 | trimesh::TriMesh* temp_mesh = trimesh::TriMesh::read(filename_output.c_str()); 141 | trimesh::reorder_verts(temp_mesh); 142 | //trimesh::faceflip(temp_mesh); 143 | //trimesh::edgeflip(temp_mesh); 144 | //temp_mesh->clear_normals(); 145 | //temp_mesh->need_normals(); 146 | #ifndef SILENT 147 | fprintf(stdout, "[I/O] Writing final mesh to file %s \n", filename_output.c_str()); 148 | #endif 149 | temp_mesh->write(filename_output.c_str()); 150 | 151 | output.close(); 152 | } 153 | 154 | void write_obj_pointcloud(const unsigned int* vtable, const voxinfo v_info, const std::string base_filename) { 155 | string filename_output = base_filename + string("_") + to_string(v_info.gridsize.x) + string("_pointcloud.obj"); 156 | ofstream output(filename_output.c_str(), ios::out); 157 | 158 | #ifndef SILENT 159 | fprintf(stdout, "[I/O] Writing data in obj point cloud format to %s \n", filename_output.c_str()); 160 | size_t voxels_seen = 0; 161 | const size_t write_stats_25 = (size_t(v_info.gridsize.x) * size_t(v_info.gridsize.y) * size_t(v_info.gridsize.z)) / 4.0f; 162 | fprintf(stdout, "[I/O] Writing to file: 0%%..."); 163 | #endif 164 | 165 | // write stats 166 | size_t voxels_written = 0; 167 | 168 | assert(output); 169 | for (size_t x = 0; x < v_info.gridsize.x; x++) { 170 | for (size_t y = 0; y < v_info.gridsize.y; y++) { 171 | for (size_t z = 0; z < v_info.gridsize.z; z++) { 172 | #ifndef SILENT 173 | voxels_seen++; 174 | if (voxels_seen == write_stats_25) { fprintf(stdout, "25%%...");} 175 | else if (voxels_seen == write_stats_25 * size_t(2)) { fprintf(stdout, "50%%...");} 176 | else if (voxels_seen == write_stats_25 * size_t(3)) {fprintf(stdout, "75%%...");} 177 | #endif 178 | if (checkVoxel(x, y, z, v_info.gridsize, vtable)) { 179 | voxels_written += 1; 180 | output << "v " << (x+0.5) << " " << (y + 0.5) << " " << (z + 0.5) << endl; // +0.5 to 
put vertex in the middle of the voxel 181 | } 182 | } 183 | } 184 | } 185 | #ifndef SILENT 186 | fprintf(stdout, "100%% \n"); 187 | #endif 188 | // std::cout << "written " << voxels_written << std::endl; 189 | output.close(); 190 | } 191 | 192 | void write_binary(void* data, size_t bytes, const std::string base_filename){ 193 | string filename_output = base_filename + string(".bin"); 194 | #ifndef SILENT 195 | fprintf(stdout, "[I/O] Writing data in binary format to %s (%s) \n", filename_output.c_str(), readableSize(bytes).c_str()); 196 | #endif 197 | ofstream output(filename_output.c_str(), ios_base::out | ios_base::binary); 198 | output.write((char*)data, bytes); 199 | output.close(); 200 | } 201 | 202 | void write_binvox(const unsigned int* vtable, const voxinfo v_info, const std::string base_filename){ 203 | // Open file 204 | string filename_output = base_filename + string("_") + to_string(v_info.gridsize.x) + string(".binvox"); 205 | #ifndef SILENT 206 | fprintf(stdout, "[I/O] Writing data in binvox format to %s \n", filename_output.c_str()); 207 | #endif 208 | ofstream output(filename_output.c_str(), ios::out | ios::binary); 209 | assert(output); 210 | // Write ASCII header 211 | output << "#binvox 1" << endl; 212 | output << "dim " << v_info.gridsize.x << " " << v_info.gridsize.y << " " << v_info.gridsize.z << "" << endl; 213 | output << "translate " << v_info.bbox.min.x << " " << v_info.bbox.min.y << " " << v_info.bbox.min.z << endl; 214 | output << "scale " << max(max(v_info.bbox.max.x - v_info.bbox.min.x, v_info.bbox.max.y - v_info.bbox.min.y), 215 | v_info.bbox.max.z - v_info.bbox.min.z) << endl; 216 | output << "data" << endl; 217 | 218 | // Write BINARY Data (and compress it a bit using run-length encoding) 219 | char currentvalue, current_seen; 220 | for (size_t x = 0; x < v_info.gridsize.x; x++){ 221 | for (size_t z = 0; z < v_info.gridsize.z; z++){ 222 | for (size_t y = 0; y < v_info.gridsize.y; y++){ 223 | if (x == 0 && y == 0 && z == 0){ // 
special case: first voxel 224 | currentvalue = checkVoxel(0, 0, 0, v_info.gridsize, vtable); 225 | output.write((char*)¤tvalue, 1); 226 | current_seen = 1; 227 | continue; 228 | } 229 | char nextvalue = checkVoxel(x, y, z, v_info.gridsize, vtable); 230 | if (nextvalue != currentvalue || current_seen == (char) 255){ 231 | output.write((char*)¤t_seen, 1); 232 | current_seen = 1; 233 | currentvalue = nextvalue; 234 | output.write((char*)¤tvalue, 1); 235 | } 236 | else { 237 | current_seen++; 238 | } 239 | } 240 | } 241 | } 242 | 243 | // Write rest 244 | output.write((char*)¤t_seen, 1); 245 | output.close(); 246 | } 247 | 248 | // Experimental MagicaVoxel file format output 249 | void write_vox(const unsigned int* vtable, const voxinfo v_info, const std::string base_filename) { 250 | string filename_output = base_filename + string("_") + to_string(v_info.gridsize.x) + string(".vox"); 251 | vox::VoxWriter voxwriter; 252 | voxwriter.AddColor(255, 255, 255,0, 0); 253 | 254 | #ifndef SILENT 255 | fprintf(stdout, "[I/O] Writing data in vox format to %s \n", filename_output.c_str()); 256 | 257 | // Write stats 258 | size_t voxels_seen = 0; 259 | const size_t write_stats_25 = (size_t(v_info.gridsize.x) * size_t(v_info.gridsize.y) * size_t(v_info.gridsize.z)) / 4.0f; 260 | fprintf(stdout, "[I/O] Writing to file: 0%%..."); 261 | size_t voxels_written = 0; 262 | #endif 263 | 264 | for (size_t x = 0; x < v_info.gridsize.x; x++) { 265 | for (size_t y = 0; y < v_info.gridsize.z; y++) { 266 | for (size_t z = 0; z < v_info.gridsize.y; z++) { 267 | #ifndef SILENT 268 | // Progress stats 269 | voxels_seen++; 270 | if (voxels_seen == write_stats_25) { fprintf(stdout, "25%%..."); } 271 | else if (voxels_seen == write_stats_25 * size_t(2)) { fprintf(stdout, "50%%..."); } 272 | else if (voxels_seen == write_stats_25 * size_t(3)) { fprintf(stdout, "75%%..."); } 273 | #endif 274 | if (checkVoxel(x, y, z, v_info.gridsize, vtable)) { 275 | // Somehow, this makes the vox model come out correct 
way up. Some axes probably got switched along the way 276 | voxwriter.AddVoxel(x, -z + v_info.gridsize.z, y, 1); 277 | } 278 | } 279 | } 280 | } 281 | #ifndef SILENT 282 | fprintf(stdout, "100%% \n"); 283 | #endif 284 | voxwriter.SaveToFile(filename_output); 285 | } -------------------------------------------------------------------------------- /src/util_io.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "util.h" 7 | #include "TriMesh_algo.h" 8 | #include "util.h" 9 | #include "libs/magicavoxel_file_writer/VoxWriter.h" 10 | 11 | size_t get_file_length(const std::string base_filename); 12 | void read_binary(void* data, const size_t length, const std::string base_filename); 13 | void write_binary(void* data, const size_t bytes, const std::string base_filename); 14 | void write_binvox(const unsigned int* vtable, const voxinfo v_info, const std::string base_filename); 15 | void write_obj_pointcloud(const unsigned int* vtable, const voxinfo v_info, const std::string base_filename); 16 | void write_obj_cubes(const unsigned int* vtable, const voxinfo v_info, const std::string base_filename); 17 | void write_vox(const unsigned int* vtable, const voxinfo v_info, const std::string base_filename); 18 | -------------------------------------------------------------------------------- /src/voxelize.cu: -------------------------------------------------------------------------------- 1 | #include "voxelize.cuh" 2 | 3 | // CUDA Global Memory variables 4 | 5 | // Debug counters for some sanity checks 6 | #ifdef _DEBUG 7 | __device__ size_t debug_d_n_voxels_marked = 0; 8 | __device__ size_t debug_d_n_triangles = 0; 9 | __device__ size_t debug_d_n_voxels_tested = 0; 10 | #endif 11 | 12 | // Possible optimization: buffer bitsets (for now: Disabled because too much overhead) 13 | //struct bufferedBitSetter{ 14 | // unsigned int* voxel_table; 15 | // size_t 
current_int_location; 16 | // unsigned int current_mask; 17 | // 18 | // __device__ __inline__ bufferedBitSetter(unsigned int* voxel_table, size_t index) : 19 | // voxel_table(voxel_table), current_mask(0) { 20 | // current_int_location = int(index / 32.0f); 21 | // } 22 | // 23 | // __device__ __inline__ void setBit(size_t index){ 24 | // size_t new_int_location = int(index / 32.0f); 25 | // if (current_int_location != new_int_location){ 26 | // flush(); 27 | // current_int_location = new_int_location; 28 | // } 29 | // unsigned int bit_pos = 31 - (unsigned int)(int(index) % 32); 30 | // current_mask = current_mask | (1 << bit_pos); 31 | // } 32 | // 33 | // __device__ __inline__ void flush(){ 34 | // if (current_mask != 0){ 35 | // atomicOr(&(voxel_table[current_int_location]), current_mask); 36 | // } 37 | // } 38 | //}; 39 | 40 | // Possible optimization: check bit before you set it - don't need to do atomic operation if it's already set to 1 41 | // For now: overhead, so it seems 42 | //__device__ __inline__ bool checkBit(unsigned int* voxel_table, size_t index){ 43 | // size_t int_location = index / size_t(32); 44 | // unsigned int bit_pos = size_t(31) - (index % size_t(32)); // we count bit positions RtL, but array indices LtR 45 | // return ((voxel_table[int_location]) & (1 << bit_pos)); 46 | //} 47 | 48 | // Set a bit in the giant voxel table. This involves doing an atomic operation on a 32-bit word in memory. 
49 | // Blocking other threads writing to it for a very short time 50 | __device__ __inline__ void setBit(unsigned int* voxel_table, size_t index){ 51 | size_t int_location = index / size_t(32); 52 | unsigned int bit_pos = size_t(31) - (index % size_t(32)); // we count bit positions RtL, but array indices LtR 53 | unsigned int mask = 1 << bit_pos; 54 | atomicOr(&(voxel_table[int_location]), mask); 55 | } 56 | 57 | // Main triangle voxelization method 58 | __global__ void voxelize_triangle(voxinfo info, float* triangle_data, unsigned int* voxel_table, bool morton_order){ 59 | size_t thread_id = threadIdx.x + blockIdx.x * blockDim.x; 60 | size_t stride = blockDim.x * gridDim.x; 61 | 62 | // Common variables used in the voxelization process 63 | float3 delta_p = make_float3(info.unit.x, info.unit.y, info.unit.z); 64 | int3 grid_max = make_int3(info.gridsize.x - 1, info.gridsize.y - 1, info.gridsize.z - 1); // grid max (grid runs from 0 to gridsize-1) 65 | 66 | while (thread_id < info.n_triangles){ // every thread works on specific triangles in its stride 67 | size_t t = thread_id * 9; // triangle contains 9 vertices 68 | 69 | // COMPUTE COMMON TRIANGLE PROPERTIES 70 | // Move vertices to origin using bbox 71 | float3 v0 = make_float3(triangle_data[t], triangle_data[t + 1], triangle_data[t + 2]) - info.bbox.min; 72 | float3 v1 = make_float3(triangle_data[t + 3], triangle_data[t + 4], triangle_data[t + 5]) - info.bbox.min; 73 | float3 v2 = make_float3(triangle_data[t + 6], triangle_data[t + 7], triangle_data[t + 8]) - info.bbox.min; 74 | // Edge vectors 75 | float3 e0 = v1 - v0; 76 | float3 e1 = v2 - v1; 77 | float3 e2 = v0 - v2; 78 | // Normal vector pointing up from the triangle 79 | float3 n = normalize(cross(e0, e1)); 80 | 81 | // COMPUTE TRIANGLE BBOX IN GRID 82 | // Triangle bounding box in world coordinates is min(v0,v1,v2) and max(v0,v1,v2) 83 | AABox t_bbox_world(fminf(v0, fminf(v1, v2)), fmaxf(v0, fmaxf(v1, v2))); 84 | // Triangle bounding box in voxel grid 
coordinates is the world bounding box divided by the grid unit vector 85 | AABox t_bbox_grid; 86 | t_bbox_grid.min = clamp(float3_to_int3(t_bbox_world.min / info.unit), make_int3(0, 0, 0), grid_max); 87 | t_bbox_grid.max = clamp(float3_to_int3(t_bbox_world.max / info.unit), make_int3(0, 0, 0), grid_max); 88 | 89 | // PREPARE PLANE TEST PROPERTIES 90 | float3 c = make_float3(0.0f, 0.0f, 0.0f); 91 | if (n.x > 0.0f) { c.x = info.unit.x; } 92 | if (n.y > 0.0f) { c.y = info.unit.y; } 93 | if (n.z > 0.0f) { c.z = info.unit.z; } 94 | float d1 = dot(n, (c - v0)); 95 | float d2 = dot(n, ((delta_p - c) - v0)); 96 | 97 | // PREPARE PROJECTION TEST PROPERTIES 98 | // XY plane 99 | float2 n_xy_e0 = make_float2(-1.0f * e0.y, e0.x); 100 | float2 n_xy_e1 = make_float2(-1.0f * e1.y, e1.x); 101 | float2 n_xy_e2 = make_float2(-1.0f * e2.y, e2.x); 102 | if (n.z < 0.0f) { 103 | n_xy_e0 = -n_xy_e0; 104 | n_xy_e1 = -n_xy_e1; 105 | n_xy_e2 = -n_xy_e2; 106 | } 107 | float d_xy_e0 = (-1.0f * dot(n_xy_e0, make_float2(v0.x, v0.y))) + max(0.0f, info.unit.x * n_xy_e0.x) + max(0.0f, info.unit.y * n_xy_e0.y); 108 | float d_xy_e1 = (-1.0f * dot(n_xy_e1, make_float2(v1.x, v1.y))) + max(0.0f, info.unit.x * n_xy_e1.x) + max(0.0f, info.unit.y * n_xy_e1.y); 109 | float d_xy_e2 = (-1.0f * dot(n_xy_e2, make_float2(v2.x, v2.y))) + max(0.0f, info.unit.x * n_xy_e2.x) + max(0.0f, info.unit.y * n_xy_e2.y); 110 | // YZ plane 111 | float2 n_yz_e0 = make_float2(-1.0f * e0.z, e0.y); 112 | float2 n_yz_e1 = make_float2(-1.0f * e1.z, e1.y); 113 | float2 n_yz_e2 = make_float2(-1.0f * e2.z, e2.y); 114 | if (n.x < 0.0f) { 115 | n_yz_e0 = -n_yz_e0; 116 | n_yz_e1 = -n_yz_e1; 117 | n_yz_e2 = -n_yz_e2; 118 | } 119 | float d_yz_e0 = (-1.0f * dot(n_yz_e0, make_float2(v0.y, v0.z))) + max(0.0f, info.unit.y * n_yz_e0.x) + max(0.0f, info.unit.z * n_yz_e0.y); 120 | float d_yz_e1 = (-1.0f * dot(n_yz_e1, make_float2(v1.y, v1.z))) + max(0.0f, info.unit.y * n_yz_e1.x) + max(0.0f, info.unit.z * n_yz_e1.y); 121 | float d_yz_e2 = (-1.0f 
* dot(n_yz_e2, make_float2(v2.y, v2.z))) + max(0.0f, info.unit.y * n_yz_e2.x) + max(0.0f, info.unit.z * n_yz_e2.y); 122 | // ZX plane 123 | float2 n_zx_e0 = make_float2(-1.0f * e0.x, e0.z); 124 | float2 n_zx_e1 = make_float2(-1.0f * e1.x, e1.z); 125 | float2 n_zx_e2 = make_float2(-1.0f * e2.x, e2.z); 126 | if (n.y < 0.0f) { 127 | n_zx_e0 = -n_zx_e0; 128 | n_zx_e1 = -n_zx_e1; 129 | n_zx_e2 = -n_zx_e2; 130 | } 131 | float d_xz_e0 = (-1.0f * dot(n_zx_e0, make_float2(v0.z, v0.x))) + max(0.0f, info.unit.x * n_zx_e0.x) + max(0.0f, info.unit.z * n_zx_e0.y); 132 | float d_xz_e1 = (-1.0f * dot(n_zx_e1, make_float2(v1.z, v1.x))) + max(0.0f, info.unit.x * n_zx_e1.x) + max(0.0f, info.unit.z * n_zx_e1.y); 133 | float d_xz_e2 = (-1.0f * dot(n_zx_e2, make_float2(v2.z, v2.x))) + max(0.0f, info.unit.x * n_zx_e2.x) + max(0.0f, info.unit.z * n_zx_e2.y); 134 | 135 | // test possible grid boxes for overlap 136 | for (int z = t_bbox_grid.min.z; z <= t_bbox_grid.max.z; z++){ 137 | for (int y = t_bbox_grid.min.y; y <= t_bbox_grid.max.y; y++){ 138 | for (int x = t_bbox_grid.min.x; x <= t_bbox_grid.max.x; x++){ 139 | // if (checkBit(voxel_table, location)){ continue; } 140 | #ifdef _DEBUG 141 | atomicAdd(&debug_d_n_voxels_tested, 1); 142 | #endif 143 | // TRIANGLE PLANE THROUGH BOX TEST 144 | float3 p = make_float3(x * info.unit.x, y * info.unit.y, z * info.unit.z); 145 | float nDOTp = dot(n, p); 146 | if (((nDOTp + d1) * (nDOTp + d2)) > 0.0f) { continue; } 147 | 148 | // PROJECTION TESTS 149 | // XY 150 | float2 p_xy = make_float2(p.x, p.y); 151 | if ((dot(n_xy_e0, p_xy) + d_xy_e0) < 0.0f) { continue; } 152 | if ((dot(n_xy_e1, p_xy) + d_xy_e1) < 0.0f) { continue; } 153 | if ((dot(n_xy_e2, p_xy) + d_xy_e2) < 0.0f) { continue; } 154 | 155 | // YZ 156 | float2 p_yz = make_float2(p.y, p.z); 157 | if ((dot(n_yz_e0, p_yz) + d_yz_e0) < 0.0f) { continue; } 158 | if ((dot(n_yz_e1, p_yz) + d_yz_e1) < 0.0f) { continue; } 159 | if ((dot(n_yz_e2, p_yz) + d_yz_e2) < 0.0f) { continue; } 160 | 161 | // XZ 
162 | float2 p_zx = make_float2(p.z, p.x); 163 | if ((dot(n_zx_e0, p_zx) + d_xz_e0) < 0.0f) { continue; } 164 | if ((dot(n_zx_e1, p_zx) + d_xz_e1) < 0.0f) { continue; } 165 | if ((dot(n_zx_e2, p_zx) + d_xz_e2) < 0.0f) { continue; } 166 | 167 | #ifdef _DEBUG 168 | atomicAdd(&debug_d_n_voxels_marked, 1); 169 | #endif 170 | 171 | if (morton_order){ 172 | size_t location = mortonEncode_LUT(x, y, z); 173 | setBit(voxel_table, location); 174 | } else { 175 | size_t location = 176 | static_cast(x) + 177 | (static_cast(y)* static_cast(info.gridsize.x)) + 178 | (static_cast(z)* (static_cast(info.gridsize.y)* static_cast(info.gridsize.x))); 179 | setBit(voxel_table, location); 180 | } 181 | continue; 182 | } 183 | } 184 | } 185 | #ifdef _DEBUG 186 | atomicAdd(&debug_d_n_triangles, 1); 187 | #endif 188 | thread_id += stride; 189 | } 190 | } 191 | 192 | void voxelize(const voxinfo& v, float* triangle_data, unsigned int* vtable, bool morton_code) { 193 | float elapsedTime; 194 | 195 | // Create timers, set start time 196 | cudaEvent_t start_vox, stop_vox; 197 | checkCudaErrors(cudaEventCreate(&start_vox)); 198 | checkCudaErrors(cudaEventCreate(&stop_vox)); 199 | 200 | // Copy morton LUT if we're encoding to morton 201 | if (morton_code){ 202 | checkCudaErrors(cudaMemcpyToSymbol(morton256_x, host_morton256_x, 256 * sizeof(uint32_t))); 203 | checkCudaErrors(cudaMemcpyToSymbol(morton256_y, host_morton256_y, 256 * sizeof(uint32_t))); 204 | checkCudaErrors(cudaMemcpyToSymbol(morton256_z, host_morton256_z, 256 * sizeof(uint32_t))); 205 | } 206 | 207 | // Estimate best block and grid size using CUDA Occupancy Calculator 208 | int blockSize; // The launch configurator returned block size 209 | int minGridSize; // The minimum grid size needed to achieve the maximum occupancy for a full device launch 210 | int gridSize; // The actual grid size needed, based on input size 211 | cudaOccupancyMaxPotentialBlockSize(&minGridSize, &blockSize, voxelize_triangle, 0, 0); 212 | // Round up 
according to array size 213 | gridSize = static_cast<int>((v.n_triangles + blockSize - 1) / blockSize); 214 | 215 | checkCudaErrors(cudaEventRecord(start_vox, 0)); 216 | voxelize_triangle<<<gridSize, blockSize>>>(v, triangle_data, vtable, morton_code); 217 | checkCudaErrors(cudaGetLastError()); // a kernel launch returns no status itself; surface launch errors here instead of failing silently 218 | checkCudaErrors(cudaDeviceSynchronize()); // wait for the kernel and report errors raised while it ran 219 | checkCudaErrors(cudaEventRecord(stop_vox, 0)); 220 | checkCudaErrors(cudaEventSynchronize(stop_vox)); 221 | checkCudaErrors(cudaEventElapsedTime(&elapsedTime, start_vox, stop_vox)); 222 | printf("[Perf] Voxelization GPU time: %.1f ms\n", elapsedTime); 223 | 224 | // SANITY CHECKS 225 | #ifdef _DEBUG 226 | size_t debug_n_triangles, debug_n_voxels_marked, debug_n_voxels_tested; 227 | checkCudaErrors(cudaMemcpyFromSymbol((void*)&(debug_n_triangles),debug_d_n_triangles, sizeof(debug_d_n_triangles), 0, cudaMemcpyDeviceToHost)); 228 | checkCudaErrors(cudaMemcpyFromSymbol((void*)&(debug_n_voxels_marked), debug_d_n_voxels_marked, sizeof(debug_d_n_voxels_marked), 0, cudaMemcpyDeviceToHost)); 229 | checkCudaErrors(cudaMemcpyFromSymbol((void*) & (debug_n_voxels_tested), debug_d_n_voxels_tested, sizeof(debug_d_n_voxels_tested), 0, cudaMemcpyDeviceToHost)); 230 | printf("[Debug] Processed %llu triangles on the GPU \n", debug_n_triangles); 231 | printf("[Debug] Tested %llu voxels for overlap on GPU \n", debug_n_voxels_tested); 232 | printf("[Debug] Marked %llu voxels as filled (includes duplicates!) 
\n", debug_n_voxels_marked); 233 | #endif 234 | 235 | // Destroy timers 236 | checkCudaErrors(cudaEventDestroy(start_vox)); 237 | checkCudaErrors(cudaEventDestroy(stop_vox)); 238 | } 239 | -------------------------------------------------------------------------------- /src/voxelize.cuh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // Common functions for both the solid and non-solid voxelization methods 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "util.h" 10 | #include "util_cuda.h" 11 | #include "libs/cuda/helper_math.h" 12 | #include "morton_LUTs.h" 13 | 14 | // Morton LUTs for when we need them 15 | __constant__ uint32_t morton256_x[256]; 16 | __constant__ uint32_t morton256_y[256]; 17 | __constant__ uint32_t morton256_z[256]; 18 | 19 | // Encode morton code using LUT table: one lookup per byte of x, y and z (bits 16-23, then 8-15, then 0-7), shifting the accumulated code left by 24 bits between byte groups 20 | __device__ inline uint64_t mortonEncode_LUT(unsigned int x, unsigned int y, unsigned int z){ 21 | uint64_t answer = 0; 22 | answer = morton256_z[(z >> 16) & 0xFF] | 23 | morton256_y[(y >> 16) & 0xFF] | 24 | morton256_x[(x >> 16) & 0xFF]; 25 | answer = answer << 24 | // was << 48: together with the << 24 below that shifted the first group by 72 bits, i.e. out of the 64-bit word 26 | morton256_z[(z >> 8) & 0xFF] | 27 | morton256_y[(y >> 8) & 0xFF] | 28 | morton256_x[(x >> 8) & 0xFF]; 29 | answer = answer << 24 | 30 | morton256_z[(z)& 0xFF] | 31 | morton256_y[(y)& 0xFF] | 32 | morton256_x[(x)& 0xFF]; 33 | return answer; 34 | } -------------------------------------------------------------------------------- /src/voxelize_solid.cu: -------------------------------------------------------------------------------- 1 | /* 2 | Solid voxelization based on the Schwarz-Seidel paper. 
3 | */ 4 | 5 | #include "voxelize.cuh" 6 | 7 | #ifdef _DEBUG 8 | __device__ size_t debug_d_n_voxels_marked = 0; 9 | __device__ size_t debug_d_n_triangles = 0; 10 | __device__ size_t debug_d_n_voxels_tested = 0; 11 | #endif 12 | 13 | #define float_error 0.000001 14 | 15 | // use Xor for voxels whose corresponding bits have to flipped 16 | __device__ __inline__ void setBitXor(unsigned int* voxel_table, size_t index) { 17 | size_t int_location = index / size_t(32); 18 | unsigned int bit_pos = size_t(31) - (index % size_t(32)); // we count bit positions RtL, but array indices LtR 19 | unsigned int mask = 1 << bit_pos; 20 | atomicXor(&(voxel_table[int_location]), mask); 21 | } 22 | 23 | //check the location with point and triangle 24 | __device__ inline int check_point_triangle(float2 v0, float2 v1, float2 v2, float2 point) 25 | { 26 | float2 PA = point - v0; 27 | float2 PB = point - v1; 28 | float2 PC = point - v2; 29 | 30 | float t1 = PA.x*PB.y - PA.y*PB.x; 31 | if (fabs(t1) < float_error&&PA.x*PB.x <= 0 && PA.y*PB.y <= 0) 32 | return 1; 33 | 34 | float t2 = PB.x*PC.y - PB.y*PC.x; 35 | if (fabs(t2) < float_error&&PB.x*PC.x <= 0 && PB.y*PC.y <= 0) 36 | return 2; 37 | 38 | float t3 = PC.x*PA.y - PC.y*PA.x; 39 | if (fabs(t3) < float_error&&PC.x*PA.x <= 0 && PC.y*PA.y <= 0) 40 | return 3; 41 | 42 | if (t1*t2 > 0 && t1*t3 > 0) 43 | return 0; 44 | else 45 | return -1; 46 | } 47 | 48 | //find the x coordinate of the voxel 49 | __device__ inline float get_x_coordinate(float3 n, float3 v0, float2 point) 50 | { 51 | return (-(n.y*(point.x - v0.y) + n.z*(point.y - v0.z)) / n.x + v0.x); 52 | } 53 | 54 | //check the triangle is counterclockwise or not 55 | __device__ inline bool checkCCW(float2 v0, float2 v1, float2 v2) 56 | { 57 | float2 e0 = v1 - v0; 58 | float2 e1 = v2 - v0; 59 | float result = e0.x*e1.y - e1.x*e0.y; 60 | if (result > 0) 61 | return true; 62 | else 63 | return false; 64 | } 65 | 66 | //top-left rule 67 | __device__ inline bool TopLeftEdge(float2 v0, float2 v1) 
68 | { 69 | return ((v1.y<v0.y) || (v1.y == v0.y&&v0.x>v1.x)); // NOTE(review): the text between '<' and '>' was lost in extraction and is restored here per the top-left rule - confirm against upstream 70 | } 71 | 72 | //generate solid voxelization 73 | __global__ void voxelize_triangle_solid(voxinfo info, float* triangle_data, unsigned int* voxel_table, bool morton_order) 74 | { 75 | size_t thread_id = threadIdx.x + blockIdx.x * blockDim.x; 76 | size_t stride = blockDim.x * gridDim.x; 77 | 78 | while (thread_id < info.n_triangles) { // every thread works on specific triangles in its stride 79 | size_t t = thread_id * 9; // triangle contains 9 vertices 80 | 81 | // COMPUTE COMMON TRIANGLE PROPERTIES 82 | // Move vertices to origin using bbox 83 | float3 v0 = make_float3(triangle_data[t], triangle_data[t + 1], triangle_data[t + 2]) - info.bbox.min; 84 | float3 v1 = make_float3(triangle_data[t + 3], triangle_data[t + 4], triangle_data[t + 5]) - info.bbox.min; 85 | float3 v2 = make_float3(triangle_data[t + 6], triangle_data[t + 7], triangle_data[t + 8]) - info.bbox.min; 86 | // Edge vectors 87 | float3 e0 = v1 - v0; 88 | float3 e1 = v2 - v1; 89 | float3 e2 = v0 - v2; 90 | // Normal vector pointing up from the triangle 91 | float3 n = normalize(cross(e0, e1)); 92 | if (fabs(n.x) < float_error) { thread_id += stride; continue; } // skip only this triangle (n.x is a divisor in get_x_coordinate); a plain return would also drop every later triangle of this thread 93 | 94 | // Calculate the projection of three point into yoz plane 95 | float2 v0_yz = make_float2(v0.y, v0.z); 96 | float2 v1_yz = make_float2(v1.y, v1.z); 97 | float2 v2_yz = make_float2(v2.y, v2.z); 98 | 99 | // Set the triangle counterclockwise 100 | if (!checkCCW(v0_yz, v1_yz, v2_yz)) 101 | { 102 | float2 v3 = v1_yz; 103 | v1_yz = v2_yz; 104 | v2_yz = v3; 105 | } 106 | 107 | // COMPUTE TRIANGLE BBOX IN GRID 108 | // Triangle bounding box in world coordinates is min(v0,v1,v2) and max(v0,v1,v2) 109 | float2 bbox_max = fmaxf(v0_yz, fmaxf(v1_yz, v2_yz)); 110 | float2 bbox_min = fminf(v0_yz, fminf(v1_yz, v2_yz)); 111 | 112 | float2 bbox_max_grid = make_float2(floor(bbox_max.x / info.unit.y - 0.5), floor(bbox_max.y / info.unit.z - 0.5)); 113 | float2 bbox_min_grid = make_float2(ceil(bbox_min.x / info.unit.y - 0.5), ceil(bbox_min.y / 
info.unit.z - 0.5)); 114 | 115 | for (int y = bbox_min_grid.x; y <= bbox_max_grid.x; y++) 116 | { 117 | for (int z = bbox_min_grid.y; z <= bbox_max_grid.y; z++) 118 | { 119 | float2 point = make_float2((y + 0.5) * info.unit.y, (z + 0.5) * info.unit.z); 120 | int checknum = check_point_triangle(v0_yz, v1_yz, v2_yz, point); 121 | if ((checknum == 1 && TopLeftEdge(v0_yz, v1_yz)) || (checknum == 2 && TopLeftEdge(v1_yz, v2_yz)) || (checknum == 3 && TopLeftEdge(v2_yz, v0_yz)) || (checknum == 0)) 122 | { 123 | int xmax = int(get_x_coordinate(n, v0, point) / info.unit.x - 0.5); 124 | for (int x = 0; x <= xmax; x++) 125 | { 126 | if (morton_order){ 127 | size_t location = mortonEncode_LUT(x, y, z); 128 | setBitXor(voxel_table, location); 129 | } else { 130 | size_t location = 131 | static_cast(x) + 132 | (static_cast(y) * static_cast(info.gridsize.x)) + 133 | (static_cast(z) * (static_cast(info.gridsize.y) * static_cast(info.gridsize.x))); 134 | setBitXor(voxel_table, location); 135 | } 136 | continue; 137 | } 138 | } 139 | } 140 | } 141 | // sanity check: atomically count triangles 142 | //atomicAdd(&triangles_seen_count, 1); 143 | thread_id += stride; 144 | } 145 | } 146 | 147 | void voxelize_solid(const voxinfo& v, float* triangle_data, unsigned int* vtable, bool morton_code) { 148 | float elapsedTime; 149 | 150 | // Create timers, set start time 151 | cudaEvent_t start_vox, stop_vox; 152 | checkCudaErrors(cudaEventCreate(&start_vox)); 153 | checkCudaErrors(cudaEventCreate(&stop_vox)); 154 | 155 | // Copy morton LUT if we're encoding to morton 156 | if (morton_code){ 157 | checkCudaErrors(cudaMemcpyToSymbol(morton256_x, host_morton256_x, 256 * sizeof(uint32_t))); 158 | checkCudaErrors(cudaMemcpyToSymbol(morton256_y, host_morton256_y, 256 * sizeof(uint32_t))); 159 | checkCudaErrors(cudaMemcpyToSymbol(morton256_z, host_morton256_z, 256 * sizeof(uint32_t))); 160 | } 161 | 162 | // Estimate best block and grid size using CUDA Occupancy Calculator 163 | int blockSize; // The 
launch configurator returned block size 164 | int minGridSize; // The minimum grid size needed to achieve the maximum occupancy for a full device launch 165 | int gridSize; // The actual grid size needed, based on input size 166 | cudaOccupancyMaxPotentialBlockSize(&minGridSize, &blockSize, voxelize_triangle_solid, 0, 0); 167 | // Round up according to array size 168 | gridSize = static_cast((v.n_triangles + blockSize - 1) / blockSize); 169 | 170 | checkCudaErrors(cudaEventRecord(start_vox, 0)); 171 | voxelize_triangle_solid << > > (v, triangle_data, vtable, morton_code); 172 | 173 | cudaDeviceSynchronize(); 174 | checkCudaErrors(cudaEventRecord(stop_vox, 0)); 175 | checkCudaErrors(cudaEventSynchronize(stop_vox)); 176 | checkCudaErrors(cudaEventElapsedTime(&elapsedTime, start_vox, stop_vox)); 177 | printf("[Perf] Voxelization GPU time: %.1f ms\n", elapsedTime); 178 | 179 | // SANITY CHECKS 180 | #ifdef _DEBUG 181 | size_t debug_n_triangles, debug_n_voxels_marked, debug_n_voxels_tested; 182 | checkCudaErrors(cudaMemcpyFromSymbol((void*)&(debug_n_triangles),debug_d_n_triangles, sizeof(debug_d_n_triangles), 0, cudaMemcpyDeviceToHost)); 183 | checkCudaErrors(cudaMemcpyFromSymbol((void*)&(debug_n_voxels_marked), debug_d_n_voxels_marked, sizeof(debug_d_n_voxels_marked), 0, cudaMemcpyDeviceToHost)); 184 | checkCudaErrors(cudaMemcpyFromSymbol((void*) & (debug_n_voxels_tested), debug_d_n_voxels_tested, sizeof(debug_d_n_voxels_tested), 0, cudaMemcpyDeviceToHost)); 185 | printf("[Debug] Processed %llu triangles on the GPU \n", debug_n_triangles); 186 | printf("[Debug] Tested %llu voxels for overlap on GPU \n", debug_n_voxels_tested); 187 | printf("[Debug] Marked %llu voxels as filled (includes duplicates!) 
\n", debug_n_voxels_marked); 188 | #endif 189 | 190 | // Destroy timers 191 | checkCudaErrors(cudaEventDestroy(start_vox)); 192 | checkCudaErrors(cudaEventDestroy(stop_vox)); 193 | } -------------------------------------------------------------------------------- /test_models/credit.txt: -------------------------------------------------------------------------------- 1 | Stanford Bunny Model - (c) 1996 Stanford University - http://www.graphics.stanford.edu/data/3Dscanrep/ --------------------------------------------------------------------------------