├── 3rdParty.txt ├── samples ├── 3rdParty │ └── stb_image │ │ └── CMakeLists.txt ├── common │ ├── loadOBJ.h │ ├── loadBinMesh.h │ ├── CMakeLists.txt │ ├── loadBinMesh.cpp │ ├── CmdLine.h │ ├── IO.h │ ├── loadOBJ.cpp │ └── Generator.h ├── s01_closestPoint_points_cpu │ ├── CMakeLists.txt │ └── closestPoint.cpp ├── s01_closestPoint_points_gpu │ ├── CMakeLists.txt │ ├── closestPoint.cu │ └── closestPoint_WideBVH.cu ├── CMakeLists.txt ├── s05_lineOfSight │ └── lineOfSight.cu ├── s02_distanceToTriangleMesh │ └── distanceToTriangleMesh.cu ├── s03_insideOutsideOfClosedMesh │ └── insideOutside.cu ├── s06_anyTriangleWithinRadius │ └── anyTriangleWithinRadius.cu └── s04_boxOverlapsOrInsideSurfaceMesh │ └── boxOverlapsOrInsideSurfaceMesh.cu ├── .gitignore ├── cuBQL ├── traversal │ └── fixedAnyShapeQuery.h ├── builder │ ├── cpu │ │ ├── instantiate_builders.cpp │ │ └── spatialMedian.h │ ├── cpu.h │ └── cuda │ │ ├── gpu_builder.h │ │ ├── instantiate_builders.cu │ │ ├── builder_common.h │ │ └── wide_gpu_builder.h ├── math │ ├── random.h │ ├── math.h │ ├── Ray.h │ └── affine.h ├── queries │ ├── triangleData │ │ ├── lineOfSight.h │ │ ├── boxInsideOutsideIntersects.h │ │ ├── pointInsideOutside.h │ │ ├── anyWithinRadius.h │ │ ├── closestPointOnAnyTriangle.h │ │ ├── Triangle.h │ │ ├── trianglesInBox.h │ │ ├── math │ │ │ ├── pointToTriangleDistance.h │ │ │ └── rayTriangleIntersections.h │ │ └── crossingCount.h │ ├── common │ │ └── knn.h │ └── pointData │ │ └── knn.h ├── CMakeLists.txt └── bvh.h ├── .github ├── workflows │ ├── Ubuntu.yml │ └── Windows.yml └── action-scripts │ └── install_cuda_ubuntu.sh ├── CONTRIBUTING.md └── CMakeLists.txt /3rdParty.txt: -------------------------------------------------------------------------------- 1 | This project uses the following 3rd-Party open-source projects, under 2 | their respective licences: 3 | 4 | 6 | 7 | -------------------------------------------------------------------------------- /samples/3rdParty/stb_image/CMakeLists.txt: 
-------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | add_library(stb_image INTERFACE) 5 | target_include_directories(stb_image INTERFACE ${CMAKE_CURRENT_LIST_DIR}) 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | textures 3 | *# 4 | .#* 5 | bin 6 | dbg 7 | *.user* 8 | *.sw? 9 | tags 10 | .ycm_extra_conf.pyc 11 | *.autosave 12 | *DS_Store* 13 | *.gz 14 | *.rpm 15 | *.zip 16 | *.bak 17 | *.patch 18 | .vscode 19 | deps 20 | tbb 21 | ispc 22 | *.aux 23 | *.bbl 24 | *.blg 25 | *.brf 26 | *.dvi 27 | *.lbl 28 | *.log 29 | *.swp 30 | *.out 31 | Session.vim 32 | .idea 33 | !*png/*.pdf 34 | .vs/ 35 | 36 | -------------------------------------------------------------------------------- /cuBQL/traversal/fixedAnyShapeQuery.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | /*! \file fixedAnyShape.h common stuff for any fixed-shape query 5 | (fixed radius query, fixed box query, fixed ray query, etc */ 6 | 7 | #pragma once 8 | 9 | #include "cuBQL/bvh.h" 10 | 11 | #define CUBQL_TERMINATE_TRAVERSAL 1 12 | #define CUBQL_CONTINUE_TRAVERSAL 0 13 | 14 | namespace cuBQL { 15 | } 16 | 17 | -------------------------------------------------------------------------------- /samples/common/loadOBJ.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | namespace cuBQL { 11 | namespace samples { 12 | 13 | std::vector loadOBJ(const std::string &fileName); 14 | 15 | void loadOBJ(std::vector &indices, 16 | std::vector &vertices, 17 | const std::string &fileName); 18 | } 19 | } 20 | 21 | -------------------------------------------------------------------------------- /samples/common/loadBinMesh.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | namespace cuBQL { 11 | namespace samples { 12 | 13 | std::vector loadBinMesh(const std::string &fileName); 14 | 15 | void loadBinMesh(std::vector &indices, 16 | std::vector &vertices, 17 | const std::string &fileName); 18 | } 19 | } 20 | 21 | -------------------------------------------------------------------------------- /samples/s01_closestPoint_points_cpu/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | # ================================================================== 5 | # sample that performs gpu-side closest-point queries between sets of 6 | # float3 points 7 | # ================================================================== 8 | add_executable(cuBQL_sample01_points_closestPoint_cpu 9 | closestPoint.cpp 10 | ) 11 | target_link_libraries(cuBQL_sample01_points_closestPoint_cpu 12 | # the cpu-side builders for float3 data 13 | cuBQL_cpu_float3 14 | # common samples stuff 15 | cuBQL_samples_common 16 | ) 17 | 18 | -------------------------------------------------------------------------------- /samples/s01_closestPoint_points_gpu/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | if (CUBQL_HAVE_CUDA) 5 | add_executable(cuBQL_sample01_points_closestPoint_cuda 6 | closestPoint.cu 7 | ) 8 | target_link_libraries(cuBQL_sample01_points_closestPoint_cuda 9 | # the cuda-side builders for float3 data 10 | cuBQL_cuda_float3 11 | # common samples stuff 12 | cuBQL_samples_common 13 | ) 14 | 15 | 16 | add_executable(cuBQL_sample01_points_closestPoint_wideBVH_cuda 17 | closestPoint_WideBVH.cu 18 | ) 19 | target_link_libraries(cuBQL_sample01_points_closestPoint_wideBVH_cuda 20 | # the cuda-side builders for float3 data 21 | cuBQL_cuda_float3 22 | # common samples stuff 23 | cuBQL_samples_common 24 | ) 25 | endif() 26 | -------------------------------------------------------------------------------- /cuBQL/builder/cpu/instantiate_builders.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | /*! 
instantiates the GPU builder(s) */ 5 | #define CUBQL_CPU_BUILDER_IMPLEMENTATION 1 6 | 7 | #include "cuBQL/bvh.h" 8 | #include "cuBQL/builder/cpu/spatialMedian.h" 9 | 10 | #ifdef CUBQL_INSTANTIATE_T 11 | // instantiate an explict type and dimension 12 | CUBQL_CPU_INSTANTIATE_BINARY_BVH(CUBQL_INSTANTIATE_T,CUBQL_INSTANTIATE_D) 13 | CUBQL_CPU_INSTANTIATE_WIDE_BVH(CUBQL_INSTANTIATE_T,CUBQL_INSTANTIATE_D,4) 14 | CUBQL_CPU_INSTANTIATE_WIDE_BVH(CUBQL_INSTANTIATE_T,CUBQL_INSTANTIATE_D,8) 15 | #else 16 | // default instantiation(s) for float3 only 17 | CUBQL_CPU_INSTANTIATE_BINARY_BVH(float,3) 18 | CUBQL_CPU_INSTANTIATE_WIDE_BVH(float,3,4) 19 | CUBQL_CPU_INSTANTIATE_WIDE_BVH(float,3,8) 20 | #endif 21 | 22 | 23 | -------------------------------------------------------------------------------- /samples/common/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | set(CUBQL_USER_DIM "" CACHE STRING "user-specified dim for tests w/ dim!=2,3,4") 5 | 6 | add_library(cuBQL_samples_common 7 | Generator.h 8 | Generator.cpp 9 | tiny_obj_loader.h 10 | loadOBJ.h 11 | loadOBJ.cpp 12 | loadBinMesh.h 13 | loadBinMesh.cpp 14 | ) 15 | target_link_libraries(cuBQL_samples_common 16 | PUBLIC 17 | cuBQL 18 | ) 19 | 20 | # ================================================================== 21 | # for testing, allow to specify a compile-time but user-defined 22 | # arbitrary dimensionality. 
not all tests might respect that (eg, 23 | # triangles only work for float3), but some will 24 | # ================================================================== 25 | if (NOT ("${CUBQL_USER_DIM}" STREQUAL "")) 26 | target_compile_definitions(cuBQL_samples_common 27 | PUBLIC 28 | CUBQL_USER_DIM=${CUBQL_USER_DIM}) 29 | endif() 30 | 31 | -------------------------------------------------------------------------------- /samples/common/loadBinMesh.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #include 5 | #include 6 | 7 | namespace cuBQL { 8 | namespace samples { 9 | 10 | void loadBinMesh(std::vector &indices, 11 | std::vector &vertices, 12 | const std::string &inFileName) 13 | { 14 | std::ifstream in(inFileName.c_str(),std::ios::binary); 15 | size_t numVertices; 16 | size_t numTriangles; 17 | 18 | in.read((char*)&numVertices,sizeof(numVertices)); 19 | vertices.resize(numVertices); 20 | in.read((char*)vertices.data(),numVertices*sizeof(vec3f)); 21 | 22 | in.read((char*)&numTriangles,sizeof(numTriangles)); 23 | indices.resize(numTriangles); 24 | in.read((char*)indices.data(),numTriangles*sizeof(vec3i)); 25 | } 26 | 27 | std::vector loadBinMesh(const std::string &fileName) 28 | { 29 | std::vector indices; 30 | std::vector vertices; 31 | loadBinMesh(indices,vertices,fileName); 32 | std::vector res; 33 | for (auto idx : indices) 34 | res.push_back({vertices[idx.x],vertices[idx.y],vertices[idx.z]}); 35 | return res; 36 | } 37 | 38 | } 39 | } 40 | 41 | -------------------------------------------------------------------------------- /samples/common/CmdLine.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
/*! simple helper class that allows for successively consuming
    arguments from a cmdline, with some error checking.

    The cursor starts at index 1, ie, av[0] (the program name) is
    never handed out. The object only stores the `av` pointer; it does
    not copy the argument strings. */
struct CmdLine {
  CmdLine(int ac, char **av) : ac(ac), av(av) {}
  /*! returns true iff all items have been consumed, and there is
      no other un-consumed argument to process. Uses '>=' rather
      than '==' so that an argument count below 1 (eg, argc==0) is
      reported as "nothing left" instead of never being consumed */
  inline bool consumed() const { return current >= ac; }
  /*! get next argument off cmdline, without any type-conversion;
      throws std::runtime_error if there is none left */
  inline std::string getString();
  /*! get next argument off cmdline, and convert to int; conversion
      errors surface as whatever std::stoi throws */
  inline int getInt() { return std::stoi(getString()); }
  /*! get next argument off cmdline, and convert to float; conversion
      errors surface as whatever std::stof throws */
  inline float getFloat() { return std::stof(getString()); }
  /*! read a float2 from the cmdline.
      NOTE(review): declared to return a single float, and no
      definition is visible here - confirm the intended return type */
  inline float get2f();
  /*! read a float3 from the cmdline.
      NOTE(review): same remark as for get2f() */
  inline float get3f();
private:
  /*! index of the next argument to hand out */
  int current = 1;
  const int ac;
  char **const av;
};

/*! get next argument off cmdline, without any type-conversion */
inline std::string CmdLine::getString()
{
  if (current >= ac)
    throw std::runtime_error
      ("CmdLine: requested to get next argument, but no more "
       "un-consumed arguments available");
  return av[current++];
}
/*! simple 24-bit linear congruence generator.

    The generator keeps a single 32-bit `state`; operator() only
    uses its low 24 bits to build a float, ui32() returns all 32.

    NOTE(review): the template parameter list is missing in this copy
    of the file. init() loops `n < N`, so the parameter is presumably
    an unsigned round count named N - confirm against the repository
    before relying on this. */
template
struct LCG {

  /*! default constructor; does NOT set `state` - call init() before
      drawing any numbers */
  inline __cubql_both LCG()
  { /* intentionally empty so we can use it in device vars that
       don't allow dynamic initialization (ie, PRD) */
  }
  /*! constructs and seeds the generator from two values, see init() */
  inline __cubql_both LCG(unsigned int val0, unsigned int val1)
  { init(val0,val1); }

  /*! seeds the generator: mixes val0 and val1 over N rounds of
      shift/add/xor steps and stores the first mixed word as the
      state (the second mixed word, v1, is only used inside the loop) */
  inline __cubql_both void init(unsigned int val0, unsigned int val1)
  {
    unsigned int v0 = val0;
    unsigned int v1 = val1;
    unsigned int s0 = 0;

    for (unsigned int n = 0; n < N; n++) {
      s0 += 0x9e3779b9;
      v0 += ((v1<<4)+0xa341316c)^(v1+s0)^((v1>>5)+0xc8013ea4);
      v1 += ((v0<<4)+0xad90777d)^(v0+s0)^((v0>>5)+0x7e95761e);
    }
    state = v0;
  }

  /*! advances the state by one step (state = A*state + C, wrapping
      at 32 bits) and returns the full new 32-bit state */
  inline __cubql_both uint32_t ui32()
  {
    const uint32_t LCG_A = 1664525u;
    const uint32_t LCG_C = 1013904223u;
    state = (LCG_A * state + LCG_C);
    return uint32_t(state);
  }

  /*! Generate random unsigned int in [0, 2^24), then use that to
      generate random float in [0.f,1.f). Advances the state by the
      same step as ui32() */
  inline __cubql_both float operator() ()
  {
    const uint32_t LCG_A = 1664525u;
    const uint32_t LCG_C = 1013904223u;
    state = (LCG_A * state + LCG_C);
    // keep the low 24 bits and scale by 1/2^24
    return (state & 0x00FFFFFF) / (float) 0x01000000;
  }

  /*! current generator state; uninitialized until init() has run */
  uint32_t state;
};
/*! load a vector of (binary) data from a binary-dump file. the
    data file is supposed to start with a size_t that specifies
    the *number* N of elements to be expected in that file,
    followed by the N "raw"-binary data items.

    \param fileName  path of the file to read
    \return          the N elements stored in the file

    Throws std::runtime_error if the file cannot be opened, or if it
    ends before the element count and all N elements were read (a
    truncated file used to come back as silently zero-filled data).

    NOTE(review): the template parameter list is missing in the copy
    of the file this was written from; `typename T` is taken from
    the sizeof(T) used for the read. */
template<typename T>
std::vector<T> loadBinary(const std::string &fileName)
{
  std::ifstream in(fileName.c_str(),std::ios::binary);
  if (!in.good())
    throw std::runtime_error("could not open '"+fileName+"'");

  size_t count = 0;
  in.read(reinterpret_cast<char *>(&count),sizeof(count));
  if (!in)
    throw std::runtime_error("could not read element count from '"+fileName+"'");

  std::vector<T> data(count);
  if (count > 0) {
    in.read(reinterpret_cast<char *>(data.data()),
            static_cast<std::streamsize>(count*sizeof(T)));
    if (!in)
      throw std::runtime_error("unexpected end of data in '"+fileName+"'");
  }
  return data;
}

/*! write a vector of (binary) data into a binary-dump file. the
    data file is supposed to start with a size_t that specifies
    the *number* N of elements to be expected in that file,
    followed by the N "raw"-binary data items.

    \param fileName  path of the file to (over-)write
    \param data      elements to store; written as raw bytes

    Throws std::runtime_error if the file cannot be opened for
    writing or if writing fails, so that lost data does not go
    unnoticed. */
template<typename T>
void saveBinary(const std::string &fileName,
                const std::vector<T> &data)
{
  std::ofstream out(fileName.c_str(),std::ios::binary);
  if (!out.good())
    throw std::runtime_error("could not open '"+fileName+"' for writing");

  const size_t count = data.size();
  out.write(reinterpret_cast<const char *>(&count),sizeof(count));
  if (count > 0)
    out.write(reinterpret_cast<const char *>(data.data()),
              static_cast<std::streamsize>(count*sizeof(T)));

  // flush explicitly: errors that only show up when the buffer is
  // written out would otherwise be swallowed by the destructor
  out.flush();
  if (!out)
    throw std::runtime_error("could not write to '"+fileName+"'");
}
// under nvcc, pull the global-namespace min/max into cuBQL:: (the
// built-in cuda functions that use floats); otherwise use the ones
// from the c++ standard library
using ::min;
using ::max;
#else
using std::min;
using std::max;
#endif

// CUBQL_INF: an 'infinity' constant, taken from cuda::std when
// compiling device code and from std:: otherwise.
// NOTE(review): the template argument of numeric_limits is missing in
// this copy of the file - confirm the intended scalar type.
#ifdef __CUDA_ARCH__
# define CUBQL_INF ::cuda::std::numeric_limits::infinity()
#else
# define CUBQL_INF std::numeric_limits::infinity()
#endif

// host-side stand-ins for __int_as_float / __float_as_int, only
// defined when not compiling device code; both re-read the bits of
// the argument as the other type, without any numeric conversion.
// NOTE(review): the reference casts read an object through an
// unrelated type; a std::memcpy based version would avoid relying
// on that.
#ifdef __CUDA_ARCH__
#else
inline float __int_as_float(int i) { return (const float &)i; }
inline int __float_as_int(float f) { return (const int &)f; }
#endif

/*! square of a value (same computation as sqr() below) */
inline __cubql_both float squareOf(float f) { return f*f; }


/*! compile-time log2 lookup for the listed powers of two (2..32);
    the unspecialized template yields -1.
    NOTE(review): the primary template's parameter list is missing in
    this copy of the file. */
template struct log_of { enum { value = -1 }; };
template<> struct log_of< 2> { enum { value = 1 }; };
template<> struct log_of< 4> { enum { value = 2 }; };
template<> struct log_of< 8> { enum { value = 3 }; };
template<> struct log_of<16> { enum { value = 4 }; };
template<> struct log_of<32> { enum { value = 5 }; };

/*! square of a value */
inline __cubql_both float sqr(float f) { return f*f; }

/*! unary functors on scalar types, so we can lift them to vector types later on */
/*! reciprocal, 1/f; no special handling of f==0 */
inline __cubql_both float rcp(float f) { return 1.f/f; }
inline __cubql_both double rcp(double d) { return 1./d; }

/*! clamps t to [lo,hi], defaulting to [0,1]; computed as
    min(max(t,lo),hi), so `hi` wins if lo > hi.
    NOTE(review): the template parameter list is missing in this copy
    of the file. */
template
inline __cubql_both T clamp(T t, T lo=T(0), T hi=T(1))
{ return min(max(t,lo),hi); }

/*! clamps the argument to [0,1] */
inline __cubql_both float saturate(float f) { return clamp(f,0.f,1.f); }
inline __cubql_both double saturate(double f) { return clamp(f,0.,1.); }

// inline __cubql_both float sqrt(float f) { return ::sqrtf(f); }
// inline __cubql_both double sqrt(double d) { return ::sqrt(d); }
2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | // for 'fixedRayQuery' 7 | #include "cuBQL/traversal/rayQueries.h" 8 | // for 'rayIntersectsTriangle()' 9 | #include "cuBQL/queries/triangleData/math/rayTriangleIntersections.h" 10 | 11 | namespace cuBQL { 12 | namespace triangles { 13 | 14 | // ============================================================================= 15 | // *** INTERFACE *** 16 | // ============================================================================= 17 | 18 | /*! performs line-of-sight comutation between two points, and 19 | returns true if both points are mutually visible (ie, _not_ 20 | occluded by any triangle), or false if the line of sightis 21 | blocked by at least one triangle 22 | 23 | getTriangle is lambda getTriangle(uint32_t triID)->Triangle 24 | */ 25 | template 26 | inline __cubql_both 27 | bool pointsMutuallyVisible(bvh3f bvh, 28 | GetTriangleLambda getTriangle, 29 | const vec3f pointA, 30 | const vec3f pointB); 31 | 32 | // ============================================================================= 33 | // *** IMPLEMENTATION *** 34 | // ============================================================================= 35 | 36 | template 37 | inline __cubql_both 38 | bool pointsMutuallyVisible(bvh3f bvh, 39 | GetTriangleLambda getTriangle, 40 | const vec3f pointA, 41 | const vec3f pointB) 42 | { 43 | bool mutuallyVisible = true; 44 | Ray queryRay(pointA,pointB-pointA,0.f,1.f); 45 | auto perTriangle=[&mutuallyVisible,getTriangle,queryRay](uint32_t primID) 46 | { 47 | Triangle triangle = getTriangle(primID); 48 | if (rayIntersectsTriangle(queryRay,triangle)) { 49 | mutuallyVisible = false; 50 | return CUBQL_TERMINATE_TRAVERSAL; 51 | } 52 | return CUBQL_CONTINUE_TRAVERSAL; 53 | }; 54 | cuBQL::fixedRayQuery::forEachPrim(perTriangle,bvh,queryRay); 55 | return mutuallyVisible; 56 | } 57 | 58 | } // ::cuBQL::triangles 59 | } // ::cuBQL 60 | 
namespace knn {

  /*! by default we return the *number* of found candidates (within
      the query radius), and the (square) distance to the most
      distant one, or (square) max query radius if less than the
      requested num prims were found */
  struct Result {
    /*! number of kNNs found */
    int   numFound;
    /*! current (square) cull distance. insert_linear() only
        overwrites this once the candidate list is full; until then
        it keeps whatever value the caller initialized it with */
    float sqrDistMax;
  };

  /*! one of the candidates in a kNN candidate list; this refers to
      a data primitive, and the (square) distance to that candidate
      primitive that the respective data type has computed for the
      given query primitive */
  struct Candidate {
    int   primID;
    float sqrDist;
  };

  /*! inserts `newCandidate` into the list `candidates`, which is
      kept sorted by ascending sqrDist, using a linear
      insertion-sort step.

      \param candidates    array with room for maxCandidates entries,
                           of which the first result.numFound are valid
      \param maxCandidates capacity of `candidates` (the 'k' of the query)
      \param newCandidate  candidate to insert
      \param result        running result; numFound is incremented
                           while the list is not yet full, sqrDistMax
                           is set to the largest stored distance once
                           it is full

      While the list is not full the candidate is always inserted.
      Once it is full, a candidate at or beyond result.sqrDistMax is
      dropped, any other one evicts the currently most distant entry.

      A capacity of zero (or less) makes this a no-op; without that
      check a close-enough candidate would be written to
      candidates[-1]. */
  inline __cubql_both
  void insert_linear(Candidate *candidates,
                     int maxCandidates,
                     Candidate newCandidate,
                     Result &result)
  {
    if (maxCandidates <= 0)
      // nowhere to store anything
      return;

    int insertPos;
    if (result.numFound < maxCandidates) {
      // still room: append at the end, then sort into place below
      insertPos = result.numFound++;
    } else {
      if (newCandidate.sqrDist >= result.sqrDistMax)
        return;
      // list is full: overwrite (ie, evict) the last, most distant one
      insertPos = maxCandidates-1;
    }
    // shift all more distant entries one slot back
    while (insertPos > 0 && candidates[insertPos-1].sqrDist > newCandidate.sqrDist) {
      candidates[insertPos] = candidates[insertPos-1];
      --insertPos;
    }
    candidates[insertPos] = newCandidate;
    if (result.numFound == maxCandidates)
      result.sqrDistMax = candidates[maxCandidates-1].sqrDist;
  }

} // ::cuBQL::knn
# Configure step of the Ubuntu workflow. The two -D options must be
# separate arguments: without the space between them the build type is
# swallowed into the install prefix and every matrix entry configures
# with an empty CMAKE_BUILD_TYPE.
    - name: cmake configure
      run: |
        mkdir ${{github.workspace}}/build
        cd ${{github.workspace}}/build
        cmake .. -DCMAKE_INSTALL_PREFIX=${{github.workspace}}/install -DCMAKE_BUILD_TYPE=${{matrix.config}}
/*! host-side builder entry points and matching clean-up helpers.

    NOTE(review): all template parameter lists, and the template
    arguments of BinaryBVH / WideBVH / box_t, are missing in this copy
    of the file - confirm them against the repository. */
namespace cpu {

  // ==================================================================
  // for regular, BINARY BVHes
  // ==================================================================
  /*! builds `bvh` over the `numPrims` boxes in `boxes`; only declared
      here, the definition is not visible in this file */
  template
  void spatialMedian(BinaryBVH &bvh,
                     const box_t *boxes,
                     uint32_t numPrims,
                     BuildConfig buildConfig);

  /*! releases the node and primID arrays of `bvh` and clears both
      pointers, so calling it a second time is harmless.
      NOTE(review): delete[] assumes both arrays were allocated with
      new[] by the builder - confirm. */
  template
  inline void freeBVH(BinaryBVH &bvh)
  {
    delete[] bvh.nodes;
    delete[] bvh.primIDs;
    bvh.nodes = 0;
    bvh.primIDs = 0;
  }

  // ==================================================================
  // for WIDE BVHes
  // ==================================================================
  /*! same as above, for wide BVHes; only declared here */
  template
  void spatialMedian(WideBVH &bvh,
                     const box_t *boxes,
                     uint32_t numPrims,
                     BuildConfig buildConfig);

  /*! releases the node and primID arrays of a wide `bvh` and clears
      both pointers (same allocation assumption as the binary variant) */
  template
  inline void freeBVH(WideBVH &bvh)
  {
    delete[] bvh.nodes;
    delete[] bvh.primIDs;
    bvh.nodes = 0;
    bvh.primIDs = 0;
  }

} // ::cuBQL::cpu
right now, only have a slow spatial median builder */ 70 | cpu::spatialMedian(bvh,boxes,numPrims,buildConfig); 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /cuBQL/builder/cuda/gpu_builder.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "cuBQL/builder/cuda/builder_common.h" 7 | #include "cuBQL/builder/cuda/sm_builder.h" 8 | #include "cuBQL/builder/cuda/sah_builder.h" 9 | #include "cuBQL/builder/cuda/elh_builder.h" 10 | 11 | namespace cuBQL { 12 | 13 | template 14 | struct is3f { enum { value = false }; }; 15 | template<> 16 | struct is3f { enum { value = true }; }; 17 | 18 | template 19 | void gpuBuilder(BinaryBVH &bvh, 20 | const box_t *boxes, 21 | uint32_t numBoxes, 22 | BuildConfig buildConfig, 23 | cudaStream_t s, 24 | GpuMemoryResource &memResource) 25 | { 26 | if (numBoxes == 0) return; 27 | 28 | int devID; 29 | CUBQL_CUDA_CALL(GetDevice(&devID)); 30 | 31 | if (buildConfig.buildMethod == BuildConfig::SAH) { 32 | if (buildConfig.makeLeafThreshold == 0) 33 | // unless explicitly specified, use default for spatial median 34 | // builder: 35 | buildConfig.makeLeafThreshold = 1; 36 | if (is3f::value) { 37 | /* for D == 3 these typecasts won't do anything; for D != 3 38 | they'd be invalid, but won't ever happen */ 39 | sahBuilder_impl::sahBuilder((BinaryBVH&)bvh,(const box_t *)boxes, 40 | numBoxes,buildConfig,s,memResource); 41 | } else 42 | throw std::runtime_error("SAH builder not supported for this type of BVH"); 43 | } else if (buildConfig.buildMethod == BuildConfig::ELH) { 44 | /* edge-length-heurstic; splits based on sum of the lengths of 45 | the edges of the bounding box - not as good as sah for 46 | tracing rays, but often somewhat better than spatial median 47 | for kNN style queries 
*/ 48 | elhBuilder_impl::elhBuilder(bvh,boxes,numBoxes,buildConfig,s,memResource); 49 | } else { 50 | if (buildConfig.makeLeafThreshold == 0) 51 | // unless explicitly specified, use default for spatial median 52 | // builder: 53 | buildConfig.makeLeafThreshold = 1; 54 | gpuBuilder_impl::build(bvh,boxes,numBoxes,buildConfig,s,memResource); 55 | } 56 | gpuBuilder_impl::refit(bvh,boxes,s,memResource); 57 | } 58 | 59 | namespace cuda { 60 | template 61 | void free(BinaryBVH &bvh, 62 | cudaStream_t s, 63 | GpuMemoryResource &memResource) 64 | { 65 | gpuBuilder_impl::_FREE(bvh.primIDs,s,memResource); 66 | gpuBuilder_impl::_FREE(bvh.nodes,s,memResource); 67 | CUBQL_CUDA_CALL(StreamSynchronize(s)); 68 | bvh.primIDs = 0; 69 | } 70 | } 71 | } 72 | 73 | 74 | 75 | -------------------------------------------------------------------------------- /cuBQL/queries/triangleData/boxInsideOutsideIntersects.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | /*! \file queries/triangleData/pointInsideOutside.h computes whether a 5 | 3D point is inside/outside a (supposedly closed) triangle 6 | mesh. 
Relies on crossing-count kernel to compute odd/equal 7 | crossing counts for semi-infinite ray(s) starting at that point */ 8 | #pragma once 9 | 10 | #include "cuBQL/bvh.h" 11 | #include "cuBQL/queries/triangleData/pointInsideOutside.h" 12 | #include "cuBQL/queries/triangleData/trianglesInBox.h" 13 | 14 | namespace cuBQL { 15 | namespace triangles { 16 | namespace boxInsideOutsideIntersects { 17 | // ============================================================================= 18 | // *** INTERFACE *** 19 | // ============================================================================= 20 | 21 | typedef enum { INSIDE=0, OUTSIDE, INTERSECTS } result_t; 22 | 23 | template 24 | inline __cubql_both 25 | result_t queryVsActualTriangles(bvh3f bvh, 26 | GetTriangleLambda getTriangle, 27 | box3f queryBox); 28 | 29 | template 30 | inline __cubql_both 31 | result_t queryVsTriangleBoundingBoxes(bvh3f bvh, 32 | GetTriangleLambda getTriangle, 33 | box3f queryBox); 34 | 35 | // ============================================================================= 36 | // *** IMPLEMENTATION *** 37 | // ============================================================================= 38 | 39 | template 40 | inline __cubql_both 41 | result_t queryVsActualTriangles(bvh3f bvh, 42 | GetTriangleLambda getTriangle, 43 | box3f queryBox) 44 | { 45 | if (countTrianglesIntersectingQueryBox 46 | (bvh,getTriangle,queryBox,/* at most */1) > 0) 47 | return INTERSECTS; 48 | return pointIsInsideSurface(bvh,getTriangle,queryBox.center()) 49 | ? INSIDE 50 | : OUTSIDE; 51 | } 52 | 53 | template 54 | inline __cubql_both 55 | result_t queryVsTriangleBoundingBoxes(bvh3f bvh, 56 | GetTriangleLambda getTriangle, 57 | box3f queryBox) 58 | { 59 | if (countTrianglesWhoseBoundsOverlapQueryBox 60 | (bvh,getTriangle,queryBox,/* at most */1) > 0) 61 | return INTERSECTS; 62 | return pointIsInsideSurface(bvh,getTriangle,queryBox.center()) 63 | ? 
INSIDE 64 | : OUTSIDE; 65 | } 66 | 67 | } // ::cuBQL::triangles::boxInsideOutsideIntersect 68 | } // ::cuBQL::triangles 69 | } // ::cuBQL 70 | -------------------------------------------------------------------------------- /.github/workflows/Windows.yml: -------------------------------------------------------------------------------- 1 | name: Windows 2 | 3 | #on: 4 | # workflow_dispatch: 5 | 6 | on: 7 | workflow_dispatch: 8 | push: 9 | branches: [ main ] 10 | pull_request: 11 | branches: [ main ] 12 | 13 | jobs: 14 | build: 15 | runs-on: ${{ matrix.os }} 16 | strategy: 17 | fail-fast: false 18 | # explicit include-based build matrix, of known valid options 19 | matrix: 20 | include: 21 | - os: windows-2022 22 | cuda: "12.6.2" 23 | visual-studio: "Visual Studio 17 2022" 24 | shell: "powershell" 25 | config: Release 26 | - os: windows-2022 27 | cuda: "13.0.0" 28 | visual-studio: "Visual Studio 17 2022" 29 | shell: "powershell" 30 | config: Release 31 | - os: windows-2025 32 | cuda: "12.6.2" 33 | visual-studio: "Visual Studio 17 2022" 34 | shell: "powershell" 35 | config: Release 36 | - os: windows-2025 37 | cuda: "13.0.0" 38 | visual-studio: "Visual Studio 17 2022" 39 | shell: "powershell" 40 | config: Release 41 | - os: windows-2025 42 | cuda: "13.0.2" 43 | visual-studio: "Visual Studio 17 2022" 44 | shell: "powershell" 45 | config: Release 46 | 47 | env: 48 | cmake_generator: "${{ matrix.visual-studio }}" 49 | visual_studio: "${{ matrix.visual-studio }}" 50 | build_dir: "build" 51 | steps: 52 | - name: Checkout owl 53 | uses: actions/checkout@v5 54 | with: 55 | submodules: true 56 | 57 | - name: Install CUDA 58 | uses: Jimver/cuda-toolkit@master 59 | id: cuda-toolkit 60 | with: 61 | cuda: ${{ matrix.cuda }} 62 | 63 | # - name: Install CUDA on Ubuntu systems 64 | # if: contains(matrix.os, 'ubuntu') 65 | # env: 66 | # cuda: ${{ matrix.cuda }} 67 | # run: .github/action-scripts/install-cuda-ubuntu.sh 68 | # shell: bash 69 | 70 | # - name: Install CUDA on Windows 71 
| # if: contains(matrix.os, 'windows') 72 | # env: 73 | # cuda: ${{ matrix.cuda }} 74 | # run: .github\action-scripts\install-cuda-windows.ps1 75 | # shell: powershell 76 | 77 | - name: create shared install dir 78 | run: | 79 | mkdir ${{github.workspace}}\install 80 | 81 | - name: cmake configure 82 | run: | 83 | mkdir ${{github.workspace}}\build 84 | cd ${{github.workspace}}\build 85 | cmake -A x64 -G "${{ matrix.visual-studio }}" .. -DCMAKE_INSTALL_PREFIX=${{github.workspace}}\install -DCMAKE_BUILD_TYPE=${{matrix.config}} 86 | shell: powershell 87 | 88 | - name: cmake build 89 | run: | 90 | cd ${{github.workspace}}\build 91 | cmake --build . --config ${{ matrix.config }} 92 | 93 | - name: cmake install 94 | run: | 95 | cd ${{github.workspace}}\build 96 | cmake --install . --config ${{ matrix.config }} 97 | -------------------------------------------------------------------------------- /cuBQL/queries/triangleData/pointInsideOutside.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | /*! \file queries/triangleData/pointInsideOutside.h computes whether a 5 | 3D point is inside/outside a (supposedly closed) triangle 6 | mesh. Relies on crossing-count kernel to compute odd/equal 7 | crossing counts for semi-infinite ray(s) starting at that point */ 8 | #pragma once 9 | 10 | #include "cuBQL/bvh.h" 11 | #include "cuBQL/queries/triangleData/crossingCount.h" 12 | 13 | namespace cuBQL { 14 | namespace triangles { 15 | 16 | // ============================================================================= 17 | // *** INTERFACE *** 18 | // ============================================================================= 19 | 20 | /*! 
given a bvh ('bvh') built over a supposedly closed triangle 21 | mesh (with a triangle accessor function 22 | getTriangle()->Triangle), compute whether a given point 23 | `queryPoint` is inside or outside the surface mesh 24 | 25 | getTriangle is lambda getTriangle(uint32_t triID)->Triangle 26 | */ 27 | template 28 | inline __cubql_both 29 | bool pointIsInsideSurface(bvh3f bvh, 30 | const GetTriangleLambda getTriangle, 31 | vec3f queryPoint, 32 | bool dbg=false); 33 | 34 | // ============================================================================= 35 | // *** IMPLEMENTATION *** 36 | // ============================================================================= 37 | 38 | template 39 | inline __cubql_both 40 | bool pointIsInsideSurface(bvh3f bvh, 41 | const GetTriangleLambda getTriangle, 42 | vec3f queryPoint, 43 | bool dbg) 44 | { 45 | /*! we trace 6 rays - one per principle axis - using the 46 | AxisAlignedRay rayquery. In theory, if the mesh is closed then 47 | these 6 calls should all agree; but in practice there's always 48 | some holes or double counting when rays going right through 49 | vertices or edges, so we just trace one ray in each direction 50 | and take a majority vote. 
*/ 51 | int n0 = signedCrossingCount(bvh,getTriangle,AxisAlignedRay<0,-1>(queryPoint),dbg); 52 | int p0 = signedCrossingCount(bvh,getTriangle,AxisAlignedRay<0,+1>(queryPoint),dbg); 53 | int n1 = signedCrossingCount(bvh,getTriangle,AxisAlignedRay<1,-1>(queryPoint),dbg); 54 | int p1 = signedCrossingCount(bvh,getTriangle,AxisAlignedRay<1,+1>(queryPoint),dbg); 55 | int n2 = signedCrossingCount(bvh,getTriangle,AxisAlignedRay<2,-1>(queryPoint),dbg); 56 | int p2 = signedCrossingCount(bvh,getTriangle,AxisAlignedRay<2,+1>(queryPoint),dbg); 57 | int numIn = p0+p1+p2+n0+n1+n2; 58 | if (dbg) printf("inside results %i %i %i %i %i %i\n", 59 | n0,n1,n2,p0,p1,p2); 60 | 61 | // if (numIn != 0 && numIn != 6) { 62 | // printf("disagreement: inside results %i %i %i %i %i %i\n", 63 | // n0,n1,n2,p0,p1,p2); 64 | // } 65 | return /* take a majority vote ... */numIn > 3; // == 3;//> 3; 66 | } 67 | 68 | } // ::cuBQL::triangles 69 | } // ::cuBQL 70 | -------------------------------------------------------------------------------- /samples/s01_closestPoint_points_cpu/closestPoint.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | /*! 
\file closestPoint.cpp Implements a small CPU-side demo-app that 5 | generates a set of data points, another set of query points, and 6 | then uses cuBQL to perform closest-point queries (ie, it finds, 7 | for each query point, the respectively closest data point) */ 8 | 9 | // cuBQL itself, and the BVH type(s) it defines 10 | #include "cuBQL/bvh.h" 11 | #include "cuBQL/builder/cpu.h" 12 | // some specialized query kernels for find-closest, on 'points' data 13 | #include "cuBQL/queries/pointData/findClosest.h" 14 | // helper class to generate various data distributions 15 | #include "samples/common/Generator.h" 16 | #include 17 | 18 | using namespace cuBQL; 19 | 20 | void computeBoxes(int tid, 21 | box3f *d_boxes, const vec3f *d_data, int numData) 22 | { 23 | d_boxes[tid] = box3f().including(d_data[tid]); 24 | } 25 | 26 | void runQueries(int tid, 27 | bvh3f bvh, 28 | const vec3f *d_data, 29 | const vec3f *d_queries, 30 | int numQueries) 31 | { 32 | vec3f queryPoint = d_queries[tid]; 33 | int closestID = cuBQL::points::findClosest 34 | (/* the cubql bvh we've built */ 35 | bvh, 36 | /* data that this bvh was built over*/ 37 | d_data, 38 | queryPoint); 39 | vec3f closestPoint = d_data[closestID]; 40 | printf("[%i] closest point to (%f %f %f) is point #%i, at (%f %f %f)\n", 41 | tid, 42 | queryPoint.x, 43 | queryPoint.y, 44 | queryPoint.z, 45 | closestID, 46 | closestPoint.x, 47 | closestPoint.y, 48 | closestPoint.z); 49 | } 50 | 51 | 52 | int main(int, char **) 53 | { 54 | int numDataPoints = 10000; 55 | int numQueryPoints = 20; 56 | /*!
generate 10,000 uniformly distributed data points */ 57 | std::vector dataPoints 58 | = cuBQL::samples::convert 59 | (cuBQL::samples::UniformPointGenerator<3>().generate(numDataPoints,290374)); 60 | std::cout << "#cubql: generated " << dataPoints.size() << " data points" << std::endl; 61 | std::vector queryPoints 62 | = cuBQL::samples::convert 63 | (cuBQL::samples::UniformPointGenerator<3>().generate(numQueryPoints,/*seed*/1234567)); 64 | std::cout << "#cubql: generated " << queryPoints.size() << " query points" << std::endl; 65 | 66 | vec3f *d_queryPoints = 0; 67 | vec3f *d_dataPoints = 0; 68 | box3f *d_primBounds = 0; 69 | (void *&)d_queryPoints = malloc(queryPoints.size()*sizeof(vec3f)); 70 | memcpy(d_queryPoints,queryPoints.data(), 71 | queryPoints.size()*sizeof(queryPoints[0])); 72 | (void *&)d_dataPoints = malloc(dataPoints.size()*sizeof(vec3f)); 73 | memcpy(d_dataPoints,dataPoints.data(), 74 | dataPoints.size()*sizeof(dataPoints[0])); 75 | (void *&)d_primBounds = malloc(dataPoints.size()*sizeof(box3f)); 76 | for (int tid=0;tid>> 78 | (tid,d_primBounds,d_dataPoints,numDataPoints); 79 | 80 | // generate cuBQL bvh 81 | bvh3f bvh; 82 | cuBQL::cpuBuilder(bvh,d_primBounds,numDataPoints,BuildConfig()); 83 | for (int tid=0;tid>> 85 | (tid,bvh,d_dataPoints,d_queryPoints,numQueryPoints); 86 | 87 | return 0; 88 | } 89 | 90 | -------------------------------------------------------------------------------- /cuBQL/math/Ray.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "cuBQL/math/vec.h" 7 | #include "cuBQL/math/box.h" 8 | 9 | namespace cuBQL { 10 | 11 | // ============================================================================= 12 | // *** INTERFACE *** 13 | // ============================================================================= 14 | 15 | template 16 | struct ray_t { 17 | using vec3 = vec_t; 18 | 19 | __cubql_both ray_t(vec3 org, vec3 dir, T tMin, T tMax); 20 | __cubql_both ray_t(vec3 org, vec3 dir); 21 | vec3 origin; 22 | vec3 direction; 23 | T tMin = T(0); 24 | T tMax = T(CUBQL_INF); 25 | }; 26 | 27 | using ray3f = ray_t; 28 | using ray3d = ray_t; 29 | using Ray = ray_t; 30 | 31 | template 32 | struct AxisAlignedRay { 33 | __cubql_both AxisAlignedRay(const vec3f origin); 34 | __cubql_both AxisAlignedRay(const vec3f origin, float tMin, float tMax); 35 | 36 | vec3f origin; 37 | float tMin=0.f, tMax=CUBQL_INF; 38 | 39 | inline __cubql_both vec3f direction() const; 40 | inline __cubql_both Ray makeRay() const; 41 | }; 42 | 43 | template 44 | inline __cubql_both 45 | bool rayIntersectsBox(ray_t ray, box_t box); 46 | 47 | // ======================================================================== 48 | // *** IMPLEMENTATION *** 49 | // ======================================================================== 50 | 51 | template 52 | inline __cubql_both ray_t::ray_t(typename ray_t::vec3 org, 53 | typename ray_t::vec3 dir, 54 | T tMin, T tMax) 55 | : origin(org), direction(dir), tMin(tMin), tMax(tMax) 56 | {} 57 | 58 | template 59 | inline __cubql_both ray_t::ray_t(typename ray_t::vec3 org, 60 | typename ray_t::vec3 dir) 61 | : origin(org), direction(dir) 62 | {} 63 | 64 | template 65 | inline __cubql_both 66 | AxisAlignedRay::AxisAlignedRay(const vec3f origin, 67 | float tMin, float tMax) 68 | : origin(origin), tMin(tMin), tMax(tMax) 69 | {} 70 | 71 | template 72 | inline __cubql_both 73 | AxisAlignedRay::AxisAlignedRay(const vec3f origin) 74 
| : origin(origin) 75 | {} 76 | 77 | template 78 | inline __cubql_both vec3f AxisAlignedRay::direction() const 79 | { 80 | return { 81 | (axis == 0) ? (sign > 0 ? +1.f : -1.f) : 0.f, 82 | (axis == 1) ? (sign > 0 ? +1.f : -1.f) : 0.f, 83 | (axis == 2) ? (sign > 0 ? +1.f : -1.f) : 0.f 84 | }; 85 | } 86 | 87 | template 88 | inline __cubql_both ray_t AxisAlignedRay::makeRay() const 89 | { 90 | return { origin, tMin, direction(), tMax }; 91 | } 92 | 93 | template 94 | inline __cubql_both dbgout operator<<(dbgout o, AxisAlignedRay ray) 95 | { 96 | o << "AARay<"<("< 101 | inline __cubql_both dbgout operator<<(dbgout o, ray_t ray) 102 | { 103 | o << "Ray{"<= numData) return; 24 | 25 | d_boxes[tid] = box3f().including(d_data[tid]); 26 | } 27 | 28 | __global__ 29 | void runQueries(bvh3f bvh, 30 | const vec3f *d_data, 31 | const vec3f *d_queries, 32 | int numQueries) 33 | { 34 | int tid = threadIdx.x+blockIdx.x*blockDim.x; 35 | if (tid >= numQueries) return; 36 | 37 | vec3f queryPoint = d_queries[tid]; 38 | int closestID = cuBQL::points::findClosest(/* the cubql bvh we've built */ 39 | bvh, 40 | /* data that this bvh was built over*/ 41 | d_data, 42 | queryPoint); 43 | vec3f closestPoint = d_data[closestID]; 44 | printf("[%i] closest point to (%f %f %f) is point #%i, at (%f %f %f)\n", 45 | tid, 46 | queryPoint.x, 47 | queryPoint.y, 48 | queryPoint.z, 49 | closestID, 50 | closestPoint.x, 51 | closestPoint.y, 52 | closestPoint.z); 53 | } 54 | 55 | 56 | int main(int, char **) 57 | { 58 | int numDataPoints = 10000; 59 | int numQueryPoints = 20; 60 | /*! 
generate 10,000 uniformly distributed data points */ 61 | std::vector dataPoints 62 | = cuBQL::samples::convert 63 | (cuBQL::samples::UniformPointGenerator<3>() 64 | .generate(numDataPoints,290374)); 65 | std::cout << "#cubql: generated " << dataPoints.size() 66 | << " data points" << std::endl; 67 | std::vector queryPoints 68 | = cuBQL::samples::convert 69 | (cuBQL::samples::UniformPointGenerator<3>() 70 | .generate(numQueryPoints,/*seed*/1234567)); 71 | std::cout << "#cubql: generated " << queryPoints.size() 72 | << " query points" << std::endl; 73 | 74 | vec3f *d_queryPoints = 0; 75 | vec3f *d_dataPoints = 0; 76 | box3f *d_primBounds = 0; 77 | CUBQL_CUDA_CALL(Malloc((void **)&d_queryPoints,queryPoints.size()*sizeof(vec3f))); 78 | CUBQL_CUDA_CALL(Memcpy(d_queryPoints,queryPoints.data(), 79 | queryPoints.size()*sizeof(queryPoints[0]), 80 | cudaMemcpyDefault)); 81 | CUBQL_CUDA_CALL(Malloc((void **)&d_dataPoints,dataPoints.size()*sizeof(vec3f))); 82 | CUBQL_CUDA_CALL(Memcpy(d_dataPoints,dataPoints.data(), 83 | dataPoints.size()*sizeof(dataPoints[0]), 84 | cudaMemcpyDefault)); 85 | CUBQL_CUDA_CALL(Malloc((void **)&d_primBounds,dataPoints.size()*sizeof(box3f))); 86 | computeBoxes<<>> 87 | (d_primBounds,d_dataPoints,numDataPoints); 88 | 89 | // generate cuBQL bvh 90 | bvh3f bvh; 91 | cuBQL::gpuBuilder(bvh,d_primBounds,numDataPoints,BuildConfig()); 92 | runQueries<<>> 93 | (bvh,d_dataPoints,d_queryPoints,numQueryPoints); 94 | 95 | CUBQL_CUDA_SYNC_CHECK(); 96 | return 0; 97 | } 98 | 99 | -------------------------------------------------------------------------------- /cuBQL/queries/triangleData/anyWithinRadius.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "cuBQL/bvh.h" 7 | // the kind of model data we operate on 8 | #include "cuBQL/queries/triangleData/closestPointOnAnyTriangle.h" 9 | // the kind of traversal we need for this query 10 | #include "cuBQL/traversal/fixedRadiusQuery.h" 11 | 12 | namespace cuBQL { 13 | /*! \namespace triangles for any queries operating on triangle model data */ 14 | namespace triangles { 15 | 16 | // ============================================================================= 17 | // *** INTERFACE *** 18 | // ============================================================================= 19 | 20 | /*! returns number of triangles within a given (non-squared) radius 21 | r around a point P. Will only look for up to 22 | 'maxNumToLookFor'; as soon as that many are found this will 23 | return this value. 24 | 25 | getTriangle is lambda getTriangle(uint32_t triID)->Triangle 26 | */ 27 | template 28 | inline __cubql_both 29 | int numWithinRadius(bvh3f bvh, 30 | GetTriangleLambda getTriangle, 31 | vec3f queryBallCenter, 32 | float queryBallRadius, 33 | int maxNumToLookFor=INT_MAX, 34 | bool dbg = false); 35 | 36 | /*! 
checks if there are _any_ triangles within a given 37 | (non-squared) radius r of a point P */ 38 | template 39 | inline __cubql_both 40 | bool anyWithinRadius(bvh3f bvh, 41 | GetTriangleLambda getTriangle, 42 | vec3f queryBallCenter, 43 | float queryBallRadius, 44 | bool dbg = false); 45 | 46 | // ============================================================================= 47 | // *** IMPLEMENTATION *** 48 | // ============================================================================= 49 | 50 | 51 | template 52 | inline __cubql_both 53 | int numWithinRadius(bvh3f bvh, 54 | GetTriangleLambda getTriangle, 55 | vec3f queryBallCenter, 56 | float queryBallRadius, 57 | int maxNumToLookFor, 58 | bool dbg) 59 | { 60 | int numFound = 0; 61 | auto perPrim 62 | = [&numFound,maxNumToLookFor,getTriangle,queryBallCenter,queryBallRadius,dbg] 63 | (uint32_t triID) 64 | { 65 | auto sqrDist = computeClosestPoint(queryBallCenter,getTriangle(triID),dbg).sqrDist; 66 | if (sqrDist != CUBQL_INF && sqrDist <= squareOf(queryBallRadius)) 67 | ++numFound; 68 | return numFound >= maxNumToLookFor 69 | ? CUBQL_TERMINATE_TRAVERSAL 70 | : CUBQL_CONTINUE_TRAVERSAL; 71 | }; 72 | fixedRadiusQuery::forEachPrim(perPrim,bvh, 73 | queryBallCenter, 74 | /* traversal templates uses SQUARE of radius!! 
*/ 75 | squareOf(queryBallRadius), 76 | dbg); 77 | return numFound; 78 | } 79 | 80 | template 81 | inline __cubql_both 82 | bool anyWithinRadius(bvh3f bvh, 83 | GetTriangleLambda getTriangle, 84 | vec3f queryBallCenter, 85 | float queryBallRadius, 86 | bool dbg) 87 | { return numWithinRadius(bvh,getTriangle, 88 | queryBallCenter, 89 | queryBallRadius, 90 | /* max to look for for early exit */1, 91 | dbg 92 | ) > 0; } 93 | 94 | 95 | } // ::cuBQL::triangles 96 | } // ::cuBQL 97 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | ## nvCUBQL OSS Contribution Rules 2 | 3 | #### Issue Tracking 4 | 5 | * All enhancement, bugfix, or change requests must begin with the creation of a [nvCUBQL Issue Request](https://github.com/nvidia/nvCUBQL/issues). 6 | * The issue request must be reviewed by nvCUBQL engineers and approved prior to code review. 7 | 8 | 9 | #### Pull Requests 10 | Developer workflow for code contributions is as follows: 11 | 12 | 1. Developers must first [fork](https://help.github.com/en/articles/fork-a-repo) the [upstream](https://github.com/nvidia/nvCUBQL) nvCUBQL OSS repository. 13 | 14 | 2. Git clone the forked repository and push changes to the personal fork. 15 | 16 | ```bash 17 | git clone https://github.com/YOUR_USERNAME/YOUR_FORK.git nvCUBQL 18 | # Checkout the targeted branch and commit changes 19 | # Push the commits to a branch on the fork (remote). 20 | git push -u origin <local-branch>:<remote-branch> 21 | ``` 22 | 23 | 3. Once the code changes are staged on the fork and ready for review, a [Pull Request](https://help.github.com/en/articles/about-pull-requests) (PR) can be [requested](https://help.github.com/en/articles/creating-a-pull-request) to merge the changes from a branch of the fork into a selected branch of upstream. 24 | * Exercise caution when selecting the source and target branches for the PR.
25 | Note that versioned releases of nvCUBQL OSS are posted to `release/` branches of the upstream repo. 26 | * Creation of a PR kicks off the code review process. 27 | * At least one nvCUBQL engineer will be assigned to the review. 28 | * While under review, mark your PRs as work-in-progress by prefixing the PR title with [WIP]. 29 | 30 | 4. Since there is no CI/CD process in place yet, the PR will be accepted and the corresponding issue closed only after adequate testing has been completed, manually, by the developer and/or nvCUBQL engineer reviewing the code. 31 | 32 | 33 | #### Signing Your Work 34 | 35 | * We require that all contributors "sign-off" on their commits. This certifies that the contribution is your original work, or you have rights to submit it under the same license, or a compatible license. 36 | 37 | * Any contribution which contains commits that are not Signed-Off will not be accepted. 38 | 39 | * To sign off on a commit you simply use the `--signoff` (or `-s`) option when committing your changes: 40 | ```bash 41 | $ git commit -s -m "Add cool feature." 42 | ``` 43 | This will append the following to your commit message: 44 | ``` 45 | Signed-off-by: Your Name <your@email.com> 46 | ``` 47 | 48 | * Full text of the DCO: 49 | 50 | ``` 51 | Developer Certificate of Origin 52 | Version 1.1 53 | 54 | Copyright (C) 2004, 2006 The Linux Foundation and its contributors. 55 | 1 Letterman Drive 56 | Suite D4700 57 | San Francisco, CA, 94129 58 | 59 | Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed.
60 | ``` 61 | 62 | ``` 63 | Developer's Certificate of Origin 1.1 64 | 65 | By making a contribution to this project, I certify that: 66 | 67 | (a) The contribution was created in whole or in part by me and I have the right to submit it under the open source license indicated in the file; or 68 | 69 | (b) The contribution is based upon previous work that, to the best of my knowledge, is covered under an appropriate open source license and I have the right under that license to submit that work with modifications, whether created in whole or in part by me, under the same open source license (unless I am permitted to submit under a different license), as indicated in the file; or 70 | 71 | (c) The contribution was provided directly to me by some other person who certified (a), (b) or (c) and I have not modified it. 72 | 73 | (d) I understand and agree that this project and the contribution are public and that a record of the contribution (including all personal information I submit with it, including my sign-off) is maintained indefinitely and may be redistributed consistent with this project or the open source license(s) involved. 74 | ``` 75 | -------------------------------------------------------------------------------- /samples/s01_closestPoint_points_gpu/closestPoint_WideBVH.cu: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | /*! 
\file closestPoint_WideBVH.cu Implements a small demo-app that 5 | generates a set of data points, another set of query points, and 6 | then uses cuBQL to perform closest-point queries (ie, it finds, 7 | for each query point, the respectively closest data point) */ 8 | 9 | // cuBQL itself, and the BVH type(s) it defines 10 | #include "cuBQL/bvh.h" 11 | #include "cuBQL/builder/cuda.h" 12 | // some specialized query kernels for find-closest, on 'points' data 13 | #include "cuBQL/queries/pointData/findClosest.h" 14 | // helper class to generate various data distributions 15 | #include "samples/common/Generator.h" 16 | 17 | using namespace cuBQL; 18 | 19 | enum { BVH_WIDTH = 8 }; 20 | 21 | typedef cuBQL::WideBVH wide_bvh3f; 22 | 23 | __global__ 24 | void computeBoxes(box3f *d_boxes, const vec3f *d_data, int numData) 25 | { 26 | int tid = threadIdx.x+blockIdx.x*blockDim.x; 27 | if (tid >= numData) return; 28 | 29 | d_boxes[tid] = box3f().including(d_data[tid]); 30 | } 31 | 32 | __global__ 33 | void runQueries(wide_bvh3f bvh, 34 | const vec3f *d_data, 35 | const vec3f *d_queries, 36 | int numQueries) 37 | { 38 | int tid = threadIdx.x+blockIdx.x*blockDim.x; 39 | if (tid >= numQueries) return; 40 | 41 | vec3f queryPoint = d_queries[tid]; 42 | int closestID = cuBQL::points::findClosest(/* the cubql bvh we've built */ 43 | bvh, 44 | /* data that this bvh was built over*/ 45 | d_data, 46 | queryPoint); 47 | vec3f closestPoint = d_data[closestID]; 48 | printf("[%i] closest point to (%f %f %f) is point #%i, at (%f %f %f)\n", 49 | tid, 50 | queryPoint.x, 51 | queryPoint.y, 52 | queryPoint.z, 53 | closestID, 54 | closestPoint.x, 55 | closestPoint.y, 56 | closestPoint.z); 57 | } 58 | 59 | 60 | int main(int, char **) 61 | { 62 | int numDataPoints = 10000; 63 | int numQueryPoints = 20; 64 | /*!
generate 10,000 uniformly distributed data points */ 65 | std::vector dataPoints 66 | = cuBQL::samples::convert 67 | (cuBQL::samples::UniformPointGenerator<3>() 68 | .generate(numDataPoints,290374)); 69 | std::cout << "#cubql: generated " << dataPoints.size() 70 | << " data points" << std::endl; 71 | std::vector queryPoints 72 | = cuBQL::samples::convert 73 | (cuBQL::samples::UniformPointGenerator<3>() 74 | .generate(numQueryPoints,/*seed*/1234567)); 75 | std::cout << "#cubql: generated " << queryPoints.size() 76 | << " query points" << std::endl; 77 | 78 | vec3f *d_queryPoints = 0; 79 | vec3f *d_dataPoints = 0; 80 | box3f *d_primBounds = 0; 81 | CUBQL_CUDA_CALL(Malloc((void **)&d_queryPoints,queryPoints.size()*sizeof(vec3f))); 82 | CUBQL_CUDA_CALL(Memcpy(d_queryPoints,queryPoints.data(), 83 | queryPoints.size()*sizeof(queryPoints[0]), 84 | cudaMemcpyDefault)); 85 | CUBQL_CUDA_CALL(Malloc((void **)&d_dataPoints,dataPoints.size()*sizeof(vec3f))); 86 | CUBQL_CUDA_CALL(Memcpy(d_dataPoints,dataPoints.data(), 87 | dataPoints.size()*sizeof(dataPoints[0]), 88 | cudaMemcpyDefault)); 89 | CUBQL_CUDA_CALL(Malloc((void **)&d_primBounds,dataPoints.size()*sizeof(box3f))); 90 | computeBoxes<<>> 91 | (d_primBounds,d_dataPoints,numDataPoints); 92 | 93 | // generate cuBQL bvh 94 | wide_bvh3f bvh; 95 | cuBQL::gpuBuilder(bvh,d_primBounds,numDataPoints,BuildConfig()); 96 | runQueries<<>> 97 | (bvh,d_dataPoints,d_queryPoints,numQueryPoints); 98 | 99 | CUBQL_CUDA_SYNC_CHECK(); 100 | return 0; 101 | } 102 | 103 | -------------------------------------------------------------------------------- /cuBQL/queries/triangleData/closestPointOnAnyTriangle.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "cuBQL/bvh.h" 7 | // the kind of model data we operate on 8 | #include "cuBQL/queries/triangleData/math/pointToTriangleDistance.h" 9 | // the kind of traversal we need for this query 10 | #include "cuBQL/traversal/shrinkingRadiusQuery.h" 11 | 12 | namespace cuBQL { 13 | /*! \namespace triangles for any queries operating on triangle model data */ 14 | namespace triangles { 15 | 16 | /*! result of a cpat query (cpat = 17 | closest-point-on-any-triangle); if no result was found within 18 | the specified max search distance, triangleIdx will be returned 19 | as -1 */ 20 | struct CPAT : public PointToTriangleTestResult 21 | { 22 | /* INHERITED: float sqrDist = INFINITY; */ 23 | /* INHERITED: vec3f P; */ 24 | 25 | /*! index of triangle that had closest hit; -1 means 'none found 26 | that was closer than cut-off distance' */ 27 | int triangleIdx = -1; 28 | 29 | /*! performs one complete query, starting with an empty CPAT 30 | result, traversing the BVH for the given mesh, and processing 31 | every triangle that needs consideration. Only intersections 32 | that are < maxQueryRadius will get accepted */ 33 | inline __cubql_both 34 | void runQuery(const cuBQL::vec3f *mesh_vertices, 35 | const cuBQL::vec3i *mesh_indices, 36 | const cuBQL::bvh3f bvh, 37 | const cuBQL::vec3f queryPoint, 38 | float maxQueryRadius = CUBQL_INF); 39 | 40 | /*! performs one complete query, starting with an empty CPAT 41 | result, traversing the BVH for the given mesh, and processing 42 | every triangle that needs consideration.
Only intersections 43 | that are < maxQueryRadius will get accepted */ 44 | inline __cubql_both 45 | void runQuery(const cuBQL::Triangle *triangles, 46 | const cuBQL::bvh3f bvh, 47 | const cuBQL::vec3f queryPoint, 48 | float maxQueryRadius = CUBQL_INF); 49 | 50 | }; 51 | 52 | 53 | // ============================================================================= 54 | // *** IMPLEMENTATION *** 55 | // ============================================================================= 56 | 57 | /*! performs one complete query, starting with an empty CPAT 58 | result, traversing the BVH for the givne mesh, and processing 59 | every triangle that needs consideration. Only intersections 60 | that are < maxQueryRadius will get accepted */ 61 | inline __cubql_both 62 | void CPAT::runQuery(const cuBQL::Triangle *triangles, 63 | const cuBQL::bvh3f bvh, 64 | const cuBQL::vec3f queryPoint, 65 | float maxQueryRadius) 66 | { 67 | triangleIdx = -1; 68 | sqrDist = maxQueryRadius*maxQueryRadius; 69 | auto perPrimitiveCode 70 | = [bvh,triangles,queryPoint,this] 71 | (uint32_t triangleIdx)->float 72 | { 73 | const Triangle triangle = triangles[triangleIdx]; 74 | if (cuBQL::triangles::computeClosestPoint(*this,triangle,queryPoint)) 75 | this->triangleIdx = triangleIdx; 76 | /*! the (possibly new?) max cut-off radius (squared, as 77 | traversals operate on square distances!) 
*/ 78 | return this->sqrDist; 79 | }; 80 | // careful: traversals operate on the SQUARE radii 81 | const float maxQueryRadiusDistance 82 | = maxQueryRadius * maxQueryRadius; 83 | cuBQL::shrinkingRadiusQuery::forEachPrim 84 | (/* what we want to execute for each candidate: */perPrimitiveCode, 85 | /* what we're querying into*/bvh, 86 | /* where we're querying */queryPoint, 87 | /* initial maximum search radius */maxQueryRadiusDistance 88 | ); 89 | } 90 | 91 | } // ::cuBQL::triangles 92 | } // ::cuBQL 93 | -------------------------------------------------------------------------------- /cuBQL/queries/triangleData/Triangle.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | /*! \file cuBQL/triangles/Triangle.h Defines a generic triangle type and 5 | some operations thereon, that various queries can then build on */ 6 | 7 | #pragma once 8 | 9 | #include "cuBQL/math/vec.h" 10 | #include "cuBQL/math/box.h" 11 | 12 | namespace cuBQL { 13 | 14 | // ========================================================================= 15 | // *** INTERFACE *** 16 | // ========================================================================= 17 | 18 | // /*! a simple triangle consisting of three vertices. In order to not 19 | // overload this class with too many functions the actual 20 | // operations on triangles - such as intersectin with a ray, 21 | // computing distance to a point, etc - will be defined in the 22 | // respective queries */ 23 | // struct Triangle { 24 | // /*! 
returns an axis aligned bounding box enclosing this triangle */ 25 | // inline __cubql_both box3f bounds() const; 26 | // inline __cubql_both vec3f sample(float u, float v) const; 27 | // inline __cubql_both vec3f normal() const; 28 | 29 | // vec3f a, b, c; 30 | // }; 31 | 32 | template 33 | struct triangle_t 34 | { 35 | using vec3 = vec_t; 36 | using box3 = box_t; 37 | 38 | inline __cubql_both box3 bounds() const; 39 | inline __cubql_both vec3 sample(float u, float v) const; 40 | inline __cubql_both vec3 normal() const; 41 | 42 | vec3 a; 43 | vec3 b; 44 | vec3 c; 45 | }; 46 | 47 | using Triangle = triangle_t; 48 | 49 | /*! a typical triangle mesh, with array of vertices and 50 | indices. This class will NOT do any allocation/deallocation, not 51 | use smart pointers - it's just a 'view' on what whoever else 52 | might own and manage, and may thus be used exactly the same on 53 | device as well as on host. */ 54 | struct TriangleMesh { 55 | inline __cubql_both Triangle getTriangle(int i) const; 56 | 57 | /*! pointer to array of vertices; must be in same memory space as 58 | the operations performed on it (eg, if passed to a gpu builder 59 | it has to be gepu memory */ 60 | vec3f *vertices; 61 | 62 | /*! 
pointer to array of vertices; must be in same memory space as 63 | the operations performed on it (eg, if passed to a gpu builder 64 | it has to be gepu memory */ 65 | vec3i *indices; 66 | 67 | int numVertices; 68 | int numIndices; 69 | }; 70 | 71 | 72 | // ======================================================================== 73 | // *** IMPLEMENTATION *** 74 | // ======================================================================== 75 | 76 | // ---------------------- TriangleMesh ---------------------- 77 | inline __cubql_both Triangle TriangleMesh::getTriangle(int i) const 78 | { 79 | vec3i index = indices[i]; 80 | return { vertices[index.x],vertices[index.y],vertices[index.z] }; 81 | } 82 | 83 | // ---------------------- Triangle ---------------------- 84 | template 85 | inline __cubql_both vec_t triangle_t::normal() const 86 | { return cross(b-a,c-a); } 87 | 88 | template 89 | inline __cubql_both box_t triangle_t::bounds() const 90 | { return box_t().including(a).including(b).including(c); } 91 | 92 | template 93 | inline __cubql_both float area(triangle_t tri) 94 | { return length(cross(tri.b-tri.a,tri.c-tri.a)); } 95 | 96 | template 97 | inline __cubql_both vec_t 98 | triangle_t::sample(float u, float v) const 99 | { 100 | if (u+v >= 1.f) { u = 1.f-u; v = 1.f-v; } 101 | return (1.f-u-v)*a + u * b + v * c; 102 | } 103 | // inline __cubql_both vec3f Triangle::sample(float u, float v) const 104 | // { 105 | // if (u+v >= 1.f) { u = 1.f-u; v = 1.f-v; } 106 | // return (1.f-u-v)*a + u * b + v * c; 107 | // } 108 | 109 | template 110 | inline __cubql_both 111 | dbgout operator<<(dbgout o, const triangle_t &triangle) 112 | { 113 | o << "{" << triangle.a << "," << triangle.b << "," << triangle.c << "}"; 114 | return o; 115 | } 116 | 117 | 118 | } // ::cuBQL 119 | 120 | -------------------------------------------------------------------------------- /samples/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # 
SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | # common stuff - data generators and IO for loading/generating test data 5 | add_subdirectory(common) 6 | if (NOT (TARGET stb_image)) 7 | add_subdirectory(3rdParty/stb_image) 8 | endif() 9 | 10 | 11 | # ================================================================== 12 | # sample that performs gpu-side closest-point queries between sets of 13 | # float3 points 14 | # ================================================================== 15 | add_subdirectory(s01_closestPoint_points_cpu) 16 | add_subdirectory(s01_closestPoint_points_gpu) 17 | 18 | # ================================================================== 19 | # sample that performs gpu-side closest-point queries between sets of 20 | # float3 points 21 | # ================================================================== 22 | if (CUBQL_HAVE_CUDA) 23 | add_executable(sample02_distanceToTriangleMesh 24 | s02_distanceToTriangleMesh/distanceToTriangleMesh.cu 25 | ) 26 | target_link_libraries(sample02_distanceToTriangleMesh 27 | # the cuda-side builders for float3 data 28 | cuBQL_cuda_float3 29 | # common samples stuff 30 | cuBQL_samples_common 31 | ) 32 | endif() 33 | 34 | 35 | # ================================================================== 36 | # sample that performs gpu-side inside/outside test for a given input 37 | # mesh and a generated set of query points. Input mesh is supposed to 38 | # be a 'proper' surface mesh without holes and proper normal 39 | # orientation (normal points to 'outside'); generated poitns lie on a 40 | # NxNxN grid. QUeries are performed by tracing axis-aligned rays and 41 | # computing cuqbl::crossingCount queries. 
42 | # ================================================================== 43 | if (CUBQL_HAVE_CUDA) 44 | add_executable(sample03_insideOutside 45 | s03_insideOutsideOfClosedMesh/insideOutside.cu 46 | ) 47 | target_link_libraries(sample03_insideOutside 48 | # the cuda-side builders for float3 data 49 | cuBQL_cuda_float3 50 | # common samples stuff 51 | cuBQL_samples_common 52 | ) 53 | endif() 54 | 55 | 56 | # ================================================================== 57 | # sample that computes, for every cell of a 3D cartesian grid put 58 | # around a surface mesh, whether the box representing that cell is 59 | # full inside the surface (0), fully outside (1), or whether it 60 | # intersects with one of the triangles of the surface mesh 61 | # ================================================================== 62 | if (CUBQL_HAVE_CUDA) 63 | add_executable(sample04_boxOverlapsOrInsideSurfaceMesh 64 | s04_boxOverlapsOrInsideSurfaceMesh/boxOverlapsOrInsideSurfaceMesh.cu 65 | ) 66 | target_link_libraries(sample04_boxOverlapsOrInsideSurfaceMesh 67 | # the cuda-side builders for float3 data 68 | cuBQL_cuda_float3 69 | # common samples stuff 70 | cuBQL_samples_common 71 | ) 72 | endif() 73 | 74 | 75 | 76 | # ================================================================== 77 | # sample that traces a grid of NxM rays diagonally through the 78 | # bounding box of a loaded triangle mesh, and computes - 'xray-style' 79 | # whether the ray did pass through unoccluded (white) or got occluded 80 | # (black), then saves that as an image 81 | # ================================================================== 82 | if (CUBQL_HAVE_CUDA) 83 | add_executable(sample05_lineOfSight 84 | s05_lineOfSight/lineOfSight.cu 85 | ) 86 | target_link_libraries(sample05_lineOfSight 87 | # the cuda-side builders for float3 data 88 | cuBQL_cuda_float3 89 | # common samples stuff 90 | cuBQL_samples_common 91 | stb_image 92 | ) 93 | endif() 94 | 95 | 96 | # 
================================================================== 97 | # sample that computes a volume of NxMxK floats that each store '1' if 98 | # point is within a given distance of the loaded surface mesh, and '0' 99 | # if not. Chosen distance for this sample is 1% of scene diameter; 100 | # this uses the triangles::anyWithinRadius() query. 101 | # ================================================================== 102 | if (CUBQL_HAVE_CUDA) 103 | add_executable(sample06_anyTriangleWithinRadius 104 | s06_anyTriangleWithinRadius/anyTriangleWithinRadius.cu 105 | ) 106 | target_link_libraries(sample06_anyTriangleWithinRadius 107 | # the cuda-side builders for float3 data 108 | cuBQL_cuda_float3 109 | # common samples stuff 110 | cuBQL_samples_common 111 | stb_image 112 | ) 113 | endif() 114 | 115 | 116 | 117 | 118 | 119 | -------------------------------------------------------------------------------- /cuBQL/queries/triangleData/trianglesInBox.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | /*! \file queries/triangles/trianglesInBox.h implements a kernel that 5 | allows for checking whether a given query box intersects any 6 | triangle of a given model */ 7 | 8 | #pragma once 9 | 10 | #include "cuBQL/traversal/rayQueries.h" 11 | // the kind of model data we operate on 12 | #include "cuBQL/queries/triangleData/Triangle.h" 13 | #include "cuBQL/queries/triangleData/math/boxTriangleIntersections.h" 14 | 15 | /*! \namespace cuBQL - *cu*BQL based geometric *q*ueries */ 16 | namespace cuBQL { 17 | namespace triangles { 18 | 19 | // ============================================================================= 20 | // *** INTERFACE *** 21 | // ============================================================================= 22 | 23 | /*! 
given a bvh built over a triangle mesh ('bvh'), and a lambda 24 | that lets us retrieve a triangle by its triangle index 25 | ('getTriangle'), this kernel counts how many of the underlying 26 | mesh's triangles are actually intersecting the query box (ie, 27 | not just overlapping that box with their bounding box, but 28 | where some actual point or portion of the triangle lies within 29 | the actual box). 30 | 31 | 'maxIntersectionsToLookFor' allows for an early exit; once the 32 | number of triangles found reaches that value we stop any further 33 | queries and return this value. In particular, this allows for 34 | checking if *any* triangle intersects this box by calling this 35 | kernel with maxIntersectionsToLookFor==1 36 | 37 | getTriangle is a lambda getTriangle(uint32_t triID)->Triangle 38 | */ 39 | template 40 | inline __cubql_both 41 | int countTrianglesIntersectingQueryBox(const bvh3f bvh, 42 | const GetTrianglesLambda getTriangle, 43 | const box3f queryBox, 44 | int maxIntersectionsToLookFor=INT_MAX); 45 | 46 | /*! 
similar to \see countTrianglesIntersectingQueryBox, but 47 | doesn't perform actual trianle-box tests, and instead only 48 | uses cheaper (and conservative) test against the triangles' 49 | bounding boxes 50 | 51 | getTriangle is lambda getTriangle(uint32_t triID)->Triangle 52 | */ 53 | template 54 | inline __cubql_both 55 | int countTrianglesWhoseBoundsOverlapQueryBox(const bvh3f bvh, 56 | const GetTrianglesLambda getTriangle, 57 | const box3f queryBox, 58 | int maxIntersectionsToLookFor=INT_MAX); 59 | 60 | 61 | // ============================================================================= 62 | // *** IMPLEMENTATION *** 63 | // ============================================================================= 64 | 65 | template 66 | inline __cubql_both 67 | int countTrianglesIntersectingQueryBox(const bvh3f bvh, 68 | const GetTrianglesLambda getTriangle, 69 | const box3f queryBox, 70 | int maxIntersectionsToLookFor) 71 | { 72 | int count = 0; 73 | auto perTriangle 74 | = [&count,getTriangle,queryBox,maxIntersectionsToLookFor](uint32_t primID) 75 | { 76 | if (triangles::triangleIntersectsBox(getTriangle(primID),queryBox)) 77 | ++count; 78 | return (count >= maxIntersectionsToLookFor) 79 | ? CUBQL_TERMINATE_TRAVERSAL 80 | : CUBQL_CONTINUE_TRAVERSAL; 81 | }; 82 | fixedBoxQuery::forEachPrim(perTriangle,bvh,queryBox,/*dbg*/false); 83 | return count; 84 | } 85 | 86 | template 87 | inline __cubql_both 88 | int countTrianglesWhoseBoundsOverlapQueryBox(const bvh3f bvh, 89 | const GetTrianglesLambda getTriangle, 90 | const box3f queryBox, 91 | int maxIntersectionsToLookFor) 92 | { 93 | int count = 0; 94 | auto perTriangle 95 | = [&count,getTriangle,queryBox,maxIntersectionsToLookFor](uint32_t primID)->int 96 | { 97 | if (getTriangle(primID).bounds().overlaps(queryBox)) 98 | ++count; 99 | return (count >= maxIntersectionsToLookFor) 100 | ? 
CUBQL_TERMINATE_TRAVERSAL 101 | : CUBQL_CONTINUE_TRAVERSAL; 102 | }; 103 | fixedBoxQuery::forEachPrim(perTriangle,bvh,queryBox,/*dbg*/false); 104 | return count; 105 | } 106 | 107 | } // ::cuBQL::triangles 108 | } // ::cuBQL 109 | -------------------------------------------------------------------------------- /cuBQL/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | # a interface-only library that only sets include paths etc; when 5 | # using this the user has to manulaly set CUBQL_BUILDER_INSTANTIATION 6 | # in one of his/her source files 7 | add_library(cuBQL INTERFACE) 8 | 9 | target_sources(cuBQL INTERFACE 10 | # main public "interface" to this library 11 | bvh.h 12 | # general math struff to make public stuff work 13 | math/common.h 14 | math/math.h 15 | math/vec.h 16 | math/box.h 17 | # general (lambda-templated) traversal routines 18 | traversal/shrinkingRadiusQuery.h 19 | traversal/fixedBoxQuery.h 20 | # host side builder(s) 21 | builder/cpu.h 22 | builder/cpu/spatialMedian.h 23 | # cuda side builder(s) 24 | builder/cuda.h 25 | builder/cuda/builder_common.h 26 | builder/cuda/sm_builder.h 27 | builder/cuda/sah_builder.h 28 | builder/cuda/gpu_builder.h 29 | builder/cuda/radix.h 30 | builder/cuda/rebinMortonBuilder.h 31 | builder/cuda/wide_gpu_builder.h 32 | ) 33 | target_include_directories(cuBQL INTERFACE 34 | ${PROJECT_SOURCE_DIR} 35 | ) 36 | set_target_properties(cuBQL 37 | PROPERTIES 38 | CXX_VISIBILITY_PRESET default 39 | CUDA_VISIBILITY_PRESET default 40 | POSITION_INDEPENDENT_CODE ON 41 | CUDA_SEPARABLE_COMPILATION ON 42 | CUDA_RESOLVE_DEVICE_SYMBOLS ON 43 | CUDA_USE_STATIC_CUDA_RUNTIME ON 44 | ) 45 | 46 | # helper for creating type- and device-specific implementations of 47 | # cubql; i.e., one for host_float4, one for cuda_int3, etc. 
Since 48 | # everything other than the builders are entirely header only these 49 | # type-specific targets will, in fact, only contain instantiations of 50 | # the specific builders for the given type and device 51 | function(add_specific_instantiation device suffix T D) 52 | add_library(cuBQL_${device}_${T}${D} SHARED EXCLUDE_FROM_ALL 53 | builder/${device}/instantiate_builders.${suffix} 54 | ) 55 | if (${device} STREQUAL "cuda") 56 | target_compile_definitions(cuBQL_${device}_${T}${D} 57 | PUBLIC 58 | -DCUBQL_HAVE_CUDA=1 59 | ) 60 | endif() 61 | set_target_properties(cuBQL_${device}_${T}${D} 62 | PROPERTIES 63 | POSITION_INDEPENDENT_CODE ON 64 | WINDOWS_EXPORT_ALL_SYMBOLS TRUE 65 | FOLDER "CuBQL" 66 | ) 67 | target_link_libraries(cuBQL_${device}_${T}${D} 68 | PUBLIC 69 | cuBQL 70 | ) 71 | target_compile_definitions(cuBQL_${device}_${T}${D} 72 | PRIVATE 73 | -DCUBQL_INSTANTIATE_T=${T} 74 | -DCUBQL_INSTANTIATE_D=${D} 75 | ) 76 | 77 | add_library(cuBQL_${device}_${T}${D}_static STATIC EXCLUDE_FROM_ALL 78 | builder/${device}/instantiate_builders.${suffix} 79 | ) 80 | if (${device} STREQUAL "cuda") 81 | target_compile_definitions(cuBQL_${device}_${T}${D}_static 82 | PUBLIC 83 | -DCUBQL_HAVE_CUDA=1 84 | ) 85 | endif() 86 | set_target_properties(cuBQL_${device}_${T}${D}_static 87 | PROPERTIES 88 | POSITION_INDEPENDENT_CODE ON 89 | FOLDER "CuBQL" 90 | ) 91 | target_link_libraries(cuBQL_${device}_${T}${D}_static 92 | PUBLIC 93 | cuBQL 94 | ) 95 | set_target_properties(cuBQL_${device}_${T}${D}_static 96 | PROPERTIES 97 | CXX_VISIBILITY_PRESET default 98 | CUDA_VISIBILITY_PRESET default 99 | POSITION_INDEPENDENT_CODE ON 100 | CUDA_SEPARABLE_COMPILATION ON 101 | CUDA_RESOLVE_DEVICE_SYMBOLS ON 102 | CUDA_USE_STATIC_CUDA_RUNTIME ON 103 | ) 104 | target_compile_definitions(cuBQL_${device}_${T}${D}_static 105 | PRIVATE 106 | -DCUBQL_INSTANTIATE_T=${T} 107 | -DCUBQL_INSTANTIATE_D=${D} 108 | ) 109 | endfunction() 110 | 111 | # 
------------------------------------------------------------------ 112 | # generate all type/dim specific targets, ie, one target for each 113 | # {int,float,double,long}x{2,3,4}. Each such target contains the 114 | # pre-compiled builder(s) for that specific type/dimension, for the 115 | # case where the user does NOT want to use the header-only 116 | # mechanism. To avoid "polluting" the user's project with lots of 117 | # different targets that he or she may or may not need we add all 118 | # those as 'EXCLUDE_FROM_ALL', so only those that get actually used 119 | # will actually get built 120 | # ------------------------------------------------------------------ 121 | foreach(T IN ITEMS float int double longlong) 122 | foreach(D IN ITEMS 2 3 4) 123 | add_specific_instantiation(cpu cpp ${T} ${D}) 124 | if (CUBQL_HAVE_CUDA) 125 | add_specific_instantiation(cuda cu ${T} ${D}) 126 | endif() 127 | endforeach() 128 | endforeach() 129 | 130 | # the collection of all different type-specific queries currently 131 | # supplied by cubql. all these should be header-only and should all 132 | # automatically on both cpu and cuda, so this is a INTERFACE library 133 | add_library(cuBQL_queries INTERFACE 134 | # 135 | queries/pointData/knn.h 136 | # 137 | queries/pointData/findClosest.h 138 | queries/pointData/knn.h 139 | ) 140 | 141 | set_target_properties(cuBQL_queries PROPERTIES FOLDER "CuBQL") 142 | 143 | target_link_libraries(cuBQL_queries 144 | INTERFACE 145 | cuBQL 146 | ) 147 | 148 | 149 | 150 | -------------------------------------------------------------------------------- /cuBQL/builder/cuda/instantiate_builders.cu: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | /*! 
instantiates the GPU builder(s) */ 5 | #define CUBQL_GPU_BUILDER_IMPLEMENTATION 1 6 | #include "cuBQL/bvh.h" 7 | #include "cuBQL/builder/cuda.h" 8 | #include "cuBQL/builder/cuda/radix.h" 9 | #include "cuBQL/builder/cuda/rebinMortonBuilder.h" 10 | 11 | 12 | #define CUBQL_INSTANTIATE_BINARY_BVH(T,D) \ 13 | namespace cuBQL { \ 14 | namespace radixBuilder_impl { \ 15 | template \ 16 | void build(BinaryBVH &bvh, \ 17 | const typename BuildState::box_t *boxes, \ 18 | uint32_t numPrims, \ 19 | BuildConfig buildConfig, \ 20 | cudaStream_t s, \ 21 | GpuMemoryResource &memResource); \ 22 | } \ 23 | template void gpuBuilder(BinaryBVH &bvh, \ 24 | const box_t *boxes, \ 25 | uint32_t numBoxes, \ 26 | BuildConfig buildConfig, \ 27 | cudaStream_t s, \ 28 | GpuMemoryResource &mem_resource); \ 29 | namespace cuda { \ 30 | template \ 31 | void radixBuilder(BinaryBVH &bvh, \ 32 | const box_t *boxes, \ 33 | uint32_t numBoxes, \ 34 | BuildConfig buildConfig, \ 35 | cudaStream_t s, \ 36 | GpuMemoryResource &mem_resource); \ 37 | template \ 38 | void rebinRadixBuilder(BinaryBVH &bvh, \ 39 | const box_t *boxes, \ 40 | uint32_t numBoxes, \ 41 | BuildConfig buildConfig, \ 42 | cudaStream_t s, \ 43 | GpuMemoryResource &mem_resource); \ 44 | template \ 45 | void sahBuilder(BinaryBVH &bvh, \ 46 | const box_t *boxes, \ 47 | uint32_t numBoxes, \ 48 | BuildConfig buildConfig, \ 49 | cudaStream_t s, \ 50 | GpuMemoryResource &mem_resource); \ 51 | template \ 52 | void free(BinaryBVH &bvh, \ 53 | cudaStream_t s, \ 54 | GpuMemoryResource &mem_resource); \ 55 | } \ 56 | } \ 57 | 58 | #define CUBQL_INSTANTIATE_WIDE_BVH(T,D,N) \ 59 | namespace cuBQL { \ 60 | template void gpuBuilder(WideBVH &bvh, \ 61 | const box_t *boxes, \ 62 | uint32_t numBoxes, \ 63 | BuildConfig buildConfig, \ 64 | cudaStream_t s, \ 65 | GpuMemoryResource &mem_resource); \ 66 | template void free(WideBVH &bvh, \ 67 | cudaStream_t s, \ 68 | GpuMemoryResource& mem_resource); \ 69 | } 70 | 71 | 72 | // 
CUBQL_INSTANTIATE_BINARY_BVH(CUBQL_INSTANTIATE_T,CUBQL_INSTANTIATE_D) 73 | 74 | #ifdef CUBQL_INSTANTIATE_T 75 | // instantiate an explict type and dimension 76 | CUBQL_INSTANTIATE_BINARY_BVH(CUBQL_INSTANTIATE_T,CUBQL_INSTANTIATE_D) 77 | CUBQL_INSTANTIATE_WIDE_BVH(CUBQL_INSTANTIATE_T,CUBQL_INSTANTIATE_D,4) 78 | CUBQL_INSTANTIATE_WIDE_BVH(CUBQL_INSTANTIATE_T,CUBQL_INSTANTIATE_D,8) 79 | CUBQL_INSTANTIATE_WIDE_BVH(CUBQL_INSTANTIATE_T,CUBQL_INSTANTIATE_D,12) 80 | #else 81 | // default instantiation(s) for float3 only 82 | CUBQL_INSTANTIATE_BINARY_BVH(float,3) 83 | CUBQL_INSTANTIATE_WIDE_BVH(float,3,4) 84 | CUBQL_INSTANTIATE_WIDE_BVH(float,3,8) 85 | CUBQL_INSTANTIATE_WIDE_BVH(float,3,12) 86 | #endif 87 | 88 | 89 | -------------------------------------------------------------------------------- /cuBQL/queries/pointData/knn.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | /*! \file cuBQL/queries/points/findClosest Provides kernels for 5 | finding closest point(s) on point data */ 6 | 7 | #pragma once 8 | 9 | #include "cuBQL/traversal/shrinkingRadiusQuery.h" 10 | #include "cuBQL/queries/common/knn.h" 11 | 12 | namespace cuBQL { 13 | namespace points { 14 | 15 | // ****************************************************************** 16 | // INTERFACE 17 | // (which functions this header file provides) 18 | // ****************************************************************** 19 | 20 | /*! given a bvh build over a set of float points, perform a 21 | closest-point query that returns the index of the input point 22 | closest to the query point (if one exists within the given max 23 | query radius), or -1 (if not). 
24 | 25 | \returns Index of point in points[] array that is closest to 26 | query point, or -1 if no point exists within provided max query 27 | range 28 | 29 | \note If more than one point with similar closest distance 30 | exist, then this function will not make any guarantees as to 31 | which of them will be returned (though we can expect that 32 | succesuve such queries on the _same_ bvh will return the same 33 | result, different BVHs built even over the same input data may 34 | not) 35 | */ 36 | template 37 | inline __cubql_both 38 | cuBQL::knn::Result findKNN(/*! memory to return the list of found knn candidates in */ 39 | cuBQL::knn::Candidate *foundCandidates, 40 | /*! number of knn candidates we're 41 | looking for (ie, the "N" in 42 | N-neighbors) */ 43 | int maxN, 44 | /*! binary bvh built over the given points[] 45 | specfied below */ 46 | BinaryBVH bvhOverPoints, 47 | /*! data points that the bvh was built over */ 48 | const vec_t *points, 49 | /*! the query point for which we want to know the 50 | result */ 51 | vec_t queryPoint, 52 | /*! square of the maximum query distance in which 53 | this query is to look for candidates. note 54 | this is the SQUARE distance */ 55 | float squareOfMaxQueryDistance=INFINITY); 56 | 57 | 58 | // ****************************************************************** 59 | // IMPLEMENTATION 60 | // ****************************************************************** 61 | 62 | template 63 | inline __cubql_both 64 | cuBQL::knn::Result findKNN(/*! memory to return the list of found knn candidates in */ 65 | cuBQL::knn::Candidate *foundCandidates, 66 | /*! number of knn candidates we're 67 | looking for (ie, the "N" in 68 | N-neighbors) */ 69 | int maxN, 70 | /*! binary bvh built over the given points[] 71 | specfied below */ 72 | BinaryBVH bvhOverPoints, 73 | /*! data points that the bvh was built over */ 74 | const vec_t *points, 75 | /*! 
the query point for which we want to know the 76 | result */ 77 | vec_t queryPoint, 78 | /*! square of the maximum query distance in which 79 | this query is to look for candidates. note 80 | this is the SQUARE distance */ 81 | float squareOfMaxQueryDistance) 82 | { 83 | cuBQL::knn::Result result = { 0, squareOfMaxQueryDistance }; 84 | // callback that processes each candidate, and checks if its 85 | // closer than current best 86 | auto candidateLambda 87 | = [&result,foundCandidates,maxN,points,queryPoint] 88 | (int pointID)->float 89 | { 90 | // compute (square distance) 91 | float sqrDist = fSqrDistance_rd(points[pointID],queryPoint); 92 | cuBQL::knn::Candidate thisCandidate = { pointID, sqrDist }; 93 | cuBQL::knn::insert_linear(foundCandidates,maxN,thisCandidate,result); 94 | // return the maximum query distance from here on out: 95 | return result.sqrDistMax; 96 | }; 97 | 98 | cuBQL::shrinkingRadiusQuery::forEachPrim(candidateLambda, 99 | bvhOverPoints, 100 | queryPoint, 101 | squareOfMaxQueryDistance); 102 | return result; 103 | } 104 | 105 | } // ::cuBQL::points 106 | } // ::cuBQL 107 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | cmake_minimum_required(VERSION 3.16) 5 | 6 | cmake_policy(SET CMP0048 NEW) 7 | set(CMAKE_BUILD_TYPE_INIT "Release") 8 | project(cuBQL VERSION 1.1.0 LANGUAGES C CXX) 9 | 10 | if (CUBQL_DISABLE_CUDA) 11 | message("#cuBQL: CUDA _DISABLED_ by user request") 12 | set(CUBQL_HAVE_CUDA OFF) 13 | else() 14 | if (NOT CMAKE_CUDA_COMPILER) 15 | include(CheckLanguage) 16 | check_language(CUDA) 17 | endif() 18 | 19 | if (CMAKE_CUDA_COMPILER) 20 | message("#cuBQL: CUDA _FOUND_! 
building both cuda and host libs") 21 | enable_language(CUDA) 22 | set(CUBQL_HAVE_CUDA ON) 23 | else() 24 | message(AUTHOR_WARNING 25 | " ===========================================================\n" 26 | " #cuBQL: could not find CUDA - going to build only host libs\n" 27 | " ===========================================================\n" 28 | ) 29 | set(CUBQL_HAVE_CUDA OFF) 30 | endif() 31 | endif() 32 | 33 | 34 | set(CMAKE_CXX_STANDARD 17) 35 | set(CMAKE_CXX_STANDARD_REQUIRED ON) 36 | 37 | if(${CMAKE_VERSION} VERSION_GREATER_EQUAL 3.18) 38 | cmake_policy(SET CMP0104 NEW) 39 | endif() 40 | 41 | if (NOT (${CMAKE_CURRENT_SOURCE_DIR} STREQUAL ${CMAKE_SOURCE_DIR})) 42 | set(CUBQL_IS_SUBPROJECT ON) 43 | else() 44 | set(CUBQL_IS_SUBPROJECT OFF) 45 | endif() 46 | 47 | # ------------------------------------------------------------------ 48 | # check if we're a subproject, and if so, only include the library 49 | # itself 50 | # ------------------------------------------------------------------ 51 | if (CUBQL_IS_SUBPROJECT) 52 | # ------------------------------------------------------------------ 53 | # we're included from a parent project; it's this parent project's 54 | # job to project-specific configs like configure output paths and 55 | # build type, and to set CUQBL_CUDA_ARCHITECTURES to whatever 56 | # arch(s) the project wants us to build for. 57 | # 58 | # Check if CUBQL_CUDA_ARCHITECTURES is set, and throw an error if 59 | # not 60 | # ------------------------------------------------------------------ 61 | if (CUBQL_HAVE_CUDA AND (NOT CMAKE_CUDA_ARCHITECTURES) 62 | OR 63 | ((${CMAKE_VERSION} VERSION_LESS 3.24) AND ("${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "52")) 64 | ) 65 | message(FATAL_ERROR 66 | " =====================================================================\n" 67 | " #cuBQL: you've included cuBQL as a subproject (as it should!),\n" 68 | " but CMAKE_CUDA_ARCHITECTURES was not set. 
As it is\n" 69 | " almost certainly going to cause some problems in your project\n" 70 | " if we were to just build cuBQL for archs different than your own\n" 71 | " project's we will cowardly refuse do so. Before including cuBQL\n" 72 | " you should explicitly set the cmake variable\n" 73 | " CMAKE_CUDA_ARCHITECTURES to whatever the main project will want\n" 74 | " to use, too.\n" 75 | 76 | " \n" 77 | 78 | " If in doubt as to what to use, at least for cmake >= 3.24 you can also use\n" 79 | " set(CMAKE_CUDA_ARCHITECTURES \"all-major\")\n" 80 | " add_subdirectory( 11 | #endif 12 | 13 | namespace cuBQL { 14 | 15 | /*! struct used to control how exactly the builder is supposed to 16 | build the tree; in particular, at which threshold to make a 17 | leaf */ 18 | struct BuildConfig { 19 | inline BuildConfig &enableSAH() { buildMethod = SAH; return *this; } 20 | inline BuildConfig &enableELH() { buildMethod = ELH; return *this; } 21 | typedef enum 22 | { 23 | /*! simple 'adaptive spatial median' strategy. When splitting a 24 | subtree, this first computes the centroid of each input 25 | primitive in that subtree, then computes the bounding box of 26 | those centroids, then creates a split plane along the widest 27 | dimension of that centroid bounding box, right through the 28 | middle */ 29 | SPATIAL_MEDIAN=0, 30 | /*! use good old surface area heuristic. In theory that only 31 | makes sense for BVHes that are used for tracing rays 32 | (theoretic motivation is a bit wobbly for other sorts of 33 | queries), but it seems to help even for other queries. Much 34 | more expensive to build, though */ 35 | SAH, 36 | /*! edge-length heuristic - experimental */ 37 | ELH 38 | } BuildMethod; 39 | 40 | /*! what leaf size the builder is _allowed_ to make; no matter 41 | what input is specified, the builder may never produce leaves 42 | larger than this value */ 43 | int maxAllowedLeafSize = 1<<15; 44 | 45 | /*! 
threshold below which the builder should make a leaf, no 46 | matter what the prims in the subtree look like. A value of 0 47 | means "leave it to the builder" */ 48 | int makeLeafThreshold = 0; 49 | 50 | BuildMethod buildMethod = SPATIAL_MEDIAN; 51 | }; 52 | 53 | /*! the most basic type of BVH where each BVH::Node is either a leaf 54 | (and contains Node::count primitives), or is an inner node (and 55 | points to a pair of child nodes). Node 0 is the root node; node 56 | 1 is always unused (so all other node pairs start on an even 57 | index) */ 58 | template 59 | struct BinaryBVH { 60 | using scalar_t = _scalar_t; 61 | enum { numDims = _numDims }; 62 | using vec_t = cuBQL::vec_t; 63 | using box_t = cuBQL::box_t; 64 | 65 | static constexpr int const node_width = 1; 66 | 67 | struct CUBQL_ALIGN(16) Node { 68 | enum { count_bits = 16, offset_bits = 64-count_bits }; 69 | 70 | box_t bounds; 71 | 72 | struct Admin { 73 | /*! For inner nodes, this points into the nodes[] array, with 74 | left child at nodes.offset+0, and right child at 75 | nodes.offset+1. For leaf nodes, this points into the 76 | primIDs[] array, with the first prim being primIDs[offset], 77 | next one primIDs[offset+1], etc. */ 78 | union { 79 | struct { 80 | uint64_t offset : offset_bits; 81 | /* number of primitives in this leaf, if a leaf; 0 for inner 82 | nodes. */ 83 | uint64_t count : count_bits; 84 | }; 85 | // the same as a single int64, so we can read/write with a 86 | // single op 87 | uint64_t offsetAndCountBits; 88 | }; 89 | }; 90 | Admin admin; 91 | }; 92 | 93 | enum { maxLeafSize=((1< 105 | struct WideBVH { 106 | using scalar_t = _scalar_t; 107 | 108 | enum { numDims = _numDims }; 109 | using vec_t = cuBQL::vec_t; 110 | using box_t = cuBQL::box_t; 111 | 112 | static constexpr int const node_width = BVH_WIDTH; 113 | 114 | /*! 
a n-wide node of this BVH; note that unlike BinaryBVH::Node 115 | this is not a "single" node, but actually N nodes merged 116 | together */ 117 | struct CUBQL_ALIGN(16) Node { 118 | struct CUBQL_ALIGN(16) Child { 119 | box_t bounds; 120 | struct { 121 | uint64_t valid : 1; 122 | uint64_t offset : 45; 123 | uint64_t count : 16; 124 | }; 125 | } children[BVH_WIDTH]; 126 | }; 127 | 128 | using node_t = Node; 129 | node_t *nodes = 0; 130 | //! number of (multi-)nodes on this WideBVH 131 | uint32_t numNodes = 0; 132 | uint32_t *primIDs = 0; 133 | uint32_t numPrims = 0; 134 | }; 135 | 136 | 137 | template 138 | using bvh_t = BinaryBVH; 139 | 140 | // easy short-hand - though cubql also supports other types of bvhs, 141 | // scalars, etc, this will likely be the most commonly used one. 142 | using bvh3f = BinaryBVH; 143 | 144 | #ifdef __CUDACC__ 145 | typedef BinaryBVH bvh_float2; 146 | typedef BinaryBVH bvh_float3; 147 | typedef BinaryBVH bvh_float4; 148 | #endif 149 | 150 | } // ::cuBQL 151 | 152 | #ifdef __CUDACC__ 153 | # include "cuBQL/builder/cuda.h" 154 | #endif 155 | # include "cuBQL/builder/cpu.h" 156 | 157 | 158 | 159 | 160 | -------------------------------------------------------------------------------- /samples/s05_lineOfSight/lineOfSight.cu: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | // for 'pointsMutuallyVisible()' 5 | #include "cuBQL/queries/triangleData/lineOfSight.h" 6 | #include "cuBQL/builder/cuda.h" 7 | #include 8 | #include "../common/loadOBJ.h" 9 | #define STB_IMAGE_IMPLEMENTATION 1 10 | #define STB_IMAGE_WRITE_IMPLEMENTATION 1 11 | #include "stb/stb_image.h" 12 | #include "stb/stb_image_write.h" 13 | 14 | using cuBQL::Triangle; 15 | using cuBQL::vec2i; 16 | using cuBQL::vec2f; 17 | using cuBQL::vec3i; 18 | using cuBQL::vec3f; 19 | using cuBQL::box3f; 20 | using cuBQL::bvh3f; 21 | using cuBQL::divRoundUp; 22 | 23 | __global__ 24 | void d_computeImage(uint32_t *d_result, 25 | vec2i dims, 26 | vec3i *d_indices, 27 | vec3f *d_vertices, 28 | box3f worldBounds, 29 | bvh3f bvh) 30 | { 31 | int ix = threadIdx.x+blockIdx.x*blockDim.x; if (ix >= dims.x) return; 32 | int iy = threadIdx.y+blockIdx.y*blockDim.y; if (iy >= dims.y) return; 33 | 34 | vec3f up(0.f,1.f,0.f); 35 | vec3f diag = worldBounds.size(); 36 | vec3f du = length(diag)*normalize(cross(diag,up)); 37 | vec3f dv = length(diag)*normalize(cross(du,diag)); 38 | 39 | vec2f f = (vec2f(ix,iy)) / vec2f(dims) - vec2f(.5f); 40 | 41 | vec3f A = worldBounds.center() - .5f*diag + f.x * du + f.y * dv; 42 | vec3f B = A + diag; 43 | 44 | auto getTriangle = [d_indices,d_vertices](uint32_t primID) 45 | { 46 | vec3i idx = d_indices[primID]; 47 | return Triangle{d_vertices[idx.x],d_vertices[idx.y],d_vertices[idx.z]}; 48 | }; 49 | 50 | using namespace cuBQL::triangles; 51 | bool visible = cuBQL::triangles::pointsMutuallyVisible(bvh,getTriangle,A,B); 52 | d_result[ix+iy*dims.x] 53 | = visible 54 | ? 
0xff000000 55 | : 0xffffffff; 56 | } 57 | 58 | template 59 | T *upload(const std::vector &vec) 60 | { 61 | T *d_vec = 0; 62 | cudaMalloc((void**)&d_vec,vec.size()*sizeof(T)); 63 | cudaMemcpy(d_vec,vec.data(),vec.size()*sizeof(T),cudaMemcpyDefault); 64 | return d_vec; 65 | } 66 | 67 | __global__ void fillBounds(box3f *d_bounds, 68 | int numTriangles, 69 | const vec3i *d_indices, 70 | const vec3f *d_vertices) 71 | { 72 | int tid = threadIdx.x+blockIdx.x*blockDim.x; 73 | if (tid >= numTriangles) return; 74 | vec3i idx = d_indices[tid]; 75 | d_bounds[tid] = box3f() 76 | .extend(d_vertices[idx.x]) 77 | .extend(d_vertices[idx.y]) 78 | .extend(d_vertices[idx.z]); 79 | } 80 | 81 | cuBQL::bvh3f buildBVH(int numTriangles, 82 | const vec3i *d_indices, 83 | const vec3f *d_vertices) 84 | { 85 | box3f *d_boxes; 86 | cudaMalloc((void**)&d_boxes,numTriangles*sizeof(box3f)); 87 | fillBounds<<>> 88 | (d_boxes,numTriangles,d_indices,d_vertices); 89 | 90 | std::cout << "building bvh" << std::endl; 91 | bvh3f bvh; 92 | ::cuBQL::gpuBuilder(bvh,d_boxes,numTriangles); 93 | std::cout << " ... done." 
<< std::endl; 94 | cudaFree(d_boxes); 95 | return bvh; 96 | } 97 | 98 | std::vector computeImage(const std::vector &indices, 99 | const std::vector &vertices, 100 | vec2i dims, 101 | box3f worldBounds) 102 | { 103 | int numCells = dims.x*dims.y; 104 | std::vector result(numCells); 105 | uint32_t *d_result = 0; 106 | cudaMalloc((void **)&d_result,numCells*sizeof(uint32_t)); 107 | 108 | vec3f *d_vertices = upload(vertices); 109 | vec3i *d_indices = upload(indices); 110 | 111 | bvh3f bvh = buildBVH(indices.size(),d_indices,d_vertices); 112 | 113 | vec2i bs(8); 114 | vec2i nb = divRoundUp(dims,bs); 115 | d_computeImage<<<(dim3)nb,(dim3)bs>>>(d_result,dims,d_indices,d_vertices, 116 | worldBounds,bvh); 117 | 118 | cuBQL::cuda::free(bvh); 119 | 120 | cudaMemcpy(result.data(),d_result,numCells*sizeof(uint32_t),cudaMemcpyDefault); 121 | cudaFree(d_result); 122 | cudaFree(d_indices); 123 | cudaFree(d_vertices); 124 | return result; 125 | } 126 | 127 | void usage(const std::string &error) 128 | { 129 | std::cerr << "Error : " << error << "\n\n"; 130 | std::cout << "Usage: ./insideOutside inFile.obj -o outFilePrefix [-n maxRes]" << std::endl; 131 | exit(0); 132 | } 133 | 134 | int main(int ac, char **av) 135 | { 136 | std::string inFileName = ""; 137 | std::string outFileName = ""; 138 | vec2i dims(1024,1024); 139 | for (int i=1;i vertices; 156 | std::vector indices; 157 | std::cout << "loading obj file " << inFileName << std::endl; 158 | cuBQL::samples::loadOBJ(indices,vertices,inFileName); 159 | std::cout << "done, got " << indices.size() << " triangles" << std::endl; 160 | box3f bb; 161 | for (auto v : vertices) 162 | bb.extend(v); 163 | std::vector result 164 | = computeImage(indices,vertices,dims,bb); 165 | stbi_flip_vertically_on_write(true); 166 | stbi_write_png(outFileName.c_str(),dims.x,dims.y,4, 167 | result.data(),dims.x*sizeof(uint32_t)); 168 | std::cout << "done. 
image saved to " << outFileName << std::endl; 169 | } 170 | -------------------------------------------------------------------------------- /cuBQL/builder/cuda/builder_common.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | /* this file contains the entire builder; this should never be included directly */ 5 | #pragma once 6 | 7 | #include "cuBQL/bvh.h" 8 | // #include "cuBQL/builder/cuda.h" 9 | #ifdef __HIPCC__ 10 | # include 11 | #else 12 | # include 13 | #endif 14 | #include 15 | #include 16 | 17 | namespace cuBQL { 18 | namespace gpuBuilder_impl { 19 | 20 | template 21 | inline void _ALLOC(T *&ptr, count_t count, cudaStream_t s, 22 | GpuMemoryResource &mem_resource) 23 | { mem_resource.malloc((void**)&ptr,count*sizeof(T),s); } 24 | 25 | template 26 | inline void _FREE(T *&ptr, cudaStream_t s, GpuMemoryResource &mem_resource) 27 | { mem_resource.free((void*)ptr,s); ptr = 0; } 28 | 29 | typedef enum : int8_t { OPEN_BRANCH, OPEN_NODE, DONE_NODE } NodeState; 30 | 31 | template 32 | struct CUBQL_ALIGN(8) AtomicBox { 33 | inline __device__ bool is_empty() const { return lower[0] > upper[0]; } 34 | inline __device__ void set_empty(); 35 | // set_empty, in owl::common-style naming 36 | inline __device__ void clear() { set_empty(); } 37 | inline __device__ float get_center(int dim) const; 38 | inline __device__ box_t make_box() const; 39 | 40 | inline __device__ float get_lower(int dim) const { 41 | if (box_t::numDims>4) 42 | return decode(lower[dim]); 43 | else if (box_t::numDims==4) { 44 | return decode(dim>1 45 | ?((dim>2)?lower[3]:lower[2]) 46 | :((dim )?lower[1]:lower[0])); 47 | } else if (box_t::numDims==3) { 48 | return decode(dim>1 49 | ?lower[2] 50 | :((dim )?lower[1]:lower[0])); 51 | } else 52 | return decode(lower[dim]); 53 | } 54 | inline __device__ float get_upper(int 
dim) const { 55 | if (box_t::numDims>4) 56 | return decode(upper[dim]); 57 | else if (box_t::numDims==4) { 58 | return decode(dim>1 59 | ?((dim>2)?upper[3]:upper[2]) 60 | :((dim )?upper[1]:upper[0])); 61 | } else if (box_t::numDims==3) 62 | return decode(dim>1 63 | ?upper[2] 64 | :((dim )?upper[1]:upper[0])); 65 | else 66 | return decode(upper[dim]); 67 | } 68 | 69 | int32_t lower[box_t::numDims]; 70 | int32_t upper[box_t::numDims]; 71 | 72 | inline static __device__ int32_t encode(float f); 73 | inline static __device__ float decode(int32_t bits); 74 | }; 75 | 76 | template 77 | inline __device__ float AtomicBox::get_center(int dim) const 78 | { 79 | return 0.5f*(get_lower(dim)+get_upper(dim)); 80 | // return 0.5f*(decode(lower[dim])+decode(upper[dim])); 81 | } 82 | 83 | template 84 | inline __device__ box_t AtomicBox::make_box() const 85 | { 86 | box_t box; 87 | #pragma unroll 88 | for (int d=0;d 96 | inline __device__ int32_t AtomicBox::encode(float f) 97 | { 98 | const int32_t sign = 0x80000000; 99 | int32_t bits = __float_as_int(f); 100 | if (bits & sign) bits ^= 0x7fffffff; 101 | return bits; 102 | } 103 | 104 | template 105 | inline __device__ float AtomicBox::decode(int32_t bits) 106 | { 107 | const int32_t sign = 0x80000000; 108 | if (bits & sign) bits ^= 0x7fffffff; 109 | return __int_as_float(bits); 110 | } 111 | 112 | template 113 | inline __device__ void AtomicBox::set_empty() 114 | { 115 | #pragma unroll 116 | for (int d=0;d inline __device__ 123 | void atomic_grow(AtomicBox &abox, const typename box_t::vec_t &other) 124 | { 125 | #pragma unroll 126 | for (int d=0;d::encode(other[d]);//get(other,d)); 128 | if (enc < abox.lower[d]) 129 | atomicMin(&abox.lower[d],enc); 130 | if (enc > abox.upper[d]) 131 | atomicMax(&abox.upper[d],enc); 132 | } 133 | } 134 | 135 | template 136 | inline __device__ void atomic_grow(AtomicBox &abox, const box_t &other) 137 | { 138 | #pragma unroll 139 | for (int d=0;d::encode(other.get_lower(d)); 141 | const int32_t 
enc_upper = AtomicBox::encode(other.get_upper(d)); 142 | if (enc_lower < abox.lower[d]) atomicMin(&abox.lower[d],enc_lower); 143 | if (enc_upper > abox.upper[d]) atomicMax(&abox.upper[d],enc_upper); 144 | } 145 | } 146 | 147 | template 148 | inline __device__ void atomic_grow(AtomicBox &abox, const AtomicBox &other) 149 | { 150 | #pragma unroll 151 | for (int d=0;d abox.upper[d]) atomicMax(&abox.upper[d],enc_upper); 156 | } 157 | } 158 | 159 | struct BuildState { 160 | uint32_t numNodes; 161 | }; 162 | 163 | } // ::cuBQL::gpuBuilder_impl 164 | } // ::cuBQL 165 | 166 | -------------------------------------------------------------------------------- /samples/s02_distanceToTriangleMesh/distanceToTriangleMesh.cu: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | /*! \file samples/closestPointOnTrianglesSurface Simple example of 5 | building BVHs over, and querying closest points on, sets of 3D 6 | triangles 7 | 8 | This example will, in successive steps: 9 | 10 | 1) load a cmdline-specified OBJ file of triangles 11 | 12 | 2) build BVH over those triangles 13 | 14 | 3) run some sample find-closest-point queries: generate a small set of 15 | query points along the diagonal of the model's bounding box, 16 | then for each such point, perform a bvh fcp closest-point query 17 | on those triangles.
18 | */ 19 | 20 | // cuBQL: 21 | #define CUBQL_GPU_BUILDER_IMPLEMENTATION 1 22 | #define CUBQL_TRIANGLE_CPAT_IMPLEMENTATION 1 23 | #include "cuBQL/bvh.h" 24 | #include "cuBQL/queries/triangleData/closestPointOnAnyTriangle.h" 25 | #include "samples/common/loadOBJ.h" 26 | 27 | // std: 28 | #include 29 | #include 30 | 31 | using cuBQL::vec3i; 32 | using cuBQL::vec3f; 33 | using cuBQL::box3f; 34 | using cuBQL::bvh3f; 35 | using cuBQL::divRoundUp; 36 | using cuBQL::prettyNumber; 37 | using cuBQL::prettyDouble; 38 | using cuBQL::getCurrentTime; 39 | using cuBQL::Triangle; 40 | 41 | /*! helper function that allocates managed memory, and checks for errors */ 42 | template 43 | T *allocManaged(int N) 44 | { 45 | T *ptr = 0; 46 | CUBQL_CUDA_CALL(MallocManaged((void **)&ptr,N*sizeof(T))); 47 | return ptr; 48 | } 49 | 50 | /*! generate boxes (required for bvh builder) from the input triangles */ 51 | __global__ void generateBoxes(box3f *boxForBuilder, 52 | const Triangle *triangles, 53 | int numTriangles) 54 | { 55 | int tid = threadIdx.x+blockIdx.x*blockDim.x; 56 | if (tid >= numTriangles) return; 57 | 58 | auto triangle = triangles[tid]; 59 | boxForBuilder[tid] = triangle.bounds(); 60 | } 61 | 62 | 63 | /*!
the actual sample query: generates numQueries points along the diagonal of the world bounding box, then for each such point performs a closest-point query */ 64 | __global__ 65 | void runQueries(bvh3f trianglesBVH, 66 | const Triangle *triangles, 67 | box3f worldBounds, 68 | int numQueries) 69 | { 70 | int tid = threadIdx.x+blockIdx.x*blockDim.x; 71 | if (tid >= numQueries) return; 72 | 73 | // compute a point on the diagonal of the world bounding box 74 | float t = tid / (numQueries-1.f); 75 | t = -.2f + t * 1.4f; 76 | 77 | vec3f queryPoint = worldBounds.lerp(vec3f(t)); 78 | 79 | cuBQL::triangles::CPAT cpat; 80 | 81 | cpat.runQuery(triangles, 82 | trianglesBVH, 83 | queryPoint); 84 | 85 | printf("[%i] closest surface point to point (%f %f %f) is on triangle %i, at (%f %f %f), and %f units away\n", 86 | tid, 87 | queryPoint.x, 88 | queryPoint.y, 89 | queryPoint.z, 90 | cpat.triangleIdx, 91 | cpat.P.x, 92 | cpat.P.y, 93 | cpat.P.z, 94 | sqrtf(cpat.sqrDist)); 95 | } 96 | 97 | 98 | int main(int ac, const char **av) 99 | { 100 | const char *inFileName = "../samples/bunny.obj"; 101 | if (ac != 1) 102 | inFileName = av[1]; 103 | 104 | // ------------------------------------------------------------------ 105 | // step 1: load triangle mesh 106 | // ------------------------------------------------------------------ 107 | std::cout << "loading triangles from " << inFileName << std::endl; 108 | std::vector h_triangles 109 | = cuBQL::samples::loadOBJ(inFileName); 110 | int numTriangles = (int)h_triangles.size(); 111 | std::cout << "loaded OBJ file, got " << prettyNumber(numTriangles) 112 | << " triangles" << std::endl; 113 | box3f worldBounds; 114 | for (auto tri : h_triangles) 115 | worldBounds.extend(tri.bounds()); 116 | std::cout << "world bounding box of triangles is " << worldBounds 117 | << std::endl; 118 | 119 | // upload to the device: 120 | Triangle *d_triangles = 0; 121 | CUBQL_CUDA_CALL(Malloc((void**)&d_triangles,numTriangles*sizeof(Triangle))); 122 |
CUBQL_CUDA_CALL(Memcpy(d_triangles,h_triangles.data(), 123 | numTriangles*sizeof(Triangle),cudaMemcpyDefault)); 124 | 125 | // ------------------------------------------------------------------ 126 | // step 2) build BVH over those triangles, so we can do queries on 127 | // them 128 | // ------------------------------------------------------------------ 129 | 130 | bvh3f trianglesBVH; 131 | { 132 | 133 | // allocate memory for bounding boxes (to build BVH over) 134 | box3f *d_boxes = 0; 135 | CUBQL_CUDA_CALL(Malloc((void**)&d_boxes,numTriangles*sizeof(box3f))); 136 | 137 | // run cuda kernel that generates a bounding box for each point 138 | generateBoxes<<>> 139 | (d_boxes,d_triangles,numTriangles); 140 | 141 | // ... aaaand build the BVH 142 | cuBQL::gpuBuilder(trianglesBVH,d_boxes,numTriangles,cuBQL::BuildConfig()); 143 | // free the boxes - we could actually re-use that memory below, but 144 | // let's just do this cleanly here. 145 | CUBQL_CUDA_CALL(Free(d_boxes)); 146 | std::cout << "done building BVH over " << prettyNumber(numTriangles) 147 | << " triangles" << std::endl; 148 | } 149 | 150 | // ------------------------------------------------------------------ 151 | // step 3: run some sample query - this query will generate query 152 | // points on the diagonal, and just print the results on the 153 | // terminal 154 | // ------------------------------------------------------------------ 155 | 156 | int numQueries = 16; 157 | float *sqrDist = allocManaged(numQueries); 158 | runQueries<<>> 159 | (trianglesBVH,d_triangles,worldBounds,numQueries); 160 | CUBQL_CUDA_SYNC_CHECK(); 161 | 162 | CUBQL_CUDA_CALL(Free(d_triangles)); 163 | cuBQL::cuda::free(trianglesBVH); 164 | 165 | return 0; 166 | } 167 | -------------------------------------------------------------------------------- /samples/s03_insideOutsideOfClosedMesh/insideOutside.cu: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Copyright (c) 
2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #include "cuBQL/queries/triangleData/pointInsideOutside.h" 5 | #include "cuBQL/builder/cuda.h" 6 | #include 7 | #include "../common/loadOBJ.h" 8 | 9 | using cuBQL::Triangle; 10 | using cuBQL::vec3i; 11 | using cuBQL::vec3f; 12 | using cuBQL::box3f; 13 | using cuBQL::bvh3f; 14 | using cuBQL::divRoundUp; 15 | 16 | __global__ void d_computeVolume(float *d_result, 17 | vec3i dims, 18 | vec3i *d_indices, 19 | vec3f *d_vertices, 20 | box3f worldBounds, 21 | bvh3f bvh, 22 | bool useTotalCount) 23 | { 24 | int ix = threadIdx.x+blockIdx.x*blockDim.x; if (ix >= dims.x) return; 25 | int iy = threadIdx.y+blockIdx.y*blockDim.y; if (iy >= dims.y) return; 26 | int iz = threadIdx.z+blockIdx.z*blockDim.z; if (iz >= dims.z) return; 27 | 28 | bool dbg = vec3i(ix,iy,iz) == vec3i(6,83,89);//dims/2; 29 | 30 | vec3f f = (vec3f(ix,iy,iz)+.5f) / vec3f(dims); 31 | vec3f P = (1.f-f)*worldBounds.lower + f*worldBounds.upper; 32 | 33 | auto getTriangle = [d_indices,d_vertices](uint32_t primID) 34 | { 35 | vec3i idx = d_indices[primID]; 36 | return Triangle{d_vertices[idx.x],d_vertices[idx.y],d_vertices[idx.z]}; 37 | }; 38 | 39 | bool inside = cuBQL::triangles::pointIsInsideSurface(bvh,getTriangle,P// ,dbg 40 | ); 41 | // if (inside) printf(" bug %i %i %i\n",ix,iy,iz); 42 | d_result[ix+iy*dims.x+iz*dims.x*dims.y] = (float)inside; 43 | } 44 | 45 | template 46 | T *upload(const std::vector &vec) 47 | { 48 | T *d_vec = 0; 49 | cudaMalloc((void**)&d_vec,vec.size()*sizeof(T)); 50 | cudaMemcpy(d_vec,vec.data(),vec.size()*sizeof(T),cudaMemcpyDefault); 51 | return d_vec; 52 | } 53 | 54 | __global__ void fillBounds(box3f *d_bounds, 55 | int numTriangles, 56 | const vec3i *d_indices, 57 | const vec3f *d_vertices) 58 | { 59 | int tid = threadIdx.x+blockIdx.x*blockDim.x; 60 | if (tid >= numTriangles) return; 61 | vec3i idx = d_indices[tid]; 62 | d_bounds[tid] = box3f() 63 | 
.extend(d_vertices[idx.x]) 64 | .extend(d_vertices[idx.y]) 65 | .extend(d_vertices[idx.z]); 66 | } 67 | 68 | cuBQL::bvh3f buildBVH(int numTriangles, 69 | const vec3i *d_indices, 70 | const vec3f *d_vertices) 71 | { 72 | box3f *d_boxes; 73 | cudaMalloc((void**)&d_boxes,numTriangles*sizeof(box3f)); 74 | fillBounds<<>> 75 | (d_boxes,numTriangles,d_indices,d_vertices); 76 | 77 | std::cout << "building bvh" << std::endl; 78 | bvh3f bvh; 79 | ::cuBQL::gpuBuilder(bvh,d_boxes,numTriangles); 80 | std::cout << " ... done." << std::endl; 81 | cudaFree(d_boxes); 82 | return bvh; 83 | } 84 | 85 | std::vector computeVolume(const std::vector &indices, 86 | const std::vector &vertices, 87 | vec3i dims, 88 | box3f worldBounds, 89 | bool useTotalCount) 90 | { 91 | int numCells = dims.x*dims.y*dims.z; 92 | std::vector result(numCells); 93 | float *d_result = 0; 94 | cudaMalloc((void **)&d_result,numCells*sizeof(float)); 95 | 96 | vec3f *d_vertices = upload(vertices); 97 | vec3i *d_indices = upload(indices); 98 | 99 | bvh3f bvh = buildBVH(indices.size(),d_indices,d_vertices); 100 | 101 | vec3i bs(8); 102 | vec3i nb = divRoundUp(dims,bs); 103 | d_computeVolume<<<(dim3)nb,(dim3)bs>>>(d_result,dims,d_indices,d_vertices, 104 | worldBounds, 105 | bvh,useTotalCount); 106 | 107 | cuBQL::cuda::free(bvh); 108 | 109 | cudaMemcpy(result.data(),d_result,numCells*sizeof(float),cudaMemcpyDefault); 110 | cudaFree(d_result); 111 | cudaFree(d_indices); 112 | cudaFree(d_vertices); 113 | return result; 114 | } 115 | 116 | void usage(const std::string &error) 117 | { 118 | std::cerr << "Error : " << error << "\n\n"; 119 | std::cout << "Usage: ./insideOutside inFile.obj -o outFilePrefix [-n maxRes]" << std::endl; 120 | exit(0); 121 | } 122 | 123 | int main(int ac, char **av) 124 | { 125 | std::string inFileName = ""; 126 | std::string outFilePrefix = ""; 127 | bool useTotalCount = false; 128 | int n = 256; 129 | for (int i=1;i vertices; 147 | std::vector indices; 148 | std::cout << "loading obj file " << 
inFileName << std::endl; 149 | cuBQL::samples::loadOBJ(indices,vertices,inFileName); 150 | std::cout << "done, got " << indices.size() << " triangles" << std::endl; 151 | for (auto &v : vertices) v = v * 1000.f; 152 | box3f bb; 153 | for (auto v : vertices) 154 | bb.extend(v); 155 | PRINT(bb); 156 | vec3f size = bb.size(); 157 | float max_size = reduce_max(size); 158 | vec3i dims = min(vec3i(n),vec3i(size/max_size*vec3f(n)+1.f)); 159 | std::cout << "using volume dims of " << dims << std::endl; 160 | 161 | std::vector result 162 | = computeVolume(indices,vertices,dims,bb,useTotalCount); 163 | const std::string outFileName = 164 | outFilePrefix 165 | +"_"+std::to_string(dims.x) 166 | +"x"+std::to_string(dims.y) 167 | +"x"+std::to_string(dims.z) 168 | +"_float.raw"; 169 | std::ofstream out(outFileName, 170 | std::ios::binary); 171 | out.write((const char *)result.data(), 172 | dims.x*dims.y*dims.z*sizeof(float)); 173 | std::cout << "done. volume saved to " << outFileName << std::endl; 174 | } 175 | -------------------------------------------------------------------------------- /samples/common/loadOBJ.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #include "loadOBJ.h" 5 | 6 | #define TINYOBJLOADER_IMPLEMENTATION 7 | #include "tiny_obj_loader.h" 8 | 9 | namespace std { 10 | inline bool operator<(const tinyobj::index_t &a, 11 | const tinyobj::index_t &b) 12 | { 13 | if (a.vertex_index < b.vertex_index) return true; 14 | if (a.vertex_index > b.vertex_index) return false; 15 | 16 | if (a.normal_index < b.normal_index) return true; 17 | if (a.normal_index > b.normal_index) return false; 18 | 19 | if (a.texcoord_index < b.texcoord_index) return true; 20 | if (a.texcoord_index > b.texcoord_index) return false; 21 | 22 | return false; 23 | } 24 | } 25 | 26 | namespace cuBQL { 27 | namespace samples { 28 | 29 | std::vector loadOBJ(const std::string &objFile) 30 | { 31 | std::string modelDir = ""; 32 | tinyobj::attrib_t attributes; 33 | std::vector shapes; 34 | std::vector materials; 35 | 36 | std::string err = ""; 37 | bool readOK 38 | = tinyobj::LoadObj(&attributes, 39 | &shapes, 40 | &materials, 41 | &err, 42 | &err, 43 | objFile.c_str(), 44 | modelDir.c_str(), 45 | /* triangulate */true); 46 | if (!readOK) 47 | throw std::runtime_error("Could not read OBJ model from "+objFile+" : "+err); 48 | 49 | std::vector triangles; 50 | const vec3f *vertex_array = (const vec3f*)attributes.vertices.data(); 51 | for (int shapeID=0;shapeID<(int)shapes.size();shapeID++) { 52 | tinyobj::shape_t &shape = shapes[shapeID]; 53 | for (size_t faceID=0;faceID &indices, 68 | std::vector &vertices, 69 | const std::string &objFile) 70 | { 71 | std::string modelDir = ""; 72 | tinyobj::attrib_t attributes; 73 | std::vector shapes; 74 | std::vector materials; 75 | 76 | std::string err = ""; 77 | bool readOK 78 | = tinyobj::LoadObj(&attributes, 79 | &shapes, 80 | &materials, 81 | &err, 82 | &err, 83 | objFile.c_str(), 84 | modelDir.c_str(), 85 | /* triangulate */true); 86 | if (!readOK) 87 | throw std::runtime_error("Could not read OBJ model from "+objFile+" : "+err); 88 | 89 | std::vector 
triangles; 90 | const vec3f *vertex_array = (const vec3f*)attributes.vertices.data(); 91 | int maxUsedVertex = 0; 92 | for (int shapeID=0;shapeID<(int)shapes.size();shapeID++) { 93 | tinyobj::shape_t &shape = shapes[shapeID]; 94 | for (size_t faceID=0;faceID= 0 && b >=0 && c >= 0 && a != b && a != c && b != c) 109 | indices.push_back({a,b,c}); 110 | } 111 | } 112 | vertices.resize(maxUsedVertex+1); 113 | std::copy(vertex_array,vertex_array+maxUsedVertex+1,vertices.data()); 114 | } 115 | 116 | void saveOBJ(const std::vector &triangles, 117 | const std::string &outFileName) 118 | { 119 | std::ofstream out(outFileName.c_str()); 120 | for (auto tri : triangles) { 121 | out << "v " << tri.a.x << " " << tri.a.y << " " << tri.a.z << std::endl; 122 | out << "v " << tri.b.x << " " << tri.b.y << " " << tri.b.z << std::endl; 123 | out << "v " << tri.c.x << " " << tri.c.y << " " << tri.c.z << std::endl; 124 | out << "f -1 -2 -3" << std::endl; 125 | } 126 | } 127 | 128 | std::vector triangulate(const std::vector &boxes) 129 | { 130 | std::vector triangles; 131 | int indices[] = {0,1,3, 2,3,0, 132 | 5,7,6, 5,6,4, 133 | 0,4,5, 0,5,1, 134 | 2,3,7, 2,7,6, 135 | 1,5,7, 1,7,3, 136 | 4,0,2, 4,2,6}; 137 | Triangle tri; 138 | for (auto box : boxes) { 139 | vec3f vertices[8], *vtx = vertices; 140 | for (int iz=0;iz<2;iz++) 141 | for (int iy=0;iy<2;iy++) 142 | for (int ix=0;ix<2;ix++) { 143 | vtx->x = (ix?box.lower:box.upper).x; 144 | vtx->y = (iy?box.lower:box.upper).y; 145 | vtx->z = (iz?box.lower:box.upper).z; 146 | vtx++; 147 | } 148 | for (int i=0;i<12;i++) { 149 | tri.a = vertices[indices[3*i+0]]; 150 | tri.b = vertices[indices[3*i+1]]; 151 | tri.c = vertices[indices[3*i+2]]; 152 | triangles.push_back(tri); 153 | } 154 | } 155 | return triangles; 156 | } 157 | 158 | } // ::cuBQL::samples 159 | } // ::cuBQL 160 | 161 | -------------------------------------------------------------------------------- /samples/s06_anyTriangleWithinRadius/anyTriangleWithinRadius.cu: 
-------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #include "cuBQL/queries/triangleData/anyWithinRadius.h" 5 | #include "cuBQL/builder/cuda.h" 6 | #include 7 | #include "../common/loadOBJ.h" 8 | 9 | using cuBQL::Triangle; 10 | using cuBQL::vec3i; 11 | using cuBQL::vec3f; 12 | using cuBQL::box3f; 13 | using cuBQL::bvh3f; 14 | using cuBQL::divRoundUp; 15 | 16 | __global__ 17 | void d_computeVolume(float *d_result, 18 | vec3i dims, 19 | vec3i *d_indices, 20 | vec3f *d_vertices, 21 | box3f worldBounds, 22 | bvh3f bvh, 23 | /*! if true, we only check the bounding boxes of 24 | triangles, not actual box-triangle tests */ 25 | bool checkOnlyBoundingBoxes) 26 | { 27 | int ix = threadIdx.x+blockIdx.x*blockDim.x; if (ix >= dims.x) return; 28 | int iy = threadIdx.y+blockIdx.y*blockDim.y; if (iy >= dims.y) return; 29 | int iz = threadIdx.z+blockIdx.z*blockDim.z; if (iz >= dims.z) return; 30 | 31 | vec3f f = (vec3f(ix,iy,iz)+vec3f(.5f)) / vec3f(dims); 32 | vec3f queryPoint = worldBounds.lerp(f); 33 | auto getTriangle = [d_indices,d_vertices](uint32_t primID) 34 | { 35 | vec3i idx = d_indices[primID]; 36 | return Triangle{d_vertices[idx.x],d_vertices[idx.y],d_vertices[idx.z]}; 37 | }; 38 | 39 | /* as radius, in this example use 5% of scene diameter */ 40 | float queryRadius = length(worldBounds.size()) * .05f; 41 | 42 | bool dbg = vec3f(ix,iy,iz) == vec3f(16); 43 | 44 | using namespace cuBQL::triangles; 45 | bool closeToSurface 46 | = cuBQL::triangles::anyWithinRadius(// the model we're querying 47 | bvh,getTriangle, 48 | // the point and radius we're querying with 49 | queryPoint,queryRadius,dbg); 50 | d_result[ix+iy*dims.x+iz*dims.x*dims.y] = closeToSurface?1.f:0.f; 51 | } 52 | 53 | template 54 | T *upload(const std::vector &vec) 55 | { 56 | T *d_vec = 0; 57 |
cudaMalloc((void**)&d_vec,vec.size()*sizeof(T)); 58 | cudaMemcpy(d_vec,vec.data(),vec.size()*sizeof(T),cudaMemcpyDefault); 59 | return d_vec; 60 | } 61 | 62 | __global__ void fillBounds(box3f *d_bounds, 63 | int numTriangles, 64 | const vec3i *d_indices, 65 | const vec3f *d_vertices) 66 | { 67 | int tid = threadIdx.x+blockIdx.x*blockDim.x; 68 | if (tid >= numTriangles) return; 69 | vec3i idx = d_indices[tid]; 70 | d_bounds[tid] = box3f() 71 | .extend(d_vertices[idx.x]) 72 | .extend(d_vertices[idx.y]) 73 | .extend(d_vertices[idx.z]); 74 | } 75 | 76 | cuBQL::bvh3f buildBVH(int numTriangles, 77 | const vec3i *d_indices, 78 | const vec3f *d_vertices) 79 | { 80 | box3f *d_boxes; 81 | cudaMalloc((void**)&d_boxes,numTriangles*sizeof(box3f)); 82 | fillBounds<<>> 83 | (d_boxes,numTriangles,d_indices,d_vertices); 84 | 85 | std::cout << "building bvh" << std::endl; 86 | bvh3f bvh; 87 | ::cuBQL::gpuBuilder(bvh,d_boxes,numTriangles); 88 | std::cout << " ... done." << std::endl; 89 | cudaFree(d_boxes); 90 | return bvh; 91 | } 92 | 93 | std::vector computeVolume(const std::vector &indices, 94 | const std::vector &vertices, 95 | vec3i dims, 96 | box3f worldBounds, 97 | bool checkOnlyBoundingBoxes) 98 | { 99 | int numCells = dims.x*dims.y*dims.z; 100 | std::vector result(numCells); 101 | float *d_result = 0; 102 | cudaMalloc((void **)&d_result,numCells*sizeof(float)); 103 | 104 | vec3f *d_vertices = upload(vertices); 105 | vec3i *d_indices = upload(indices); 106 | 107 | bvh3f bvh = buildBVH(indices.size(),d_indices,d_vertices); 108 | 109 | vec3i bs(8); 110 | vec3i nb = divRoundUp(dims,bs); 111 | d_computeVolume<<<(dim3)nb,(dim3)bs>>>(d_result,dims,d_indices,d_vertices, 112 | worldBounds, 113 | bvh,checkOnlyBoundingBoxes); 114 | 115 | cuBQL::cuda::free(bvh); 116 | 117 | cudaMemcpy(result.data(),d_result,numCells*sizeof(float),cudaMemcpyDefault); 118 | cudaFree(d_result); 119 | cudaFree(d_indices); 120 | cudaFree(d_vertices); 121 | return result; 122 | } 123 | 124 | void usage(const 
std::string &error) 125 | { 126 | std::cerr << "Error : " << error << "\n\n"; 127 | std::cout << "Usage: ./insideOutside inFile.obj -o outFilePrefix [-n maxRes]" << std::endl; 128 | exit(0); 129 | } 130 | 131 | int main(int ac, char **av) 132 | { 133 | std::string inFileName = ""; 134 | std::string outFilePrefix = ""; 135 | bool checkOnlyBoundingBoxes = false; 136 | int n = 256; 137 | for (int i=1;i vertices; 155 | std::vector indices; 156 | std::cout << "loading obj file " << inFileName << std::endl; 157 | cuBQL::samples::loadOBJ(indices,vertices,inFileName); 158 | std::cout << "done, got " << indices.size() << " triangles" << std::endl; 159 | for (auto &v : vertices) v = v * 1000.f; 160 | box3f bb; 161 | for (auto v : vertices) 162 | bb.extend(v); 163 | PRINT(bb); 164 | vec3f size = bb.size(); 165 | float max_size = reduce_max(size); 166 | vec3i dims = min(vec3i(n),vec3i(size/max_size*vec3f(n)+1.f)); 167 | std::cout << "using volume dims of " << dims << std::endl; 168 | 169 | std::vector result 170 | = computeVolume(indices,vertices,dims,bb,checkOnlyBoundingBoxes); 171 | const std::string outFileName = 172 | outFilePrefix 173 | +"_"+std::to_string(dims.x) 174 | +"x"+std::to_string(dims.y) 175 | +"x"+std::to_string(dims.z) 176 | +"_float.raw"; 177 | std::ofstream out(outFileName, 178 | std::ios::binary); 179 | out.write((const char *)result.data(), 180 | dims.x*dims.y*dims.z*sizeof(float)); 181 | std::cout << "done. volume saved to " << outFileName << std::endl; 182 | } 183 | -------------------------------------------------------------------------------- /samples/s04_boxOverlapsOrInsideSurfaceMesh/boxOverlapsOrInsideSurfaceMesh.cu: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #include "cuBQL/queries/triangleData/boxInsideOutsideIntersects.h" 5 | #include "cuBQL/builder/cuda.h" 6 | #include 7 | #include "../common/loadOBJ.h" 8 | 9 | using cuBQL::Triangle; 10 | using cuBQL::vec3i; 11 | using cuBQL::vec3f; 12 | using cuBQL::box3f; 13 | using cuBQL::bvh3f; 14 | using cuBQL::divRoundUp; 15 | 16 | __global__ 17 | void d_computeVolume(float *d_result, 18 | vec3i dims, 19 | vec3i *d_indices, 20 | vec3f *d_vertices, 21 | box3f worldBounds, 22 | bvh3f bvh, 23 | /*! if true, we only check the bounding boxes of 24 | triangles, not actual box-triangle tests */ 25 | bool checkOnlyBoundingBoxes) 26 | { 27 | int ix = threadIdx.x+blockIdx.x*blockDim.x; if (ix >= dims.x) return; 28 | int iy = threadIdx.y+blockIdx.y*blockDim.y; if (iy >= dims.y) return; 29 | int iz = threadIdx.z+blockIdx.z*blockDim.z; if (iz >= dims.z) return; 30 | 31 | vec3f f0 = (vec3f(ix,iy,iz)) / vec3f(dims); 32 | vec3f f1 = (vec3f(ix,iy,iz)+vec3f(1.f)) / vec3f(dims); 33 | box3f queryBox { worldBounds.lerp(f0), worldBounds.lerp(f1) }; 34 | auto getTriangle = [d_indices,d_vertices](uint32_t primID) 35 | { 36 | vec3i idx = d_indices[primID]; 37 | return Triangle{d_vertices[idx.x],d_vertices[idx.y],d_vertices[idx.z]}; 38 | }; 39 | 40 | using namespace cuBQL::triangles; 41 | boxInsideOutsideIntersects::result_t result 42 | = checkOnlyBoundingBoxes 43 | ? 
boxInsideOutsideIntersects::queryVsTriangleBoundingBoxes(bvh,getTriangle,queryBox) 44 | : boxInsideOutsideIntersects::queryVsActualTriangles(bvh,getTriangle,queryBox); 45 | 46 | int v; 47 | switch (result) { 48 | case boxInsideOutsideIntersects::OUTSIDE: v = 0; break; 49 | case boxInsideOutsideIntersects::INTERSECTS: v = 1; break; 50 | case boxInsideOutsideIntersects::INSIDE: v = 2; break; 51 | }; 52 | d_result[ix+iy*dims.x+iz*dims.x*dims.y] = v; 53 | } 54 | 55 | template 56 | T *upload(const std::vector &vec) 57 | { 58 | T *d_vec = 0; 59 | cudaMalloc((void**)&d_vec,vec.size()*sizeof(T)); 60 | cudaMemcpy(d_vec,vec.data(),vec.size()*sizeof(T),cudaMemcpyDefault); 61 | return d_vec; 62 | } 63 | 64 | __global__ void fillBounds(box3f *d_bounds, 65 | int numTriangles, 66 | const vec3i *d_indices, 67 | const vec3f *d_vertices) 68 | { 69 | int tid = threadIdx.x+blockIdx.x*blockDim.x; 70 | if (tid >= numTriangles) return; 71 | vec3i idx = d_indices[tid]; 72 | d_bounds[tid] = box3f() 73 | .extend(d_vertices[idx.x]) 74 | .extend(d_vertices[idx.y]) 75 | .extend(d_vertices[idx.z]); 76 | } 77 | 78 | cuBQL::bvh3f buildBVH(int numTriangles, 79 | const vec3i *d_indices, 80 | const vec3f *d_vertices) 81 | { 82 | box3f *d_boxes; 83 | cudaMalloc((void**)&d_boxes,numTriangles*sizeof(box3f)); 84 | fillBounds<<>> 85 | (d_boxes,numTriangles,d_indices,d_vertices); 86 | 87 | std::cout << "building bvh" << std::endl; 88 | bvh3f bvh; 89 | ::cuBQL::gpuBuilder(bvh,d_boxes,numTriangles); 90 | std::cout << " ... done." 
<< std::endl; 91 | cudaFree(d_boxes); 92 | return bvh; 93 | } 94 | 95 | std::vector computeVolume(const std::vector &indices, 96 | const std::vector &vertices, 97 | vec3i dims, 98 | box3f worldBounds, 99 | bool checkOnlyBoundingBoxes) 100 | { 101 | int numCells = dims.x*dims.y*dims.z; 102 | std::vector result(numCells); 103 | float *d_result = 0; 104 | cudaMalloc((void **)&d_result,numCells*sizeof(float)); 105 | 106 | vec3f *d_vertices = upload(vertices); 107 | vec3i *d_indices = upload(indices); 108 | 109 | bvh3f bvh = buildBVH(indices.size(),d_indices,d_vertices); 110 | 111 | vec3i bs(8); 112 | vec3i nb = divRoundUp(dims,bs); 113 | d_computeVolume<<<(dim3)nb,(dim3)bs>>>(d_result,dims,d_indices,d_vertices, 114 | worldBounds, 115 | bvh,checkOnlyBoundingBoxes); 116 | 117 | cuBQL::cuda::free(bvh); 118 | 119 | cudaMemcpy(result.data(),d_result,numCells*sizeof(float),cudaMemcpyDefault); 120 | cudaFree(d_result); 121 | cudaFree(d_indices); 122 | cudaFree(d_vertices); 123 | return result; 124 | } 125 | 126 | void usage(const std::string &error) 127 | { 128 | std::cerr << "Error : " << error << "\n\n"; 129 | std::cout << "Usage: ./insideOutside inFile.obj -o outFilePrefix [-n maxRes]" << std::endl; 130 | exit(0); 131 | } 132 | 133 | int main(int ac, char **av) 134 | { 135 | std::string inFileName = ""; 136 | std::string outFilePrefix = ""; 137 | bool checkOnlyBoundingBoxes = false; 138 | int n = 256; 139 | for (int i=1;i vertices; 157 | std::vector indices; 158 | std::cout << "loading obj file " << inFileName << std::endl; 159 | cuBQL::samples::loadOBJ(indices,vertices,inFileName); 160 | std::cout << "done, got " << indices.size() << " triangles" << std::endl; 161 | for (auto &v : vertices) v = v * 1000.f; 162 | box3f bb; 163 | for (auto v : vertices) 164 | bb.extend(v); 165 | PRINT(bb); 166 | vec3f size = bb.size(); 167 | float max_size = reduce_max(size); 168 | vec3i dims = min(vec3i(n),vec3i(size/max_size*vec3f(n)+1.f)); 169 | std::cout << "using volume dims of " << 
dims << std::endl; 170 | 171 | std::vector result 172 | = computeVolume(indices,vertices,dims,bb,checkOnlyBoundingBoxes); 173 | const std::string outFileName = 174 | outFilePrefix 175 | +"_"+std::to_string(dims.x) 176 | +"x"+std::to_string(dims.y) 177 | +"x"+std::to_string(dims.z) 178 | +"_float.raw"; 179 | std::ofstream out(outFileName, 180 | std::ios::binary); 181 | out.write((const char *)result.data(), 182 | dims.x*dims.y*dims.z*sizeof(float)); 183 | std::cout << "done. volume saved to " << outFileName << std::endl; 184 | } 185 | -------------------------------------------------------------------------------- /cuBQL/queries/triangleData/math/pointToTriangleDistance.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "cuBQL/queries/triangleData/Triangle.h" 7 | 8 | namespace cuBQL { 9 | /*! \namespace triangles for any queries operating on triangle model data */ 10 | namespace triangles { 11 | 12 | // ============================================================================= 13 | // *** INTERFACE *** 14 | // ============================================================================= 15 | 16 | struct PointToTriangleTestResult { 17 | /*! (square!) distance between query point and closest point on triangle.*/ 18 | float sqrDist = INFINITY; 19 | /*! the actual 3D point that's closest on the triangle */ 20 | vec3f P; 21 | }; 22 | 23 | /*! compute one point-triangle distance test, fill in the result 24 | value, and return it*/ 25 | inline __cubql_both 26 | PointToTriangleTestResult computeClosestPoint(Triangle triangle, 27 | vec3f queryPoint, 28 | bool dbg=false); 29 | /*! 
compute one point-triangle distance test, fill in the result 30 | value, and return it*/ 31 | inline __cubql_both 32 | PointToTriangleTestResult computeClosestPoint(vec3f queryPoint, 33 | Triangle triangle, 34 | bool dbg=false); 35 | 36 | /*! given a pre-initialized 'PointToTriangleTestResult' struct - 37 | that may already contain some other triangle's distance test - 38 | compute a distance test, and check if it is closer than the hit 39 | already stored. If not, 'existingResult' will remain 40 | unmodified and this fct returns false; if it is closer, the result 41 | will be updated to the new hit, and this fct returns true */ 42 | inline __cubql_both 43 | bool computeClosestPoint(PointToTriangleTestResult &existingResult, 44 | Triangle triangle, 45 | vec3f queryPoint, 46 | bool dbg=false); 47 | 48 | 49 | // ============================================================================= 50 | // *** IMPLEMENTATION *** 51 | // ============================================================================= 52 | 53 | namespace pointToTriangleTest { 54 | /*! helper struct for an edge given by its two vec3f end points 'a' and 'b'; mainly 55 | exists for the Edge::closestPoint test method */ 56 | struct Edge { 57 | inline __cubql_both 58 | Edge(vec3f a, vec3f b) : a(a), b(b) {} 59 | 60 | /*! compute point-to-edge distance for this edge; returns true if the 61 | result struct was updated with a closer point than what it 62 | previously contained */ 63 | inline __cubql_both 64 | bool closestPoint(PointToTriangleTestResult &result, 65 | const vec3f &referencePointToComputeDistanceTo, 66 | bool dbg=0) const; 67 | 68 | const vec3f a, b; 69 | }; 70 | 71 | 72 | /*! 
compute point-to-distance for this edge; returns true if the 73 | result struct was updated with a closer point than what it 74 | previously contained */ 75 | inline __cubql_both 76 | bool Edge::closestPoint(PointToTriangleTestResult &result, 77 | const vec3f &p, 78 | bool dbg) const 79 | { 80 | float t = dot(p-a,b-a) / dot(b-a,b-a); 81 | t = clamp(t); 82 | vec3f cp = a + t * (b-a); 83 | float sqrDist = dot(cp-p,cp-p); 84 | if (sqrDist >= result.sqrDist) 85 | return false; 86 | 87 | result.sqrDist = sqrDist; 88 | result.P = cp; 89 | return true; 90 | } 91 | 92 | /*! computes the querypoint-triangle test for a given pair of 93 | triangle and query point; returns true if this _was_ closer 94 | than what 'this' stored before (and if so, 'this' was 95 | updated); if this returns false the computed distance was 96 | greater than the already stored distance, and 'this' was 97 | left unmodified */ 98 | inline __cubql_both 99 | bool computeOneIntersection(PointToTriangleTestResult &result, 100 | const cuBQL::Triangle triangle, 101 | const cuBQL::vec3f queryPoint, 102 | bool dbg) 103 | { 104 | if (dbg) printf("testing triangle ...\n"); 105 | const vec3f a = triangle.a; 106 | const vec3f b = triangle.b; 107 | const vec3f c = triangle.c; 108 | vec3f N = cross(b-a,c-a); 109 | bool projectsOutside 110 | = (N == vec3f(0.f,0.f,0.f)) 111 | || (dot(queryPoint-a,cross(b-a,N)) >= 0.f) 112 | || (dot(queryPoint-b,cross(c-b,N)) >= 0.f) 113 | || (dot(queryPoint-c,cross(a-c,N)) >= 0.f); 114 | if (projectsOutside) { 115 | return 116 | Edge(a,b).closestPoint(result,queryPoint) | 117 | Edge(b,c).closestPoint(result,queryPoint) | 118 | Edge(c,a).closestPoint(result,queryPoint); 119 | } else { 120 | N = normalize(N); 121 | float signed_dist = dot(queryPoint-a,N); 122 | float sqrDist = signed_dist*signed_dist; 123 | if (sqrDist >= result.sqrDist) return false; 124 | result.sqrDist = sqrDist; 125 | result.P = queryPoint - signed_dist * N; 126 | return true; 127 | } 128 | } 129 | 130 | } // 
::cuBQL::triangles::pointToTriangleTest 131 | 132 | inline __cubql_both 133 | PointToTriangleTestResult computeClosestPoint(Triangle triangle, 134 | vec3f queryPoint, 135 | bool dbg) 136 | { 137 | PointToTriangleTestResult result; 138 | pointToTriangleTest::computeOneIntersection(result,triangle,queryPoint,dbg); 139 | return result; 140 | } 141 | 142 | inline __cubql_both 143 | PointToTriangleTestResult computeClosestPoint(vec3f queryPoint, 144 | Triangle triangle, 145 | bool dbg) 146 | { return computeClosestPoint(triangle,queryPoint,dbg); } 147 | 148 | inline __cubql_both 149 | bool computeClosestPoint(PointToTriangleTestResult &existingResult, 150 | Triangle triangle, 151 | vec3f queryPoint, 152 | bool dbg) 153 | { 154 | return pointToTriangleTest::computeOneIntersection 155 | (existingResult,triangle,queryPoint,dbg); 156 | } 157 | 158 | } 159 | } 160 | -------------------------------------------------------------------------------- /.github/action-scripts/install_cuda_ubuntu.sh: -------------------------------------------------------------------------------- 1 | # @todo - better / more robust parsing of inputs from env vars. 2 | ## ------------------- 3 | ## Constants 4 | ## ------------------- 5 | 6 | # List of sub-packages to install. 7 | # @todo - pass this in from outside the script? 8 | # @todo - check the specified subpackages exist via apt pre-install? apt-rdepends cuda-9-0 | grep "^cuda-"? 9 | 10 | # Ideally choose from the list of meta-packages to minimise variance between cuda versions (although it does change too). Some of these packages may not be availble in older CUDA releases 11 | CUDA_PACKAGES_IN=( 12 | "cuda-compiler" 13 | "cuda-cudart-dev" 14 | "cuda-nvtx" 15 | "cuda-nvrtc-dev" 16 | "libcurand-dev" # 11-0+ 17 | "cuda-cccl" # 11.4+, provides cub and thrust. 
On 11.3 known as cuda-thrust-11-3 18 | ) 19 | 20 | ## ------------------- 21 | ## Bash functions 22 | ## ------------------- 23 | # returns 0 (true) if a >= b 24 | function version_ge() { 25 | [ "$#" != "2" ] && echo "${FUNCNAME[0]} requires exactly 2 arguments." && exit 1 26 | [ "$(printf '%s\n' "$@" | sort -V | head -n 1)" == "$2" ] 27 | } 28 | # returns 0 (true) if a > b 29 | function version_gt() { 30 | [ "$#" != "2" ] && echo "${FUNCNAME[0]} requires exactly 2 arguments." && exit 1 31 | [ "$1" = "$2" ] && return 1 || version_ge $1 $2 32 | } 33 | # returns 0 (true) if a <= b 34 | function version_le() { 35 | [ "$#" != "2" ] && echo "${FUNCNAME[0]} requires exactly 2 arguments." && exit 1 36 | [ "$(printf '%s\n' "$@" | sort -V | head -n 1)" == "$1" ] 37 | } 38 | # returns 0 (true) if a < b 39 | function version_lt() { 40 | [ "$#" != "2" ] && echo "${FUNCNAME[0]} requires exactly 2 arguments." && exit 1 41 | [ "$1" = "$2" ] && return 1 || version_le $1 $2 42 | } 43 | 44 | ## ------------------- 45 | ## Select CUDA version 46 | ## ------------------- 47 | 48 | if [[ "${cuda}" == "none" ]] ; then 49 | exit 0 50 | fi 51 | 52 | # Get the cuda version from the environment as $cuda. 53 | CUDA_VERSION_MAJOR_MINOR=${cuda} 54 | 55 | # Split the version. 56 | # We (might/probably) don't know PATCH at this point - it depends which version gets installed. 57 | CUDA_MAJOR=$(echo "${CUDA_VERSION_MAJOR_MINOR}" | cut -d. -f1) 58 | CUDA_MINOR=$(echo "${CUDA_VERSION_MAJOR_MINOR}" | cut -d. -f2) 59 | CUDA_PATCH=$(echo "${CUDA_VERSION_MAJOR_MINOR}" | cut -d. -f3) 60 | # use lsb_release to find the OS. 61 | UBUNTU_VERSION=$(lsb_release -sr) 62 | UBUNTU_VERSION="${UBUNTU_VERSION//.}" 63 | 64 | echo "CUDA_MAJOR: ${CUDA_MAJOR}" 65 | echo "CUDA_MINOR: ${CUDA_MINOR}" 66 | echo "CUDA_PATCH: ${CUDA_PATCH}" 67 | # echo "UBUNTU_NAME: ${UBUNTU_NAME}" 68 | echo "UBUNTU_VERSION: ${UBUNTU_VERSION}" 69 | 70 | # If we don't know the CUDA_MAJOR or MINOR, error. 
71 | if [ -z "${CUDA_MAJOR}" ] ; then 72 | echo "Error: Unknown CUDA Major version. Aborting." 73 | exit 1 74 | fi 75 | if [ -z "${CUDA_MINOR}" ] ; then 76 | echo "Error: Unknown CUDA Minor version. Aborting." 77 | exit 1 78 | fi 79 | # If we don't know the Ubuntu version, error. 80 | if [ -z ${UBUNTU_VERSION} ]; then 81 | echo "Error: Unknown Ubuntu version. Aborting." 82 | exit 1 83 | fi 84 | 85 | 86 | ## ------------------------------- 87 | ## Select CUDA packages to install 88 | ## ------------------------------- 89 | CUDA_PACKAGES="" 90 | for package in "${CUDA_PACKAGES_IN[@]}" 91 | do : 92 | # @todo This is not perfect. Should probably provide a separate list for diff versions 93 | # cuda-compiler-X-Y if CUDA >= 9.1 else cuda-nvcc-X-Y 94 | if [[ "${package}" == "cuda-nvcc" ]] && version_ge "$CUDA_VERSION_MAJOR_MINOR" "9.1" ; then 95 | package="cuda-compiler" 96 | elif [[ "${package}" == "cuda-compiler" ]] && version_lt "$CUDA_VERSION_MAJOR_MINOR" "9.1" ; then 97 | package="cuda-nvcc" 98 | # CUB/Thrust are packaged in cuda-thrust in 11.3, but in cuda-cccl in 11.4+ 99 | elif [[ "${package}" == "cuda-thrust" || "${package}" == "cuda-cccl" ]]; then 100 | # Use cuda-cccl for CUDA >= 11.4 101 | if version_ge "$CUDA_VERSION_MAJOR_MINOR" "11.4" ; then 102 | package="cuda-cccl" 103 | # Use cuda-thrust for CUDA >= 11.3 (and < 11.4) 104 | elif version_ge "$CUDA_VERSION_MAJOR_MINOR" "11.3" ; then 105 | package="cuda-thrust" 106 | # Do not include this package for CUDA < 11.3 107 | else 108 | continue 109 | fi 110 | fi 111 | # CUDA 11+ includes lib* / lib*-dev packages, which if they existed previously were cuda-cu*- / cuda-cu*-dev- 112 | if [[ ${package} == libcu* ]] && version_lt "$CUDA_VERSION_MAJOR_MINOR" "11.0" ; then 113 | package="${package/libcu/cuda-cu}" 114 | fi 115 | # Build the full package name and append to the string. 
116 | CUDA_PACKAGES+=" ${package}-${CUDA_MAJOR}-${CUDA_MINOR}" 117 | done 118 | echo "CUDA_PACKAGES ${CUDA_PACKAGES}" 119 | 120 | ## ----------------- 121 | ## Prepare to install 122 | ## ----------------- 123 | CPU_ARCH="x86_64" 124 | PIN_FILENAME="cuda-ubuntu${UBUNTU_VERSION}.pin" 125 | PIN_URL="https://developer.download.nvidia.com/compute/cuda/repos/ubuntu${UBUNTU_VERSION}/${CPU_ARCH}/${PIN_FILENAME}" 126 | # apt keyring package now available https://developer.nvidia.com/blog/updating-the-cuda-linux-gpg-repository-key/ 127 | KERYRING_PACKAGE_FILENAME="cuda-keyring_1.1-1_all.deb" 128 | KEYRING_PACKAGE_URL="https://developer.download.nvidia.com/compute/cuda/repos/ubuntu${UBUNTU_VERSION}/${CPU_ARCH}/${KERYRING_PACKAGE_FILENAME}" 129 | REPO_URL="https://developer.download.nvidia.com/compute/cuda/repos/ubuntu${UBUNTU_VERSION}/${CPU_ARCH}/" 130 | 131 | echo "PIN_FILENAME ${PIN_FILENAME}" 132 | echo "PIN_URL ${PIN_URL}" 133 | echo "KEYRING_PACKAGE_URL ${KEYRING_PACKAGE_URL}" 134 | 135 | ## ----------------- 136 | ## Check for root/sudo 137 | ## ----------------- 138 | 139 | # Detect if the script is being run as root, storing true/false in is_root. 140 | is_root=false 141 | if (( $EUID == 0)); then 142 | is_root=true 143 | fi 144 | # Find if sudo is available 145 | has_sudo=false 146 | if command -v sudo &> /dev/null ; then 147 | has_sudo=true 148 | fi 149 | # Decide if we can proceed or not (root or sudo is required) and if so store whether sudo should be used or not. 150 | if [ "$is_root" = false ] && [ "$has_sudo" = false ]; then 151 | echo "Root or sudo is required. Aborting." 
152 | exit 1 153 | elif [ "$is_root" = false ] ; then 154 | USE_SUDO=sudo 155 | else 156 | USE_SUDO= 157 | fi 158 | 159 | ## ----------------- 160 | ## Install 161 | ## ----------------- 162 | echo "Adding CUDA Repository" 163 | wget ${PIN_URL} 164 | $USE_SUDO mv ${PIN_FILENAME} /etc/apt/preferences.d/cuda-repository-pin-600 165 | wget ${KEYRING_PACKAGE_URL} && ${USE_SUDO} dpkg -i ${KERYRING_PACKAGE_FILENAME} && rm ${KERYRING_PACKAGE_FILENAME} 166 | $USE_SUDO add-apt-repository "deb ${REPO_URL} /" 167 | $USE_SUDO apt-get update 168 | 169 | echo "Installing CUDA packages ${CUDA_PACKAGES}" 170 | $USE_SUDO apt-get -y install ${CUDA_PACKAGES} 171 | 172 | if [[ $? -ne 0 ]]; then 173 | echo "CUDA Installation Error." 174 | exit 1 175 | fi 176 | 177 | ## ----------------- 178 | ## Set environment vars / vars to be propagated 179 | ## ----------------- 180 | 181 | CUDA_PATH=/usr/local/cuda-${CUDA_MAJOR}.${CUDA_MINOR} 182 | echo "CUDA_PATH=${CUDA_PATH}" 183 | export CUDA_PATH=${CUDA_PATH} 184 | export PATH="$CUDA_PATH/bin:$PATH" 185 | export LD_LIBRARY_PATH="$CUDA_PATH/lib:$LD_LIBRARY_PATH" 186 | export LD_LIBRARY_PATH="$CUDA_PATH/lib64:$LD_LIBRARY_PATH" 187 | # Check nvcc is now available. 
188 | nvcc -V 189 | 190 | # If executed on github actions, make the appropriate echo statements to update the environment 191 | if [[ $GITHUB_ACTIONS ]]; then 192 | # Set paths for subsequent steps, using ${CUDA_PATH} 193 | echo "Adding CUDA to CUDA_PATH, PATH and LD_LIBRARY_PATH" 194 | echo "CUDA_PATH=${CUDA_PATH}" >> $GITHUB_ENV 195 | echo "${CUDA_PATH}/bin" >> $GITHUB_PATH 196 | echo "LD_LIBRARY_PATH=${CUDA_PATH}/lib:${LD_LIBRARY_PATH}" >> $GITHUB_ENV 197 | echo "LD_LIBRARY_PATH=${CUDA_PATH}/lib64:${LD_LIBRARY_PATH}" >> $GITHUB_ENV 198 | fi 199 | -------------------------------------------------------------------------------- /cuBQL/builder/cpu/spatialMedian.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "cuBQL/bvh.h" 7 | #if CUBQL_CPU_BUILDER_IMPLEMENTATION 8 | #include 9 | #endif 10 | 11 | namespace cuBQL { 12 | namespace cpu { 13 | // ****************************************************************** 14 | // INTERFACE 15 | // (which functionality this header file provides) 16 | // ****************************************************************** 17 | 18 | /*! 
a simple (and currently non parallel) recursive spatial median 19 | builder */ 20 | template 21 | void spatialMedian(BinaryBVH &bvh, 22 | const box_t *boxes, 23 | uint32_t numPrims, 24 | BuildConfig buildConfig); 25 | 26 | // ****************************************************************** 27 | // IMPLEMENTATION 28 | // ****************************************************************** 29 | 30 | #if CUBQL_CPU_BUILDER_IMPLEMENTATION 31 | namespace spatialMedian_impl { 32 | struct Topo { 33 | struct { 34 | int offset; 35 | int count; 36 | } admin; 37 | }; 38 | 39 | inline void makeLeaf(int nodeID, int begin, int end, 40 | std::vector &topo) 41 | { 42 | auto &node = topo[nodeID]; 43 | node.admin.count = end-begin; 44 | node.admin.offset = begin; 45 | } 46 | 47 | inline int makeInner(int nodeID, 48 | std::vector &topo) 49 | { 50 | int childID = (int)topo.size(); 51 | topo.push_back({}); 52 | topo.push_back({}); 53 | auto &node = topo[nodeID]; 54 | node.admin.count = 0; 55 | node.admin.offset = childID; 56 | return childID; 57 | } 58 | 59 | template 60 | void buildRec(int nodeID, int begin, int end, 61 | std::vector &topo, 62 | std::vector &primIDs, 63 | std::vector &altPrimIDs, 64 | const box_t *boxes, 65 | BuildConfig buildConfig) 66 | { 67 | if (end-begin <= buildConfig.makeLeafThreshold) 68 | return makeLeaf(nodeID,begin,end,topo); 69 | 70 | using box_t = ::cuBQL::box_t; 71 | 72 | box_t centBounds; 73 | for (int i=begin;i 104 | void refit(uint64_t nodeID, 105 | BinaryBVH &bvh, 106 | const box_t *boxes) 107 | { 108 | auto &node = bvh.nodes[nodeID]; 109 | if (node.admin.count == 0) { 110 | refit(node.admin.offset+0,bvh,boxes); 111 | refit(node.admin.offset+1,bvh,boxes); 112 | node.bounds = box_t() 113 | .including(bvh.nodes[node.admin.offset+0].bounds) 114 | .including(bvh.nodes[node.admin.offset+1].bounds); 115 | } else { 116 | node.bounds.clear(); 117 | for (int i=0;i 123 | void spatialMedian(BinaryBVH &bvh, 124 | const box_t *boxes, 125 | int numPrims, 126 | 
BuildConfig buildConfig) 127 | { 128 | using box_t = ::cuBQL::box_t; 129 | std::vector primIDs; 130 | for (int i=0;i altPrimIDs(primIDs.size()); 136 | std::vector topo(1); 137 | 138 | buildRec(0,0,(int)primIDs.size(), 139 | topo,primIDs,altPrimIDs,boxes,buildConfig); 140 | altPrimIDs.clear(); 141 | bvh.primIDs = new uint32_t[primIDs.size()]; 142 | bvh.numPrims = (uint32_t)primIDs.size(); 143 | std::copy(primIDs.begin(),primIDs.end(),bvh.primIDs); 144 | primIDs.clear(); 145 | 146 | bvh.nodes = new typename BinaryBVH::Node[topo.size()]; 147 | bvh.numNodes = (uint32_t)topo.size(); 148 | for (int i=0;i<(int)topo.size();i++) { 149 | bvh.nodes[i].admin.count = topo[i].admin.count; 150 | bvh.nodes[i].admin.offset = topo[i].admin.offset; 151 | } 152 | topo.clear(); 153 | refit(0,bvh,boxes); 154 | } 155 | } // spatialMedian_impl 156 | 157 | /*! a simple (and currently non parallel) recursive spatial median 158 | builder */ 159 | template 160 | void spatialMedian(BinaryBVH &bvh, 161 | const box_t *boxes, 162 | uint32_t numPrims, 163 | BuildConfig buildConfig) 164 | { 165 | spatialMedian_impl::spatialMedian(bvh,boxes,numPrims,buildConfig); 166 | } 167 | 168 | 169 | template 170 | void spatialMedian(WideBVH &bvh, 171 | const box_t *boxes, 172 | uint32_t numPrims, 173 | BuildConfig buildConfig) 174 | { throw std::runtime_error("not yet implemented"); } 175 | 176 | #endif 177 | } 178 | } 179 | 180 | #define CUBQL_CPU_INSTANTIATE_BINARY_BVH(T,D) \ 181 | namespace cuBQL { \ 182 | namespace cpu { \ 183 | template void spatialMedian(BinaryBVH &bvh, \ 184 | const box_t *boxes, \ 185 | uint32_t numPrims, \ 186 | BuildConfig buildConfig); \ 187 | } \ 188 | } \ 189 | 190 | 191 | #define CUBQL_CPU_INSTANTIATE_WIDE_BVH(T,D,W) \ 192 | namespace cuBQL { \ 193 | namespace cpu { \ 194 | template void spatialMedian(WideBVH &bvh, \ 195 | const box_t *boxes, \ 196 | uint32_t numPrims, \ 197 | BuildConfig buildConfig); \ 198 | } \ 199 | } \ 200 | 201 | 202 | 
-------------------------------------------------------------------------------- /cuBQL/builder/cuda/wide_gpu_builder.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "cuBQL/builder/cuda/builder_common.h" 7 | 8 | namespace cuBQL { 9 | namespace gpuBuilder_impl { 10 | 11 | struct CollapseInfo { 12 | // careful: 'isWideRoot' and ''binaryRoot' get written to in 13 | // parallel by differnet threads; they must be in different atomic 14 | // words. 15 | struct { 16 | int32_t parent:31; 17 | uint32_t isWideRoot:1; 18 | }; 19 | /*! for *wide* nodes: the ID of the binary node that is the root 20 | of the treelet that this node maps to */ 21 | int32_t binaryRoot; 22 | 23 | /*! for *binary* nodes that re treelet root nodes: the ID of the 24 | wide node that it maps to */ 25 | int wideNodeID; 26 | }; 27 | 28 | template 29 | __global__ 30 | void collapseInit(int *d_numWideNodes, 31 | CollapseInfo *d_infos, 32 | BinaryBVH bvh) 33 | { 34 | int tid = threadIdx.x+blockIdx.x*blockDim.x; 35 | if (tid >= bvh.numNodes) return; 36 | 37 | if (tid == 0) { 38 | *d_numWideNodes = 1; 39 | d_infos[0].parent = -1; 40 | d_infos[0].isWideRoot = 1; 41 | d_infos[0].wideNodeID = 0; 42 | d_infos[0].binaryRoot = -1; 43 | } 44 | 45 | auto &node = bvh.nodes[tid]; 46 | if (node.admin.count > 0) 47 | // leaf node 48 | return; 49 | 50 | // _could_ write this as a int4 ... 
we know it'll have to be 51 | // 128-bit aligned 52 | d_infos[node.admin.offset+0].isWideRoot = 0; 53 | d_infos[node.admin.offset+0].parent = tid; 54 | d_infos[node.admin.offset+0].binaryRoot = -1; 55 | d_infos[node.admin.offset+1].isWideRoot = 0; 56 | d_infos[node.admin.offset+1].parent = tid; 57 | d_infos[node.admin.offset+1].binaryRoot = -1; 58 | } 59 | 60 | template 61 | __global__ 62 | void collapseSummarize(int *d_numWideNodes, 63 | CollapseInfo *d_infos, 64 | BinaryBVH bvh) 65 | { 66 | int tid = threadIdx.x+blockIdx.x*blockDim.x; 67 | if (tid >= bvh.numNodes) return; 68 | if (tid == 1) 69 | // bvh.node[1] is always unused 70 | return; 71 | 72 | int depth = 0; 73 | { 74 | int nodeID = tid; 75 | while (nodeID > 0) { 76 | depth++; 77 | nodeID = d_infos[nodeID].parent; 78 | } 79 | } 80 | 81 | const bool isWideNodeRoot 82 | = /* inner node: */ 83 | (bvh.nodes[tid].admin.count == 0) 84 | && /* on right level*/ 85 | ((depth % (log_of::value)) == 0) 86 | 87 | || /* special case: single-node BVH */ 88 | (bvh.numNodes == 1); 89 | 90 | if (!isWideNodeRoot) 91 | return; 92 | 93 | const int wideNodeID 94 | = (tid == 0) 95 | ? 
0 96 | : atomicAdd(d_numWideNodes,1); 97 | d_infos[wideNodeID].binaryRoot = tid; 98 | d_infos[tid].isWideRoot = true; 99 | d_infos[tid].wideNodeID = wideNodeID; 100 | } 101 | 102 | 103 | template 104 | __global__ 105 | void collapseExecute(CollapseInfo *d_infos, 106 | WideBVH wideBVH, 107 | BinaryBVH binary) 108 | { 109 | int tid = threadIdx.x+blockIdx.x*blockDim.x; 110 | if (tid >= wideBVH.numNodes) 111 | return; 112 | 113 | int nodeStack[5], *stackPtr = nodeStack; 114 | int binaryRoot = d_infos[tid].binaryRoot; 115 | *stackPtr++ = binaryRoot; 116 | 117 | typename WideBVH::Node &target = wideBVH.nodes[tid]; 118 | int numWritten = 0; 119 | while (stackPtr > nodeStack) { 120 | int nodeID = *--stackPtr; 121 | auto &node = binary.nodes[nodeID]; 122 | if ((node.admin.count > 0) || 123 | ((nodeID != binaryRoot) && d_infos[nodeID].isWideRoot)) { 124 | target.children[numWritten].bounds = node.bounds; 125 | if (node.admin.count) { 126 | target.children[numWritten].offset = node.admin.offset; 127 | } else { 128 | target.children[numWritten].offset = d_infos[nodeID].wideNodeID; 129 | } 130 | target.children[numWritten].count = node.admin.count; 131 | target.children[numWritten].valid = 1; 132 | numWritten++; 133 | } else { 134 | *stackPtr++ = node.admin.offset+0; 135 | *stackPtr++ = node.admin.offset+1; 136 | } 137 | } 138 | while (numWritten < N) { 139 | target.children[numWritten].bounds.set_empty(); 140 | // lower 141 | // = make_float3(+INFINITY,+INFINITY,+INFINITY); 142 | // target.children[numWritten].bounds.upper 143 | // = make_float3(-INFINITY,-INFINITY,-INFINITY); 144 | target.children[numWritten].offset = (uint32_t)-1; 145 | target.children[numWritten].count = (uint32_t)-1; 146 | target.children[numWritten].valid = 0; 147 | ++numWritten; 148 | } 149 | } 150 | 151 | template 152 | void gpuBuilder(WideBVH &wideBVH, 153 | const box_t *boxes, 154 | uint32_t numBoxes, 155 | BuildConfig buildConfig, 156 | cudaStream_t s, 157 | GpuMemoryResource &memResource) 158 | { 
159 | BinaryBVH binaryBVH; 160 | gpuBuilder(binaryBVH,boxes,numBoxes,buildConfig,s,memResource); 161 | 162 | int *d_numWideNodes; 163 | CollapseInfo *d_infos; 164 | _ALLOC(d_numWideNodes,1,s,memResource); 165 | _ALLOC(d_infos,binaryBVH.numNodes,s,memResource); 166 | // cudaMemset(d_infos,0,binaryBVH.numNodes*sizeof(*d_infos)); 167 | collapseInit<<>> 168 | (d_numWideNodes,d_infos,binaryBVH); 169 | collapseSummarize<<>> 170 | (d_numWideNodes,d_infos,binaryBVH); 171 | CUBQL_CUDA_CALL(StreamSynchronize(s)); 172 | 173 | CUBQL_CUDA_CALL(MemcpyAsync(&wideBVH.numNodes,d_numWideNodes, 174 | sizeof(int),cudaMemcpyDefault,s)); 175 | CUBQL_CUDA_CALL(StreamSynchronize(s)); 176 | _ALLOC(wideBVH.nodes,wideBVH.numNodes,s,memResource); 177 | 178 | collapseExecute<<>> 179 | (d_infos,wideBVH,binaryBVH); 180 | 181 | wideBVH.numPrims = binaryBVH.numPrims; 182 | wideBVH.primIDs = binaryBVH.primIDs; 183 | binaryBVH.primIDs = 0; 184 | 185 | CUBQL_CUDA_CALL(StreamSynchronize(s)); 186 | _FREE(d_infos,s,memResource); 187 | _FREE(d_numWideNodes,s,memResource); 188 | free(binaryBVH,s,memResource); 189 | } 190 | 191 | } // ::cuBQL::gpuBuilder_impl 192 | 193 | template 194 | void gpuBuilder(WideBVH &bvh, 195 | const box_t *boxes, 196 | uint32_t numBoxes, 197 | BuildConfig buildConfig, 198 | cudaStream_t s, 199 | GpuMemoryResource &memResource) 200 | { 201 | gpuBuilder_impl::gpuBuilder(bvh,boxes,numBoxes,buildConfig,s,memResource); 202 | } 203 | 204 | namespace cuda { 205 | template 206 | void free(WideBVH &bvh, 207 | cudaStream_t s, 208 | GpuMemoryResource &memResource) 209 | { 210 | CUBQL_CUDA_CALL(StreamSynchronize(s)); 211 | gpuBuilder_impl::_FREE(bvh.primIDs,s,memResource); 212 | gpuBuilder_impl::_FREE(bvh.nodes,s,memResource); 213 | // CUBQL_CUDA_CALL(FreeAsync(bvh.primIDs,s)); 214 | // CUBQL_CUDA_CALL(FreeAsync(bvh.nodes,s)); 215 | CUBQL_CUDA_CALL(StreamSynchronize(s)); 216 | bvh.primIDs = 0; 217 | } 218 | } 219 | } // :: cuBQL 220 | 221 | 
-------------------------------------------------------------------------------- /cuBQL/queries/triangleData/crossingCount.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | /*! \file queries/triangleData/crossingCount.h Implement a "ray-triangle 5 | crossing count" query 6 | 7 | In this query, the data model is a triangle mesh (with a cuBQL BVH 8 | built over it, obviously), and the query is a list of ray segments 9 | (given by origin point and direction vector, respectively). The job 10 | of the query is to perform a 'crossing count', where each ray is 11 | traced against the triangles, and for every triangle it 12 | intersects, increases or decreases a given per-ray counter: -1 for 13 | crossing _into_ a surface (ie, the ray hits the triangle on its 14 | "front" side), and +1 for every crossing _out of_ the surface (if 15 | ray intersects triangle's back side). 16 | 17 | */ 18 | 19 | #pragma once 20 | 21 | // for 'fixedRayQuery' 22 | #include "cuBQL/traversal/rayQueries.h" 23 | // the kind of model data we operate on 24 | #include "cuBQL/queries/triangleData/math/rayTriangleIntersections.h" 25 | 26 | /*! \namespace cuBQL - *cu*BQL based geometric *q*ueries */ 27 | namespace cuBQL { 28 | namespace triangles { 29 | 30 | // ============================================================================= 31 | // *** INTERFACE *** 32 | // ============================================================================= 33 | 34 | /*! returns signed crossing count (ie, +1 for each triangle 35 | crossed from its back side and -1 for each crossed from its front side) for an 36 | axis-aligned line that goes through point queryOrigin and is 37 | aligned to the axis'th coordinate axis. 
38 | 39 | In theory, for any properly closed and outward-oriented surface 40 | mesh any given ray shot from a point should have a crossing 41 | count of +1 if that point was inside the mesh (the ray 'left' 42 | once more than it entered), and 0 if it was outside the mesh 43 | (every time it entered it also left). Note however that due to 44 | 'funny situations' like rays double-counting triangles if a 45 | ray happens to just hit an edge or vertex this cannot be 46 | absolutely relied on for any single ray. 47 | 48 | getTriangle is a lambda getTriangle(uint32_t primID)->Triangle 49 | */ 50 | template 51 | inline __cubql_both 52 | int signedCrossingCount(bvh3f bvh, 53 | GetTriangleLambda getTriangle, 54 | AxisAlignedRay ray); 55 | 56 | /*! defines a crossing count kernel. The struct itself defines the 57 | return values computed for this query, the 'runQuery()' methods 58 | provide a device-side implementation of that kernel for a given 59 | set of inputs */ 60 | struct CrossingCount { 61 | // ====================== COMPUTED VALUES ====================== 62 | 63 | /* sum of all ray-triangle crossings, using "-1" for crossing 64 | _into_ a surface, and "+1" for crossing _out of_ a surface. in 65 | theory for a closed and properly outside-oriented surface and 66 | infinite-length query rays a point not inside the object should 67 | have value 0, no point should ever have values < 0 (because 68 | that would require a ray to enter an object and never leave 69 | it), and points inside the object should have a value of exactly 70 | 1 (because it should cross out exactly once more than it 71 | crosses in). Caveat: for query rays hitting edges, vertices, or 72 | just numerically fancy configurations this theory will probably 73 | not match practice :-/ */ 74 | int crossingCount = 0; 75 | 76 | /*! total number of ray-triangle intersections, no matter which 77 | sign. 
note this *may* count certain surfaces twice if the ray 78 | happens to hit on an edge or vertex */ 79 | int totalCount = 0; 80 | 81 | // ====================== ACTUAL QUERIES ====================== 82 | 83 | /*! runs one complete crossing-count query; will compute 84 | crossing count for every triangle whose bounding box 85 | intersects the given ray. This overload takes the triangles as a 'cuBQL::TriangleMesh' and the ray as a 'cuBQL::Ray' */ 86 | inline __cubql_both 87 | void runQuery(const cuBQL::TriangleMesh mesh, 88 | const cuBQL::bvh3f bvh, 89 | const cuBQL::Ray queryRay); 90 | 91 | /*! runs one complete crossing-count query; will compute 92 | crossing count for every triangle whose bounding box 93 | intersects the given ray 94 | 95 | 'cuBQL::Triangle getTriangle(int triangleIdx)' is a lambda 96 | returning the triangle with given index, and allows the app to 97 | choose however it wants to store the triangles as long as it 98 | can return one on request. 99 | */ 100 | template 101 | inline __cubql_both 102 | void runQuery(const cuBQL::bvh3f bvh, 103 | const GetTriangleLambda &getTriangle/* Triangle(*)(int) */, 104 | const cuBQL::AxisAlignedRay queryRay, 105 | bool dbg=false); 106 | 107 | /*! runs one complete crossing-count query; will compute 108 | crossing count for every triangle whose bounding box 109 | intersects the given ray. This overload takes the triangles as a 'cuBQL::TriangleMesh' and the ray as a 'cuBQL::AxisAlignedRay' */ 110 | template 111 | inline __cubql_both 112 | void runQuery(const cuBQL::TriangleMesh mesh, 113 | const cuBQL::bvh3f bvh, 114 | const cuBQL::AxisAlignedRay queryRay, 115 | bool dbg=false); 116 | }; 117 | 118 | // ============================================================================= 119 | // *** IMPLEMENTATION *** 120 | // ============================================================================= 121 | 122 | /*! 
runs one complete crossing-count query; will compute 123 | crossing count for every triangle whose bounding box 124 | intersects the given ray 125 | 126 | 'cuBQL::Triangle getTriangle(int triangleIdx)` is a lambda 127 | returning the triangle with given index, and allows the app to 128 | choose however it wants to store the triangles as long as it 129 | can return one on request.. 130 | */ 131 | template 132 | inline __cubql_both 133 | void CrossingCount::runQuery(const cuBQL::bvh3f bvh, 134 | const GetTriangleLambda &getTriangle, 135 | const cuBQL::AxisAlignedRay queryRay, 136 | bool dbg) 137 | { 138 | // reset to defaults 139 | *this = {0,0}; 140 | if (dbg) 141 | printf("#####################\ncrossing count query axis %i sign %i\n",axis,sign); 142 | auto perPrimCode = [getTriangle,this,queryRay,dbg](uint32_t triangleIdx)->int { 143 | const Triangle triangle = getTriangle(triangleIdx); 144 | #if 1 145 | if (rayIntersectsTriangle(queryRay,triangle,dbg)) { 146 | this->totalCount++; 147 | this->crossingCount 148 | += (dot(triangle.normal(),queryRay.direction()) > 0.f ? +1 : -1); 149 | } 150 | #else 151 | const Ray ray = queryRay.makeRay(); 152 | RayTriangleIntersection isec; 153 | if (isec.compute(ray,triangle)) { 154 | this->totalCount++; 155 | this->crossingCount 156 | += (dot(isec.N,ray.direction) > 0.f ? 
+1 : -1); 157 | } 158 | #endif 159 | return CUBQL_CONTINUE_TRAVERSAL; 160 | }; 161 | cuBQL::fixedRayQuery::forEachPrim(perPrimCode,bvh,queryRay,dbg); 162 | } 163 | 164 | 165 | template 166 | inline __cubql_both 167 | int signedCrossingCount(bvh3f bvh, 168 | GetTriangleLambda getTriangle, 169 | AxisAlignedRay queryRay, 170 | bool dbg=false) 171 | { 172 | CrossingCount cc; 173 | // AxisAlignedRay queryRay(queryPoint,0.f,+CUBQL_INF); 174 | cc.runQuery(bvh,getTriangle,queryRay,dbg); 175 | return cc.crossingCount; 176 | } 177 | 178 | } // ::cuBQL::triangles 179 | } // ::cuBQL 180 | -------------------------------------------------------------------------------- /cuBQL/math/affine.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "linear.h" 7 | 8 | namespace cuBQL { 9 | 10 | //////////////////////////////////////////////////////////////////////////////// 11 | // Affine Space 12 | //////////////////////////////////////////////////////////////////////////////// 13 | 14 | template 15 | struct AffineSpaceT 16 | { 17 | using vector_t = typename L::vector_t; 18 | using linear_t = L; 19 | using scalar_t = typename vector_t::scalar_t; 20 | 21 | linear_t l; /*< linear part of affine space */ 22 | vector_t p; /*< affine part of affine space */ 23 | 24 | //////////////////////////////////////////////////////////////////////////////// 25 | // Constructors, Assignment, Cast, Copy Operations 26 | //////////////////////////////////////////////////////////////////////////////// 27 | 28 | inline __cubql_both AffineSpaceT() 29 | : l(OneTy()), 30 | p(ZeroTy()) 31 | {} 32 | 33 | inline AffineSpaceT(const AffineSpaceT &other) = default; 34 | 35 | inline __cubql_both AffineSpaceT(const L &other) 36 | { 37 | l = other ; 38 | p = vector_t(ZeroTy()); 39 | } 40 | 41 | inline 
__cubql_both AffineSpaceT& operator=(const AffineSpaceT& other) 42 | { 43 | l = other.l; 44 | p = other.p; 45 | return *this; 46 | } 47 | 48 | inline __cubql_both AffineSpaceT(const vector_t& vx, 49 | const vector_t& vy, 50 | const vector_t& vz, 51 | const vector_t& p) 52 | : l(vx,vy,vz), 53 | p(p) 54 | {} 55 | 56 | inline __cubql_both AffineSpaceT(const L& l, 57 | const vector_t& p) 58 | : l(l), 59 | p(p) 60 | {} 61 | 62 | template inline __cubql_both AffineSpaceT( const AffineSpaceT& s ) 63 | : l(s.l), 64 | p(s.p) 65 | {} 66 | 67 | //////////////////////////////////////////////////////////////////////////////// 68 | // Constants 69 | //////////////////////////////////////////////////////////////////////////////// 70 | 71 | inline AffineSpaceT( ZeroTy ) : l(ZeroTy()), p(ZeroTy()) {} 72 | inline AffineSpaceT( OneTy ) : l(OneTy()), p(ZeroTy()) {} 73 | 74 | /*! return matrix for scaling */ 75 | static inline AffineSpaceT scale(const vector_t& s) { return L::scale(s); } 76 | 77 | /*! return matrix for translation */ 78 | static inline AffineSpaceT translate(const vector_t& p) { return AffineSpaceT(OneTy(),p); } 79 | 80 | /*! return matrix for rotation, only in 2D */ 81 | static inline AffineSpaceT rotate(const scalar_t &r) { return L::rotate(r); } 82 | 83 | /*! return matrix for rotation around arbitrary point (2D) or axis (3D) */ 84 | static inline AffineSpaceT rotate(const vector_t &u, 85 | const scalar_t &r) 86 | { return L::rotate(u,r);} 87 | 88 | /*! return matrix for rotation around arbitrary axis and point, only in 3D */ 89 | static inline AffineSpaceT rotate(const vector_t &p, 90 | const vector_t &u, 91 | const scalar_t &r) 92 | { return translate(+p) * rotate(u,r) * translate(-p); } 93 | 94 | /*! 
return matrix for looking at given point, only in 3D; right-handed coordinate system */ 95 | static inline AffineSpaceT lookat(const vector_t& eye, 96 | const vector_t& point, 97 | const vector_t& up) 98 | { 99 | vector_t Z = normalize(point-eye); 100 | vector_t U = normalize(cross(Z,up)); 101 | vector_t V = cross(U,Z); 102 | return AffineSpaceT(L(U,V,Z),eye); 103 | } 104 | 105 | }; 106 | 107 | //////////////////////////////////////////////////////////////////////////////// 108 | // Unary Operators 109 | //////////////////////////////////////////////////////////////////////////////// 110 | 111 | template inline AffineSpaceT operator -( const AffineSpaceT& a ) { return AffineSpaceT(-a.l,-a.p); } 112 | template inline AffineSpaceT operator +( const AffineSpaceT& a ) { return AffineSpaceT(+a.l,+a.p); } 113 | template 114 | inline __cubql_both 115 | AffineSpaceT rcp( const AffineSpaceT& a ) { 116 | L il = rcp(a.l); 117 | return AffineSpaceT(il,-(il*a.p)); 118 | } 119 | 120 | //////////////////////////////////////////////////////////////////////////////// 121 | // Binary Operators 122 | //////////////////////////////////////////////////////////////////////////////// 123 | 124 | template inline AffineSpaceT operator +( const AffineSpaceT& a, const AffineSpaceT& b ) { return AffineSpaceT(a.l+b.l,a.p+b.p); } 125 | template inline AffineSpaceT operator -( const AffineSpaceT& a, const AffineSpaceT& b ) { return AffineSpaceT(a.l-b.l,a.p-b.p); } 126 | 127 | template inline __cubql_both 128 | AffineSpaceT operator *(const typename AffineSpaceT::scalar_t &a, 129 | const AffineSpaceT &b ) 130 | { return AffineSpaceT(a*b.l,a*b.p); } 131 | 132 | template inline __cubql_both 133 | AffineSpaceT operator *( const AffineSpaceT& a, const AffineSpaceT& b ) 134 | { return AffineSpaceT(a.l*b.l,a.l*b.p+a.p); } 135 | 136 | template inline 137 | AffineSpaceT operator /( const AffineSpaceT& a, const AffineSpaceT& b ) 138 | { return a * rcp(b); } 139 | 140 | template inline 141 | AffineSpaceT 
operator/(const AffineSpaceT &a, 142 | const typename AffineSpaceT::scalar_t &b) 143 | { return a * rcp(b); } 144 | 145 | template inline 146 | AffineSpaceT& operator *=( AffineSpaceT& a, const AffineSpaceT& b ) 147 | { return a = a * b; } 148 | 149 | template inline 150 | AffineSpaceT &operator*=(AffineSpaceT &a, 151 | const typename AffineSpaceT::scalar_t &b) 152 | { return a = a * b; } 153 | 154 | template inline 155 | AffineSpaceT& operator /=( AffineSpaceT& a, const AffineSpaceT& b ) 156 | { return a = a / b; } 157 | 158 | template inline 159 | AffineSpaceT &operator/=(AffineSpaceT &a, 160 | const typename AffineSpaceT::scalar_t &b) 161 | { return a = a / b; } 162 | 163 | template inline __cubql_both 164 | typename AffineSpaceT::vector_t xfmPoint(const AffineSpaceT& m, 165 | const typename AffineSpaceT::vector_t &p) 166 | { 167 | return madd(vector_t(p.x),m.l.vx, 168 | madd(vector_t(p.y),m.l.vy, 169 | madd(vector_t(p.z),m.l.vz, 170 | m.p))); 171 | } 172 | 173 | template inline __cubql_both 174 | typename AffineSpaceT::vector_t xfmVector(const AffineSpaceT& m, 175 | const typename AffineSpaceT::vector_t& v) 176 | { return xfmVector(m.l,v); } 177 | 178 | template inline __cubql_both 179 | typename AffineSpaceT::vector_t xfmNormal(const AffineSpaceT& m, 180 | const typename AffineSpaceT::vector_t& n) 181 | { return xfmNormal(m.l,n); } 182 | 183 | 184 | //////////////////////////////////////////////////////////////////////////////// 185 | /// Comparison Operators 186 | //////////////////////////////////////////////////////////////////////////////// 187 | 188 | template inline 189 | bool operator ==( const AffineSpaceT& a, const AffineSpaceT& b ) 190 | { return a.l == b.l && a.p == b.p; } 191 | 192 | template inline 193 | bool operator !=( const AffineSpaceT& a, const AffineSpaceT& b ) 194 | { return a.l != b.l || a.p != b.p; } 195 | 196 | //////////////////////////////////////////////////////////////////////////////// 197 | // Output Operators 198 | 
//////////////////////////////////////////////////////////////////////////////// 199 | 200 | template inline std::ostream& operator<<(std::ostream& cout, const AffineSpaceT& m) { 201 | return cout << "{ l = " << m.l << ", p = " << m.p << " }"; 202 | } 203 | 204 | //////////////////////////////////////////////////////////////////////////////// 205 | // Type Aliases 206 | //////////////////////////////////////////////////////////////////////////////// 207 | 208 | using AffineSpace2f = AffineSpaceT; 209 | using AffineSpace3f = AffineSpaceT; 210 | using AffineSpace2d = AffineSpaceT; 211 | using AffineSpace3d = AffineSpaceT; 212 | 213 | //////////////////////////////////////////////////////////////////////////////// 214 | /*! Template Specialization for 2D: return matrix for rotation around point (rotation around arbitrarty vector is not meaningful in 2D) */ 215 | template<> inline AffineSpace2f AffineSpace2f::rotate(const vec2f& p, const float& r) 216 | { return translate(+p) * AffineSpace2f(LinearSpace2f::rotate(r)) * translate(-p); } 217 | 218 | 219 | using affine2f = AffineSpace2f; 220 | using affine3f = AffineSpace3f; 221 | using affine2d = AffineSpace2d; 222 | using affine3d = AffineSpace3d; 223 | } // ::cuBQL 224 | -------------------------------------------------------------------------------- /samples/common/Generator.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "cuBQL/queries/triangleData/Triangle.h" 7 | #include "samples/common/IO.h" 8 | 9 | namespace cuBQL { 10 | namespace samples { 11 | 12 | inline double defaultDomainSize() { return 100000.; } 13 | 14 | // ================================================================== 15 | /*! 
a 'point generator' is a class that implements a procedure to 16 | create a randomized set of points (of given type and 17 | dimensoins). In particular, this library allows for describing 18 | various point generators through a string, such as "uniform" 19 | (uniformly distributed points), "nrooks" (a n-rooks style 20 | distribution, see below), "clustered", etc. */ 21 | template 22 | struct PointGenerator { 23 | typedef std::shared_ptr SP; 24 | /*! create a set of requested number of elements with given 25 | generator seed*/ 26 | virtual std::vector> generate(int numRequested, int seed) = 0; 27 | 28 | // helper stuff to parse itself from cmd-line descriptor string 29 | static SP createFromString(const std::string &wholeString); 30 | 31 | static SP createAndParse(const char *&curr); 32 | virtual void parse(const char *¤tParsePos); 33 | 34 | }; 35 | 36 | // ================================================================== 37 | template 38 | struct BoxGenerator { 39 | typedef std::shared_ptr> SP; 40 | 41 | /*! create a set of requested number of elements with given 42 | generator seed*/ 43 | virtual std::vector> generate(int numRequested, int seed) = 0; 44 | 45 | static SP createFromString(const std::string &wholeString); 46 | 47 | static SP createAndParse(const char *&curr); 48 | virtual void parse(const char *¤tParsePos); 49 | }; 50 | 51 | 52 | 53 | 54 | // ================================================================== 55 | template 56 | struct UniformPointGenerator : public PointGenerator 57 | { 58 | UniformPointGenerator(vec_t lower = -defaultDomainSize(), 59 | vec_t upper = +defaultDomainSize()) 60 | : lower(lower), upper(upper) 61 | {} 62 | 63 | std::vector> generate(int numRequested, int seed) override; 64 | vec_t lower, upper; 65 | }; 66 | 67 | template 68 | struct UniformBoxGenerator : public BoxGenerator 69 | { 70 | std::vector> generate(int numRequested, int seed) override; 71 | }; 72 | 73 | /*! 
re-maps points from the 'default domain' to the domain specified 74 | by [lower,upper]. Ie, for float the defulat domain is [0,1]^N, 75 | so a poitn with coordinate x=1 would be re-mapped to 76 | x=lower. Note this does not require the input points to *be* 77 | inside that default domain - if they are outside the domain the 78 | generated points will simply be outside the target domain, 79 | too 80 | 81 | To create via generator string, use the string "remap x0 y0 82 | ... x1 y1 ... ", where x0,x1 etc are the lower coordinates of 83 | the target domain; x1, y1 etc are the upper bounds of the target 84 | domain, and is another generator that produces the input 85 | points. E.g., assuimng we'd be dealing with data, the string 86 | "remap 2 2 4 4 nrooks" would first generate points with the "nrooks" 87 | generator, then re-map those to [(2,2),(4,4)]. 88 | */ 89 | template 90 | struct RemapPointGenerator : public PointGenerator 91 | { 92 | RemapPointGenerator(); 93 | 94 | std::vector> generate(int numRequested, int seed) override; 95 | 96 | virtual void parse(const char *¤tParsePos); 97 | 98 | vec_t lower, upper; 99 | typename PointGenerator::SP source; 100 | }; 101 | template 102 | struct RemapBoxGenerator : public BoxGenerator 103 | { 104 | RemapBoxGenerator(); 105 | 106 | std::vector> generate(int numRequested, int seed) override; 107 | 108 | virtual void parse(const char *¤tParsePos); 109 | 110 | vec_t lower, upper; 111 | typename BoxGenerator::SP source; 112 | }; 113 | 114 | 115 | 116 | // ================================================================== 117 | template 118 | struct ClusteredPointGenerator : public PointGenerator 119 | { 120 | std::vector> generate(int numRequested, int seed) override; 121 | }; 122 | 123 | template 124 | struct ClusteredBoxGenerator : public BoxGenerator 125 | { 126 | void parse(const char *¤tParsePos) override; 127 | std::vector> generate(int numRequested, int seed) override; 128 | 129 | struct { 130 | float mean = -1.f, sigma = 
0.f, scale = 1.f; 131 | } gaussianSize; 132 | struct { 133 | float min = -1.f, max = -1.f; 134 | } uniformSize; 135 | }; 136 | 137 | // ================================================================== 138 | /*! "nrooks": generate ~sqrt(N) N clusters of around sqrt(N) 139 | points each, and arrange thsoe in a n-rooks patterns */ 140 | template 141 | struct NRooksPointGenerator : public PointGenerator 142 | { 143 | std::vector> generate(int numRequested, int seed) override; 144 | }; 145 | 146 | // ================================================================== 147 | /*! "nrooks": same as n-rooks point generator (for the box centers), 148 | then surrounds each of these points with a box whose size can be 149 | controlled through various distributions */ 150 | template 151 | struct NRooksBoxGenerator : public BoxGenerator 152 | { 153 | std::vector> generate(int numRequested, int seed) override; 154 | void parse(const char *¤tParsePos) override; 155 | struct { 156 | float mean = -1.f, sigma = 0.f, scale = 1.f; 157 | } gaussianSize; 158 | struct { 159 | float min = -1.f, max = -1.f; 160 | } uniformSize; 161 | }; 162 | 163 | // ================================================================== 164 | /*! takes a file of triangles, and creates one box per 165 | triangle. will ignore the number of requested samples, and just 166 | return the boxes. Will only work for float3 data, and error-exit 167 | for all other cases T,D configurations. 168 | 169 | *must* be created with a a generator string that specifies a 170 | file (and format) to read those triangles from; this is 171 | specified through two strings: one for the format ('obj' for 172 | .obj files), and a second with a file name. 
E.g., to read 173 | triangles from bunny.obj, just the generator string "triangles 174 | obj bunny.obj" 175 | */ 176 | template 177 | struct TrianglesBoxGenerator : public BoxGenerator 178 | { 179 | std::vector> generate(int numRequested, int seed) override; 180 | 181 | void parse(const char *¤tParsePos) override; 182 | 183 | std::vector triangles; 184 | }; 185 | 186 | // ================================================================== 187 | /*! takes a file of triangles, then generates points by sampling 188 | these proportional to their surface area 189 | 190 | *must* be created with a a generator string that specifies a 191 | file (and format) to read those triangles from; this is 192 | specified through two strings: one for the format ('obj' for 193 | .obj files), and a second with a file name. E.g., to read 194 | triangles from bunny.obj, just the generator string "triangles 195 | obj bunny.obj" 196 | */ 197 | template 198 | struct TrianglesPointGenerator : public PointGenerator 199 | { 200 | std::vector> generate(int numRequested, int seed) override; 201 | 202 | void parse(const char *¤tParsePos) override; 203 | 204 | std::vector triangles; 205 | }; 206 | 207 | // ================================================================== 208 | 209 | /*! "mixture" generator - generates a new distributoin based by 210 | randomly picking between two input distributions */ 211 | template 212 | struct MixturePointGenerator : public PointGenerator { 213 | std::vector> generate(int numRequested, int seed) override; 214 | 215 | void parse(const char *¤tParsePos) override; 216 | 217 | typename PointGenerator::SP gen_a; 218 | typename PointGenerator::SP gen_b; 219 | float prob_a; 220 | }; 221 | 222 | /*! 
"mixture" generator - generates a new distributoin based by 223 | randomly picking between two input distributions */ 224 | template 225 | struct MixtureBoxGenerator : public BoxGenerator { 226 | std::vector> generate(int numRequested, int seed) override; 227 | 228 | void parse(const char *¤tParsePos) override; 229 | 230 | typename BoxGenerator::SP gen_a; 231 | typename BoxGenerator::SP gen_b; 232 | float prob_a; 233 | }; 234 | 235 | } // ::cuBQL::samples 236 | } // ::cuBQL 237 | 238 | 239 | -------------------------------------------------------------------------------- /cuBQL/queries/triangleData/math/rayTriangleIntersections.h: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #pragma once 5 | 6 | #include "cuBQL/queries/triangleData/Triangle.h" 7 | #include "cuBQL/math/Ray.h" 8 | 9 | namespace cuBQL { 10 | 11 | // ======================================================================== 12 | // *** INTERFACE *** 13 | // ======================================================================== 14 | 15 | // struct RayTriangleIntersection { 16 | // vec3f N; 17 | // float t,u,v; 18 | 19 | // inline __cubql_both bool compute(Ray ray, Triangle tri); 20 | // }; 21 | 22 | template 23 | struct RayTriangleIntersection_t { 24 | using vec3 = vec_t; 25 | T t=0,u=0,v=0; 26 | vec3 N; 27 | 28 | inline __cubql_both bool compute(const ray_t &ray, 29 | const triangle_t &tri, 30 | bool dbg=false); 31 | }; 32 | 33 | using RayTriangleIntersection = RayTriangleIntersection_t; 34 | 35 | // ======================================================================== 36 | // *** IMPLEMENTATION *** 37 | // ======================================================================== 38 | 39 | template 40 | inline __cubql_both 41 | bool RayTriangleIntersection_t::compute(const ray_t &ray, 42 | const triangle_t &tri, 
43 | bool dbg) 44 | { 45 | using vec3 = vec_t; 46 | const vec3 v0(tri.a); 47 | const vec3 v1(tri.b); 48 | const vec3 v2(tri.c); 49 | 50 | const vec3 e1 = v1-v0; 51 | const vec3 e2 = v2-v0; 52 | 53 | N = cross(e1,e2); 54 | if (N == vec3(T(0))) 55 | return false; 56 | 57 | if (abst(dot(ray.direction,N)) < T(1e-12)) return false; 58 | 59 | // P = o+td 60 | // dot(P-v0,N) = 0 61 | // dot(o+td-v0,N) = 0 62 | // dot(td,N)+dot(o-v0,N)=0 63 | // t*dot(d,N) = -dot(o-v0,N) 64 | // t = -dot(o-v0,N)/dot(d,N) 65 | t = -dot(ray.origin-v0,N)/dot(ray.direction,N); 66 | if (t <= ray.tMin || t >= ray.tMax) return false; 67 | 68 | vec3 P = (ray.origin - v0) + t*ray.direction; 69 | 70 | T e1u,e2u,Pu; 71 | T e1v,e2v,Pv; 72 | if (abst(N.x) >= max(abst(N.y),abst(N.z))) { 73 | e1u = e1.y; e2u = e2.y; Pu = P.y; 74 | e1v = e1.z; e2v = e2.z; Pv = P.z; 75 | } else if (abst(N.y) > abst(N.z)) { 76 | e1u = e1.x; e2u = e2.x; Pu = P.x; 77 | e1v = e1.z; e2v = e2.z; Pv = P.z; 78 | } else { 79 | e1u = e1.x; e2u = e2.x; Pu = P.x; 80 | e1v = e1.y; e2v = e2.y; Pv = P.y; 81 | } 82 | auto det = [](T a, T b, T c, T d) -> T 83 | { return a*d - c*b; }; 84 | 85 | // P = v0 + u * e1 + v * e2 + h * N 86 | // (P-v0) = [e1,e2]*(u,v,h) 87 | if (det(e1u,e1v,e2u,e2v) == T(0)) return false; 88 | 89 | #if 0 90 | T den = det(e1u,e2u,e1v,e2v); 91 | T sign = den < T(0) ? 
T(-1):T(1); 92 | den *= sign; 93 | T den_u = sign*det(Pu,e2u,Pv,e2v); 94 | if (den_u < T(0)) return false; 95 | T den_v = sign*det(e1u,Pu,e1v,Pv); 96 | if (den_v < T(0)) return false; 97 | if (den_u + den_v > den) return false; 98 | T rcp_den = rcp(den); 99 | u = den_u * rcp_den; 100 | v = den_v * rcp_den; 101 | #else 102 | u = det(Pu,e2u,Pv,e2v)/det(e1u,e2u,e1v,e2v); 103 | v = det(e1u,Pu,e1v,Pv)/det(e1u,e2u,e1v,e2v); 104 | 105 | if ((u < T(0)) || (v < T(0)) || ((u+v) > T(1))) return false; 106 | #endif 107 | return true; 108 | } 109 | 110 | // inline __cubql_both 111 | // bool RayTriangleIntersection::compute(Ray ray, Triangle tri) 112 | // { 113 | // const vec3f v0 = tri.a; 114 | // const vec3f v1 = tri.b; 115 | // const vec3f v2 = tri.c; 116 | 117 | // const vec3f e1 = v1-v0; 118 | // const vec3f e2 = v2-v0; 119 | 120 | // vec3f N = cross(e1,e2); 121 | // if (fabsf(dot(ray.direction,N)) < 1e-12f) return false; 122 | 123 | // t = -dot(ray.origin-v0,N)/dot(ray.direction,N); 124 | 125 | // if (t <= 0.f || t >= ray.tMax) return false; 126 | 127 | // vec3f P = ray.origin - v0 + t*ray.direction; 128 | 129 | // float e1u,e2u,Pu; 130 | // float e1v,e2v,Pv; 131 | // if (fabsf(N.x) >= max(fabsf(N.y),fabsf(N.z))) { 132 | // e1u = e1.y; e2u = e2.y; Pu = P.y; 133 | // e1v = e1.z; e2v = e2.z; Pv = P.z; 134 | // } else if (fabsf(N.y) > fabsf(N.z)) { 135 | // e1u = e1.x; e2u = e2.x; Pu = P.x; 136 | // e1v = e1.z; e2v = e2.z; Pv = P.z; 137 | // } else { 138 | // e1u = e1.x; e2u = e2.x; Pu = P.x; 139 | // e1v = e1.y; e2v = e2.y; Pv = P.y; 140 | // } 141 | // auto det = [](float a, float b, float c, float d) -> float 142 | // { return a*d - c*b; }; 143 | 144 | // // P = v0 + u * e1 + v * e2 + h * N 145 | // // (P-v0) = [e1,e2]*(u,v,h) 146 | // if (det(e1u,e1v,e2u,e2v) == 0.f) return false; 147 | 148 | // u = det(Pu,e2u,Pv,e2v)/det(e1u,e2u,e1v,e2v); 149 | // v = det(e1u,Pu,e1v,Pv)/det(e1u,e2u,e1v,e2v); 150 | // if ((u < 0.f) || (v < 0.f) || ((u+v) >= 1.f)) return false; 151 | 152 | 
// return true; 153 | // } 154 | 155 | 156 | 157 | 158 | template 159 | inline __cubql_both 160 | bool rayIntersectsTriangle(AxisAlignedRay ray, 161 | Triangle triangle, 162 | bool dbg=false) 163 | { 164 | const vec3f dir = ray.direction(); 165 | const vec3f org = ray.origin; 166 | 167 | if (dbg) { 168 | dout << "-----------\ntriangle " << triangle << "\n"; 169 | } 170 | using cuBQL::dot; 171 | using cuBQL::cross; 172 | 173 | vec3f n = triangle.normal(); 174 | if (dbg) dout << "normal " << n << endl; 175 | if (dbg) dout << "dir " << dir << endl; 176 | 177 | float cosND = dot(n,dir); 178 | if (cosND == 0.f) 179 | /* iw - this is debatable - a perfectly parallel triangle may 180 | still have the ray 'intersect' if its in the same 2D plane */ 181 | return false; 182 | 183 | float t = -dot(org-triangle.a,n)/cosND; 184 | if (t <= ray.tMin || t >= ray.tMax) { 185 | if (dbg) dout << " -> not in interval" << endl; 186 | return false; 187 | } 188 | 189 | vec3f a = triangle.a; 190 | vec3f b = triangle.b; 191 | vec3f c = triangle.c; 192 | // transform triangle into space centered aorund line origin 193 | a = a - org; 194 | b = b - org; 195 | c = c - org; 196 | 197 | auto pluecker=[](vec3f a0, vec3f a1, vec3f b0, vec3f b1) 198 | { return dot(a1-a0,cross(b1,b0))+dot(b1-b0,cross(a1,a0)); }; 199 | 200 | // compute pluecker coordinates dot product of all edges wrt x 201 | // axis ray. 
since the ray is mostly 0es and 1es, this shold all 202 | // evaluate to some fairly simple expressions 203 | float sx = pluecker(vec3f(0.f),dir,a,b); 204 | float sy = pluecker(vec3f(0.f),dir,b,c); 205 | float sz = pluecker(vec3f(0.f),dir,c,a); 206 | if (dbg) dout << "pluecker " << sx << " " << sy << " " << sz << endl; 207 | // for ray to be inside edges it must have all positive or all 208 | // negative pluecker winding order 209 | auto min3=[](float x, float y, float z) 210 | { return min(min(x,y),z); }; 211 | auto max3=[](float x, float y, float z) 212 | { return max(max(x,y),z); }; 213 | if (min3(sx,sy,sz) >= 0.f || max3(sx,sy,sz) <= 0.f) { 214 | if (dbg) dout << " -> HIT\n"; 215 | return true; 216 | } 217 | 218 | if (dbg) dout << " -> MISS\n"; 219 | return false; 220 | } 221 | 222 | 223 | inline __cubql_both 224 | bool rayIntersectsTriangle(Ray ray, 225 | Triangle triangle, 226 | bool dbg=false) 227 | { 228 | vec3f org = ray.origin; 229 | vec3f dir = ray.direction; 230 | 231 | if (dbg) { 232 | dout << "-----------\ntriangle " << triangle << "\n"; 233 | } 234 | using cuBQL::dot; 235 | using cuBQL::cross; 236 | 237 | vec3f n = triangle.normal(); 238 | if (dbg) dout << "normal " << n << endl; 239 | if (dbg) dout << "dir " << dir << endl; 240 | 241 | float cosND = dot(n,dir); 242 | if (cosND == 0.f) 243 | /* iw - this is debatable - a perfectly parallel triangle may 244 | still have the ray 'intersect' if its in the same 2D plane */ 245 | return false; 246 | 247 | float t = -dot(org-triangle.a,n)/cosND; 248 | if (t <= ray.tMin || t >= ray.tMax) { 249 | if (dbg) dout << " -> not in interval" << endl; 250 | return false; 251 | } 252 | 253 | vec3f a = triangle.a; 254 | vec3f b = triangle.b; 255 | vec3f c = triangle.c; 256 | // transform triangle into space centered aorund line origin 257 | a = a - org; 258 | b = b - org; 259 | c = c - org; 260 | 261 | auto pluecker=[](vec3f a0, vec3f a1, vec3f b0, vec3f b1) 262 | { return 
dot(a1-a0,cross(b1,b0))+dot(b1-b0,cross(a1,a0)); }; 263 | 264 | // compute pluecker coordinates dot product of all edges wrt x 265 | // axis ray. since the ray is mostly 0es and 1es, this shold all 266 | // evaluate to some fairly simple expressions 267 | float sx = pluecker(vec3f(0.f),dir,a,b); 268 | float sy = pluecker(vec3f(0.f),dir,b,c); 269 | float sz = pluecker(vec3f(0.f),dir,c,a); 270 | if (dbg) dout << "pluecker " << sx << " " << sy << " " << sz << endl; 271 | // float sx = pluecker(beg,dir,a,b-a); 272 | // float sy = pluecker(beg,dir,b,c-b); 273 | // float sz = pluecker(beg,dir,c,a-c); 274 | // for ray to be inside edges it must have all positive or all 275 | // negative pluecker winding order 276 | auto min3=[](float x, float y, float z) 277 | { return min(min(x,y),z); }; 278 | auto max3=[](float x, float y, float z) 279 | { return max(max(x,y),z); }; 280 | if (min3(sx,sy,sz) >= 0.f || max3(sx,sy,sz) <= 0.f) { 281 | if (dbg) dout << " -> HIT\n"; 282 | return true; 283 | } 284 | 285 | if (dbg) dout << " -> MISS\n"; 286 | return false; 287 | } 288 | 289 | } 290 | 291 | --------------------------------------------------------------------------------