├── .gitmodules ├── CMakeLists.txt ├── LICENSE ├── README.md ├── SpatialCL ├── binary_utils.hpp ├── bit_manipulation.hpp ├── cl_utils.hpp ├── configuration.hpp ├── grid.hpp ├── hilbert_curve.hpp ├── math │ └── geometry.hpp ├── query.hpp ├── query │ ├── query_base.hpp │ ├── query_engine_dfs.hpp │ ├── query_engine_grouped_dfs.hpp │ ├── query_knn.hpp │ └── query_range.hpp ├── sfc_position_generator.hpp ├── tree.hpp ├── tree │ ├── binary_tree.hpp │ ├── particle_bvh_sfc_tree.hpp │ ├── particle_bvh_tree.hpp │ └── particle_tree.hpp ├── types.hpp └── zcurve.hpp ├── benchmarks ├── CMakeLists.txt └── range_query_on_grid │ ├── CMakeLists.txt │ └── range_query_on_grid.cpp ├── common ├── environment.hpp ├── random_vectors.hpp ├── timer.hpp ├── verification_knn.hpp └── verification_range.hpp ├── examples ├── CMakeLists.txt └── nbody │ ├── CMakeLists.txt │ ├── model.hpp │ ├── nbody.cpp │ ├── nbody.hpp │ ├── nbody_tree.hpp │ └── particle_renderer.hpp └── tests ├── CMakeLists.txt ├── construction ├── CMakeLists.txt └── basic.cpp ├── knn_query ├── CMakeLists.txt └── knn_query.cpp └── range_query ├── CMakeLists.txt └── range_query.cpp /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "QCL"] 2 | path = QCL 3 | url = git://github.com/illuhad/QuickCL.git 4 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Wall -Wextra -Wno-ignored-attributes -Wno-unused-parameter") 3 | set(CMAKE_CXX_FLAGS_RELEASE "-g -O3 -march=native -ffast-math -Wall -Wno-ignored-attributes -Wextra -Wno-unused-parameter") 4 | 5 | 6 | cmake_minimum_required (VERSION 2.6) 7 | project (SpatialCL) 8 | 9 | if(NOT CMAKE_BUILD_TYPE) 10 | set(CMAKE_BUILD_TYPE "Release" CACHE STRING 11 | "Choose the type of build, options are: Debug Release" 12 | FORCE) 13 | endif(NOT CMAKE_BUILD_TYPE) 14 | 15 | include(CheckCXXCompilerFlag) 16 | CHECK_CXX_COMPILER_FLAG("-std=c++11" COMPILER_SUPPORTS_CXX11) 17 | CHECK_CXX_COMPILER_FLAG("-std=c++0x" COMPILER_SUPPORTS_CXX0X) 18 | if(COMPILER_SUPPORTS_CXX11) 19 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") 20 | elseif(COMPILER_SUPPORTS_CXX0X) 21 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x") 22 | else() 23 | message(STATUS "The compiler ${CMAKE_CXX_COMPILER} has no C++11 support. Please use a different C++ compiler.") 24 | endif() 25 | 26 | find_package(OpenCL REQUIRED) 27 | find_package(Boost 28 | 1.61 # Minimum version 29 | REQUIRED) # Fail with error if Boost is not found 30 | 31 | 32 | include_directories(${PROJECT_BINARY_DIR} ${PROJECT_SOURCE_DIR} ${OpenCL_INCLUDE_DIRS}) 33 | subdirs(tests examples benchmarks) 34 | 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017, 2018 Aksel Alpay 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 2. Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | 13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 14 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 15 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 16 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 17 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 18 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 19 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 20 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 21 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 22 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SpatialCL - a high performance library for the spatial processing of particles on GPUs 2 | 3 | SpatialCL is a library providing an efficient tree data structure for GPUs to spatially index particles in 2D or 3D, as well as optimized parallel query algorithms. Several query algorithms are supported, each optimized for the parallel execution of many independent queries. 4 | 5 | ## Features 6 | * Fully parallelized for GPUs using OpenCL, including the tree construction 7 | * Both 2D and 3D is supported in both single and double precision 8 | * No performance penalty if large empty regions are present, and no additional memory is used for empty regions. This makes SpatialCL suitable for highly clustered particle distributions. 9 | * Apart from their coordinates, particles can carry up to 14 (2D) or 13 (3D) additional floating point values (e.g. masses, charges, velocity, ...) 10 | * The query algorithms are separated from the query handler, which controls what the query actually returns and how it executes. This allows the user to formulate custom queries. During query execution, these custom query handlers are tightly integrated into the existing query algorithms, resulting in extremely low overhead and high performance. 11 | * Out of the box, query handlers for range queries and KNN queries (work in progress) are provided. 12 | 13 | Target applications include 14 | * Spatial point/particle processing in general 15 | * Point-cloud based computer graphics 16 | * Scientific applications based on particles, such as N-body simulations or smoothed particle hydrodynamics. 17 | 18 | The power of SpatialCL is illustrated by the provided `nbody` example, which implements a complete N-Body simulation using a Barnes-Hut gravitational tree code on GPUs. This tree is powered by SpatialCL with a custom query handler to efficiently calculate the acceleration of each particle. 19 | 20 | ## Building SpatialCL 21 | 22 | SpatialCL itself is a header-only library, so no special build steps are required to use SpatialCL in your application. In order to compile the examples, benchmarks and tests, execute 23 | ``` 24 | $ mkdir build 25 | $ cd build 26 | $ cmake 27 | $ make 28 | ``` 29 | -------------------------------------------------------------------------------- /SpatialCL/binary_utils.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of SpatialCL, a library for the spatial processing of 3 | * particles. 4 | * 5 | * Copyright (c) 2018 Aksel Alpay 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions are met: 10 | * 11 | * 1. Redistributions of source code must retain the above copyright notice, this 12 | * list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright notice, 14 | * this list of conditions and the following disclaimer in the documentation 15 | * and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 21 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | */ 28 | 29 | #include 30 | 31 | namespace spatialcl{ 32 | namespace utils{ 33 | namespace binary{ 34 | 35 | template 36 | struct small_binary_logarithm 37 | { 38 | static constexpr bool is_input_valid = false; 39 | }; 40 | 41 | #define SPATIALCL_IMPL_DECLARE_BINARY_LOGARITHM(N, val) \ 42 | template <> \ 43 | struct small_binary_logarithm \ 44 | { \ 45 | static constexpr std::size_t value = val; \ 46 | static constexpr bool is_input_valid = true; \ 47 | }; 48 | 49 | SPATIALCL_IMPL_DECLARE_BINARY_LOGARITHM(1, 0) 50 | SPATIALCL_IMPL_DECLARE_BINARY_LOGARITHM(2, 1) 51 | SPATIALCL_IMPL_DECLARE_BINARY_LOGARITHM(4, 2) 52 | SPATIALCL_IMPL_DECLARE_BINARY_LOGARITHM(8, 3) 53 | SPATIALCL_IMPL_DECLARE_BINARY_LOGARITHM(16, 4) 54 | SPATIALCL_IMPL_DECLARE_BINARY_LOGARITHM(32, 5) 55 | SPATIALCL_IMPL_DECLARE_BINARY_LOGARITHM(64, 6) 56 | SPATIALCL_IMPL_DECLARE_BINARY_LOGARITHM(128, 7) 57 | SPATIALCL_IMPL_DECLARE_BINARY_LOGARITHM(256, 8) 58 | SPATIALCL_IMPL_DECLARE_BINARY_LOGARITHM(512, 9) 59 | SPATIALCL_IMPL_DECLARE_BINARY_LOGARITHM(1024, 10) 60 | SPATIALCL_IMPL_DECLARE_BINARY_LOGARITHM(2048, 11) 61 | SPATIALCL_IMPL_DECLARE_BINARY_LOGARITHM(4096, 12) 62 | SPATIALCL_IMPL_DECLARE_BINARY_LOGARITHM(8192, 13) 63 | 64 | template 65 | struct is_small_power2 66 | { 67 | static constexpr bool value = small_binary_logarithm::is_input_valid; 68 | }; 69 | 70 | } 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /SpatialCL/bit_manipulation.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of SpatialCL, a library for the spatial processing of 3 | * particles. 4 | * 5 | * Copyright (c) 2017 Aksel Alpay 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions are met: 10 | * 11 | * 1. Redistributions of source code must retain the above copyright notice, this 12 | * list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright notice, 14 | * this list of conditions and the following disclaimer in the documentation 15 | * and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 21 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | */ 28 | 29 | #ifndef BIT_MANIPULATION_HPP 30 | #define BIT_MANIPULATION_HPP 31 | 32 | #include "configuration.hpp" 33 | 34 | namespace spatialcl { 35 | 36 | QCL_STANDALONE_MODULE(bit_manipulation) 37 | QCL_STANDALONE_SOURCE 38 | ( 39 | QCL_RAW( 40 | ulong bit_split3(uint input) 41 | { 42 | ulong x = input; 43 | x &= 0x1fffff; 44 | x = (x | x << 32) & 0x1f00000000ffff; 45 | x = (x | x << 16) & 0x1f0000ff0000ff; 46 | x = (x | x << 8) & 0x100f00f00f00f00f; 47 | x = (x | x << 4) & 0x10c30c30c30c30c3; 48 | x = (x | x << 2) & 0x1249249249249249; 49 | 50 | return x; 51 | } 52 | 53 | ulong bit_split2(uint input) 54 | { 55 | ulong x = input; 56 | 57 | //x &= 0xffffffff // not necessary because we are using all 32 bits 58 | x = (x | x << 16) & 0xffff0000ffff; 59 | x = (x | x << 8) & 0xff00ff00ff00ff; 60 | x = (x | x << 4) & 0xf0f0f0f0f0f0f0f; 61 | x = (x | x << 2) & 0x3333333333333333; 62 | x = (x | x << 1) & 0x5555555555555555; 63 | 64 | return x; 65 | } 66 | 67 | ulong n_bits_set(uint n) 68 | { 69 | // shifting a 64 bit integer by 64 bits is undefined on most architectures, 70 | // hence we need special treatment for ths case. 71 | if(n == 64) 72 | return ~0ul; 73 | 74 | ulong result = 1ul << n; 75 | return result - 1; 76 | } 77 | 78 | ulong interleave_bits2(uint a, uint b) 79 | { 80 | return bit_split2(a) | (bit_split2(b) << 1); 81 | } 82 | 83 | ulong interleave_bits3(uint a, uint b, uint c) 84 | { 85 | return bit_split3(a) | (bit_split3(b) << 1) | (bit_split3(c) << 2); 86 | } 87 | 88 | uint find_most_significant_bit(ulong x) 89 | { 90 | x |= (x >> 1); 91 | x |= (x >> 2); 92 | x |= (x >> 4); 93 | x |= (x >> 8); 94 | x |= (x >> 16); 95 | x |= (x >> 32); 96 | return(x & ~(x >> 1)); 97 | } 98 | 99 | ulong get_next_power_of_two(ulong x) 100 | { 101 | x--; 102 | x |= x >> 1; 103 | x |= x >> 2; 104 | x |= x >> 4; 105 | x |= x >> 8; 106 | x |= x >> 16; 107 | x |= x >> 32; 108 | x++; 109 | 110 | return x; 111 | } 112 | ) 113 | ) 114 | 115 | } 116 | 117 | #endif 118 | -------------------------------------------------------------------------------- /SpatialCL/cl_utils.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of SpatialCL, a library for the spatial processing of 3 | * particles. 4 | * 5 | * Copyright (c) 2017 Aksel Alpay 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions are met: 10 | * 11 | * 1. Redistributions of source code must retain the above copyright notice, this 12 | * list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright notice, 14 | * this list of conditions and the following disclaimer in the documentation 15 | * and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 21 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | */ 28 | 29 | 30 | #ifndef CL_UTILS_HPP 31 | #define CL_UTILS_HPP 32 | 33 | #include 34 | #include 35 | #include 36 | 37 | namespace cl_utils { 38 | 39 | #define NODEBUG 40 | 41 | #ifdef NODEBUG 42 | const int CL_NODEBUG = 1; 43 | #else 44 | const int CL_NODEBUG = 0; 45 | #endif 46 | 47 | QCL_STANDALONE_MODULE(debug) 48 | QCL_STANDALONE_SOURCE 49 | ( 50 | QCL_IMPORT_CONSTANT(CL_NODEBUG) 51 | R"( 52 | #if CL_NODEBUG == 0 53 | #define NAMED_ASSERT(name, cond) \ 54 | if(!(cond)) \ 55 | printf("Assert failed: %s, Line %d\n", name, __LINE__) 56 | #define ASSERT(cond) \ 57 | if(!(cond)) \ 58 | printf("Assert failed: %s, Line %d\n", __FILE__, __LINE__) 59 | #else 60 | #define ASSERT(cond) 61 | #define NAMED_ASSERT(name, cond) 62 | #endif 63 | )" 64 | ) 65 | 66 | } 67 | 68 | #endif 69 | -------------------------------------------------------------------------------- /SpatialCL/configuration.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of SpatialCL, a library for the spatial processing of 3 | * particles. 4 | * 5 | * Copyright (c) 2017 Aksel Alpay 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions are met: 10 | * 11 | * 1. Redistributions of source code must retain the above copyright notice, this 12 | * list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright notice, 14 | * this list of conditions and the following disclaimer in the documentation 15 | * and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 21 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | */ 28 | 29 | #ifndef CONFIGURATION_HPP 30 | #define CONFIGURATION_HPP 31 | 32 | #include 33 | #include 34 | #include 35 | 36 | #include "types.hpp" 37 | 38 | namespace spatialcl { 39 | namespace type_descriptor{ 40 | 41 | template 44 | struct generic 45 | { 46 | using scalar = Scalar_type; 47 | static constexpr std::size_t dimension = Dimension; 48 | static constexpr std::size_t particle_dimension = Num_particle_components; 49 | 50 | static_assert(particle_dimension >= dimension, "Number of components of a particle must" 51 | " be greater or equal to the dimensionality of" 52 | " the problem."); 53 | }; 54 | 55 | template 56 | using single_precision2d = generic; 57 | 58 | template 59 | using single_precision3d = generic; 60 | 61 | template 62 | using double_precision2d = generic; 63 | 64 | template 65 | using double_precision3d = generic; 66 | 67 | } 68 | 69 | template 70 | struct configuration 71 | { 72 | QCL_MAKE_MODULE(configuration) 73 | 74 | using scalar = typename Type_descriptor::scalar; 75 | using cell_index_type = typename cl_vector_type::value; 76 | using int_vector_type = typename cl_vector_type::value; 77 | using vector_type = typename cl_vector_type::value; 78 | using particle_type = typename cl_vector_type::value; 79 | static constexpr std::size_t dimension = Type_descriptor::dimension; 80 | 81 | static_assert(dimension == 2 || dimension == 3, 82 | "Only 2D and 3D is supported"); 83 | 84 | private: 85 | QCL_MAKE_SOURCE( 86 | QCL_IMPORT_TYPE(cell_index_type) 87 | QCL_IMPORT_TYPE(vector_type) 88 | QCL_IMPORT_TYPE(int_vector_type) 89 | QCL_IMPORT_TYPE(particle_type) 90 | QCL_IMPORT_TYPE(scalar) 91 | QCL_IMPORT_CONSTANT(dimension) 92 | R"( 93 | #if dimension == 2 94 | #define PARTICLE_POSITION(p) p.s01 95 | #define CONVERT_VECTOR_TO_CELL_INDEX(v) convert_uint2(v) 96 | #define VECTOR_NORM2(v) dot(v,v) 97 | #define CLIP_TO_VECTOR(v) ((v).xy) 98 | #elif dimension == 3 99 | #define PARTICLE_POSITION(p) p.s0123 100 | #define CONVERT_VECTOR_TO_CELL_INDEX(v) convert_uint4(v) 101 | #define VECTOR_NORM2(v) dot((v).s012, (v).s012) 102 | #define CLIP_TO_VECTOR(v) ((v).xyzw) 103 | #else 104 | #error Invalid dimension, only 2d and 3d is supported. 105 | #endif 106 | 107 | #if dimension == 2 108 | #define DIMENSIONALITY_SWITCH(case2d, case3d) case2d 109 | #elif dimension == 3 110 | #define DIMENSIONALITY_SWITCH(case2d, case3d) case3d 111 | #else 112 | #error Invalid dimension. 113 | #endif 114 | 115 | )" 116 | ) 117 | }; 118 | 119 | template 120 | class tree_configuration 121 | { 122 | public: 123 | QCL_MAKE_MODULE(tree_configuration) 124 | 125 | using type_system = typename Tree_type::type_system; 126 | using node_type0 = typename Tree_type::node_type0; 127 | using node_type1 = typename Tree_type::node_type1; 128 | 129 | private: 130 | QCL_MAKE_SOURCE( 131 | QCL_INCLUDE_MODULE(configuration) 132 | QCL_IMPORT_TYPE(node_type0) 133 | QCL_IMPORT_TYPE(node_type1) 134 | QCL_RAW() 135 | ) 136 | }; 137 | 138 | } 139 | 140 | #endif 141 | -------------------------------------------------------------------------------- /SpatialCL/grid.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of SpatialCL, a library for the spatial processing of 3 | * particles. 4 | * 5 | * Copyright (c) 2017 Aksel Alpay 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions are met: 10 | * 11 | * 1. Redistributions of source code must retain the above copyright notice, this 12 | * list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright notice, 14 | * this list of conditions and the following disclaimer in the documentation 15 | * and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 21 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | */ 28 | 29 | #ifndef GRID_HPP 30 | #define GRID_HPP 31 | 32 | 33 | #include "configuration.hpp" 34 | 35 | namespace spatialcl { 36 | 37 | template 38 | class grid 39 | { 40 | public: 41 | QCL_MAKE_MODULE(grid) 42 | private: 43 | QCL_MAKE_SOURCE 44 | ( 45 | QCL_INCLUDE_MODULE(configuration) 46 | QCL_RAW( 47 | 48 | typedef struct 49 | { 50 | vector_type min_corner; 51 | vector_type cell_sizes; 52 | } grid_t; 53 | 54 | void grid_init(grid_t* ctx, 55 | vector_type grid_min, 56 | vector_type grid_max, 57 | ulong num_cells_per_dim) 58 | { 59 | ctx->min_corner = grid_min; 60 | ctx->cell_sizes = (grid_max - grid_min) / num_cells_per_dim; 61 | } 62 | 63 | cell_index_type grid_get_cell(grid_t* ctx, 64 | vector_type point, 65 | ulong num_cells_per_dim) 66 | { 67 | vector_type float_cell_index = (point - ctx->min_corner) / ctx->cell_sizes; 68 | cell_index_type result = CONVERT_VECTOR_TO_CELL_INDEX(float_cell_index); 69 | 70 | return result; 71 | } 72 | ) 73 | ) 74 | }; 75 | } 76 | 77 | #endif 78 | -------------------------------------------------------------------------------- /SpatialCL/hilbert_curve.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of SpatialCL, a library for the spatial processing of 3 | * particles. 4 | * 5 | * Copyright (c) 2017 Aksel Alpay 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions are met: 10 | * 11 | * 1. Redistributions of source code must retain the above copyright notice, this 12 | * list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright notice, 14 | * this list of conditions and the following disclaimer in the documentation 15 | * and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 21 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | */ 28 | 29 | #ifndef HILBERT_CURVE_HPP 30 | #define HILBERT_CURVE_HPP 31 | 32 | #include "configuration.hpp" 33 | #include "bit_manipulation.hpp" 34 | #include "sfc_position_generator.hpp" 35 | #include "grid.hpp" 36 | 37 | 38 | namespace spatialcl { 39 | namespace space_filling_curve { 40 | 41 | template 42 | class hilbert_curve : public position_generator 43 | { 44 | public: 45 | QCL_MAKE_MODULE(hilbert_curve) 46 | 47 | using vector_type = typename configuration::vector_type; 48 | 49 | virtual void operator()(const qcl::device_context_ptr& ctx, 50 | const vector_type& particles_min, 51 | const vector_type& particles_max, 52 | const cl::Buffer& particles, 53 | cl_ulong num_particles, 54 | const cl::Buffer& out) const override 55 | { 56 | cl::NDRange global_size{num_particles}; 57 | cl::NDRange local_size{128}; 58 | 59 | qcl::kernel_call gen_position = this->generate_hilbert_position(ctx,global_size,local_size); 60 | cl_int err = gen_position(particles_min, particles_max, particles, num_particles, out); 61 | 62 | qcl::check_cl_error(err, "Could not enqueue generate_hilbert_position kernel"); 63 | } 64 | 65 | virtual ~hilbert_curve(){} 66 | 67 | static constexpr unsigned hilbert_num_cells2d = 0xffffffff; 68 | static constexpr unsigned hilbert_num_resolved_levels2d = 32; 69 | 70 | static constexpr unsigned hilbert_num_cells3d = 0x1fffff; //21 bits set 71 | static constexpr unsigned hilbert_num_resolved_levels3d = 21; 72 | 73 | private: 74 | QCL_ENTRYPOINT(generate_hilbert_position) 75 | QCL_MAKE_SOURCE 76 | ( 77 | QCL_INCLUDE_MODULE(configuration) 78 | QCL_INCLUDE_MODULE(grid) 79 | QCL_INCLUDE_MODULE(bit_manipulation) 80 | QCL_IMPORT_CONSTANT(hilbert_num_cells2d) 81 | QCL_IMPORT_CONSTANT(hilbert_num_resolved_levels2d) 82 | QCL_IMPORT_CONSTANT(hilbert_num_cells3d) 83 | QCL_IMPORT_CONSTANT(hilbert_num_resolved_levels3d) 84 | QCL_RAW( 85 | 86 | typedef ulong hilbert_key; 87 | 88 | typedef uint2 hilbert_cell_indices2d; 89 | typedef uint4 hilbert_cell_indices3d; 90 | 91 | // This calculation of the position along the hilbert 92 | // curve is based on the algorithm presented in: 93 | // John Skilling: Programming the Hilbert curve 94 | // AIP Conference Proceedings 707, 381 (2004); doi: 10.1063/1.1751381 95 | hilbert_key hilbert_position2d(hilbert_cell_indices2d pos) 96 | { 97 | uint X [2]; 98 | X[0] = pos.x; 99 | X[1] = pos.y; 100 | 101 | const int n = 2; 102 | const int bits = 32; 103 | 104 | uint M = 1U << (bits - 1); 105 | uint P; 106 | uint Q; 107 | uint t; 108 | int i; 109 | // Inverse undo 110 | for (Q = M; Q > 1; Q >>= 1) 111 | { 112 | P = Q - 1; 113 | for (i = 0; i < n; i++) 114 | if ((X[i] & Q) != 0) 115 | X[0] ^= P; // invert 116 | else 117 | { 118 | t = (X[0] ^ X[i]) & P; 119 | X[0] ^= t; 120 | X[i] ^= t; 121 | } 122 | }// exchange 123 | // Gray encode 124 | for (i = 1; i < n; i++) 125 | X[i] ^= X[i - 1]; 126 | t = 0; 127 | for (Q = M; Q > 1; Q >>= 1) 128 | if ((X[n - 1] & Q)!=0) 129 | t ^= Q - 1; 130 | for (i = 0; i < n; i++) 131 | X[i] ^= t; 132 | 133 | return interleave_bits2(X[1], X[0]); 134 | } 135 | 136 | hilbert_key hilbert_position3d(hilbert_cell_indices3d pos) 137 | { 138 | uint X [3]; 139 | X[0] = pos.x; 140 | X[1] = pos.y; 141 | X[2] = pos.z; 142 | 143 | const int n = 3; 144 | const int bits = 21; 145 | 146 | uint M = 1U << (bits - 1); 147 | uint P; 148 | uint Q; 149 | uint t; 150 | int i; 151 | // Inverse undo 152 | for (Q = M; Q > 1; Q >>= 1) 153 | { 154 | P = Q - 1; 155 | for (i = 0; i < n; i++) 156 | if ((X[i] & Q) != 0) 157 | X[0] ^= P; // invert 158 | else 159 | { 160 | t = (X[0] ^ X[i]) & P; 161 | X[0] ^= t; 162 | X[i] ^= t; 163 | } 164 | }// exchange 165 | // Gray encode 166 | for (i = 1; i < n; i++) 167 | X[i] ^= X[i - 1]; 168 | t = 0; 169 | for (Q = M; Q > 1; Q >>= 1) 170 | if ((X[n - 1] & Q)!=0) 171 | t ^= Q - 1; 172 | for (i = 0; i < n; i++) 173 | X[i] ^= t; 174 | 175 | return interleave_bits3(X[2], X[1], X[0]); 176 | } 177 | 178 | ulong hilbert_position(cell_index_type pos) 179 | { 180 | return DIMENSIONALITY_SWITCH(hilbert_position2d(pos), 181 | hilbert_position3d(pos)); 182 | } 183 | 184 | __kernel void generate_hilbert_position(const vector_type particles_min, 185 | const vector_type particles_max, 186 | __global particle_type* particles, 187 | ulong num_particles, 188 | __global hilbert_key* out) 189 | { 190 | ulong num_cells = 0; 191 | 192 | if(dimension == 2) 193 | num_cells = hilbert_num_cells2d; 194 | else if(dimension == 3) 195 | num_cells = hilbert_num_cells3d; 196 | 197 | grid_t grid; 198 | grid_init(&grid, particles_min, particles_max, num_cells); 199 | 200 | size_t num_threads = get_global_size(0); 201 | 202 | for(size_t tid = get_global_id(0); 203 | tid < num_particles; 204 | tid += num_threads) 205 | { 206 | particle_type particle = particles[tid]; 207 | vector_type position = PARTICLE_POSITION(particle); 208 | 209 | cell_index_type idx = grid_get_cell(&grid, position, num_cells); 210 | 211 | out[tid] = hilbert_position(idx); 212 | } 213 | } 214 | ) 215 | ) 216 | }; 217 | 218 | } 219 | } 220 | 221 | #endif 222 | -------------------------------------------------------------------------------- /SpatialCL/math/geometry.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of SpatialCL, a library for the spatial processing of 3 | * particles. 4 | * 5 | * Copyright (c) 2017 Aksel Alpay 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions are met: 10 | * 11 | * 1. Redistributions of source code must retain the above copyright notice, this 12 | * list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright notice, 14 | * this list of conditions and the following disclaimer in the documentation 15 | * and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 21 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | */ 28 | 29 | #ifndef MATH_GEOMETRY_HPP 30 | #define MATH_GEOMETRY_HPP 31 | 32 | #include "../configuration.hpp" 33 | 34 | namespace spatialcl { 35 | namespace math { 36 | 37 | template 38 | QCL_STANDALONE_MODULE(geometry) 39 | QCL_STANDALONE_SOURCE 40 | ( 41 | QCL_INCLUDE_MODULE(configuration) 42 | QCL_RAW 43 | ( 44 | // Calculates minimum distance squared to a box. Assumes 45 | // that the point is outside of the box 46 | scalar box_distance2(vector_type point, 47 | vector_type box_min, 48 | vector_type box_max) 49 | { 50 | vector_type zero = (vector_type)0.0f; 51 | vector_type delta = fmax(box_min - point, fmax(zero, point - box_max)); 52 | return VECTOR_NORM2(delta); 53 | } 54 | 55 | // Calculates maximum distance squared to a box, 56 | // i.e. the distance from the point to the farthest 57 | // point on the box surface 58 | // TODO: This is wrong, fix this 59 | scalar box_farthest_distance2(vector_type point, 60 | vector_type box_min, 61 | vector_type box_max) 62 | { 63 | vector_type delta = fmax(fabs(point - box_min), 64 | fabs(point - box_max)); 65 | return VECTOR_NORM2(delta); 66 | } 67 | 68 | int box_box_intersection(vector_type a_min, 69 | vector_type a_max, 70 | vector_type b_min, 71 | vector_type b_max) 72 | { 73 | 74 | int_vector_type intersects = ((a_max >= b_min) && (b_max >= a_min)); 75 | int result = DIMENSIONALITY_SWITCH(intersects.x & intersects.y & 1, 76 | intersects.x & intersects.y & intersects.z & 1); 77 | 78 | return result; 79 | } 80 | 81 | 82 | int box_contains_point(vector_type box_min, 83 | vector_type box_max, 84 | vector_type point) 85 | { 86 | int_vector_type contains = (box_min <= point) && 87 | (box_max >= point); 88 | return DIMENSIONALITY_SWITCH(contains.x & contains.y & 1, 89 | contains.x & contains.y & contains.z & 1); 90 | } 91 | 92 | int box_contains_particle(vector_type box_min, 93 | vector_type box_max, 94 | particle_type p) 95 | { 96 | return box_contains_point(box_min, box_max, 97 | PARTICLE_POSITION(p)); 98 | } 99 | ) 100 | ) 101 | 102 | } 103 | } 104 | 105 | #endif 106 | -------------------------------------------------------------------------------- /SpatialCL/query.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of SpatialCL, a library for the spatial processing of 3 | * particles. 4 | * 5 | * Copyright (c) 2017 Aksel Alpay 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions are met: 10 | * 11 | * 1. Redistributions of source code must retain the above copyright notice, this 12 | * list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright notice, 14 | * this list of conditions and the following disclaimer in the documentation 15 | * and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 21 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | */ 28 | 29 | #ifndef QUERY_HPP 30 | #define QUERY_HPP 31 | 32 | #include "query/query_engine_dfs.hpp" 33 | #include "query/query_engine_grouped_dfs.hpp" 34 | 35 | #include "query/query_knn.hpp" 36 | #include "query/query_range.hpp" 37 | 38 | namespace spatialcl { 39 | namespace query { 40 | 41 | template 42 | using strict_dfs_query_engine = query::engine::depth_first 43 | < 44 | Tree_type, 45 | Handler, 46 | engine::HIERARCHICAL_ITERATION_STRICT 47 | >; 48 | 49 | template 50 | using relaxed_dfs_query_engine = query::engine::depth_first 51 | < 52 | Tree_type, 53 | Handler, 54 | engine::HIERARCHICAL_ITERATION_RELAXED 55 | >; 56 | 57 | template 58 | using grouped_dfs_query_engine = query::engine::grouped_depth_first 59 | < 60 | Tree_type, 61 | Handler, 62 | Group_size 63 | >; 64 | 65 | 66 | 67 | /************** Range Queries ***************************/ 68 | 69 | template 70 | using strict_dfs_range_query_engine = strict_dfs_query_engine 71 | < 72 | Tree_type, 73 | box_range_query 74 | >; 75 | 76 | template 77 | using relaxed_dfs_range_query_engine = relaxed_dfs_query_engine 78 | < 79 | Tree_type, 80 | box_range_query 81 | >; 82 | 83 | template 86 | using grouped_dfs_range_query_engine = grouped_dfs_query_engine 87 | < 88 | Tree_type, 89 | box_range_query, 90 | Group_size 91 | >; 92 | 93 | template 94 | using default_range_query_engine = relaxed_dfs_range_query_engine 95 | < 96 | Tree_type, 97 | Max_retrieved_particles 98 | >; 99 | 100 | 101 | /********** KNN Queries ********************************/ 102 | 103 | template 104 | using strict_dfs_knn_query_engine = strict_dfs_query_engine 105 | < 106 | Tree_type, 107 | knn_query 108 | >; 109 | 110 | template 111 | using relaxed_dfs_knn_query_engine = relaxed_dfs_query_engine 112 | < 113 | Tree_type, 114 | knn_query 115 | >; 116 | 117 | template 120 | using grouped_dfs_knn_query_engine = grouped_dfs_query_engine 121 | < 122 | Tree_type, 123 | knn_query, 124 | Group_size 125 | >; 126 | 127 | template 128 | using default_knn_query_engine = relaxed_dfs_knn_query_engine 129 | < 130 | Tree_type, 131 | K 132 | >; 133 | 134 | } 135 | } 136 | 137 | #endif 138 | -------------------------------------------------------------------------------- /SpatialCL/query/query_base.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of SpatialCL, a library for the spatial processing of 3 | * particles. 4 | * 5 | * Copyright (c) 2017, 2018 Aksel Alpay 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions are met: 10 | * 11 | * 1. Redistributions of source code must retain the above copyright notice, this 12 | * list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright notice, 14 | * this list of conditions and the following disclaimer in the documentation 15 | * and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 21 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | */ 28 | 29 | #ifndef QUERY_BASE_HPP 30 | #define QUERY_BASE_HPP 31 | 32 | #include 33 | 34 | namespace spatialcl { 35 | namespace query { 36 | 37 | class basic_query 38 | { 39 | public: 40 | virtual void push_full_arguments(qcl::kernel_call& call) = 0; 41 | virtual std::size_t get_num_independent_queries() const = 0; 42 | }; 43 | 44 | 45 | } 46 | } 47 | 48 | #endif 49 | -------------------------------------------------------------------------------- /SpatialCL/query/query_engine_dfs.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of SpatialCL, a library for the spatial processing of 3 | * particles. 4 | * 5 | * Copyright (c) 2017, 2018 Aksel Alpay 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions are met: 10 | * 11 | * 1. Redistributions of source code must retain the above copyright notice, this 12 | * list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright notice, 14 | * this list of conditions and the following disclaimer in the documentation 15 | * and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 21 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | */ 28 | 29 | #ifndef QUERY_ENGINE_DFS_HPP 30 | #define QUERY_ENGINE_DFS_HPP 31 | 32 | #include 33 | #include 34 | 35 | #include "../configuration.hpp" 36 | #include "../tree/binary_tree.hpp" 37 | 38 | 39 | namespace spatialcl { 40 | namespace query { 41 | namespace engine { 42 | 43 | enum depth_first_iteration_strategy 44 | { 45 | HIERARCHICAL_ITERATION_STRICT = 0, 46 | HIERARCHICAL_ITERATION_RELAXED = 1 47 | }; 48 | 49 | /// Depth-first query. 50 | /// \tparam Tree_type the tree type on which this query operates 51 | /// \tparam Handler_module A query handler, fulfilling the dfs handler concept 52 | /// \tparam group_size The OpenCL group size of the query. A 0 will correspond 53 | /// to a cl::NullRange and will hence allow the OpenCL implementation to choose 54 | /// the group size 55 | template 59 | class depth_first 60 | { 61 | public: 62 | QCL_MAKE_MODULE(depth_first) 63 | 64 | using handler_type = Handler_module; 65 | using type_system = typename Tree_type::type_system; 66 | 67 | /// Execute query 68 | cl_int operator()(const Tree_type& tree, 69 | Handler_module& handler, 70 | cl::Event* evt = nullptr) 71 | { 72 | return this->run(tree.get_device_context(), 73 | tree.get_sorted_particles(), 74 | tree.get_node_values0(), 75 | tree.get_node_values1(), 76 | tree.get_num_particles(), 77 | tree.get_effective_num_particles(), 78 | tree.get_effective_num_levels(), 79 | handler, 80 | evt); 81 | } 82 | private: 83 | cl_int run(const qcl::device_context_ptr& ctx, 84 | const cl::Buffer& particles, 85 | const cl::Buffer& node_values0, 86 | const cl::Buffer& node_values1, 87 | std::size_t num_particles, 88 | std::size_t effective_num_particles, 89 | std::size_t effective_num_levels, 90 | Handler_module& handler, 91 | cl::Event* evt = nullptr) 92 | { 93 | 94 | cl::NDRange local_size = cl::NullRange; 95 | if(group_size > 0) 96 | local_size = cl::NDRange{group_size}; 97 | 98 | qcl::kernel_call call = query(ctx, 99 | cl::NDRange{handler.get_num_independent_queries()}, 100 | local_size, 101 | evt); 102 | 103 | call.partial_argument_list(particles, 104 | node_values0, 105 | node_values1, 106 | static_cast(num_particles), 107 | static_cast(effective_num_particles), 108 | static_cast(effective_num_levels)); 109 | 110 | handler.push_full_arguments(call); 111 | return call.enqueue_kernel(); 112 | } 113 | 114 | QCL_ENTRYPOINT(query) 115 | QCL_MAKE_SOURCE( 116 | QCL_INCLUDE_MODULE(tree_configuration) 117 | QCL_INCLUDE_MODULE(Handler_module) 118 | QCL_INCLUDE_MODULE(binary_tree) 119 | QCL_IMPORT_CONSTANT(Iteration_strategy) 120 | QCL_IMPORT_CONSTANT(group_size) 121 | QCL_RAW( 122 | ulong load_node(binary_tree_key_t* node, 123 | __global node_type0* node_values0, 124 | __global node_type1* node_values1, 125 | ulong effective_num_levels, 126 | ulong effective_num_particles, 127 | node_type0* node_value0_out, 128 | node_type1* node_value1_out) 129 | { 130 | ulong idx = binary_tree_key_encode_global_id(node,effective_num_levels); 131 | idx -= effective_num_particles; 132 | 133 | *node_value0_out = node_values0[idx]; 134 | *node_value1_out = node_values1[idx]; 135 | 136 | return idx; 137 | } 138 | 139 | ulong load_particle(binary_tree_key_t* node, 140 | __global particle_type* particles, 141 | ulong effective_num_levels, 142 | ulong effective_num_particles, 143 | particle_type* particle_out) 144 | { 145 | 146 | // Since particles are at the lowest level, we know that for them 147 | // the index equals the local node id 148 | ulong idx = node->local_node_id; 149 | *particle_out = particles[idx]; 150 | return idx; 151 | } 152 | 153 | binary_tree_key_t find_first_left_parent(binary_tree_key_t* node) 154 | { 155 | binary_tree_key_t result = binary_tree_get_parent(node); 156 | while(binary_tree_is_right_child(&result)) 157 | result = binary_tree_get_parent(&result); 158 | return result; 159 | } 160 | ) 161 | R"( 162 | #if Iteration_strategy == 0 163 | // Strict iteration 164 | #define NEXT_PARENT(node) find_first_left_parent(&node) 165 | #elif Iteration_strategy == 1 166 | // Relaxed iteration 167 | #define NEXT_PARENT(node) binary_tree_get_parent(&node) 168 | #else 169 | #error Invalid iteration strategy 170 | #endif 171 | 172 | #if group_size > 0 173 | #define KERNEL_ATTRIBUTES __attribute__((reqd_work_group_size(group_size,1,1))) 174 | #else 175 | #define KERNEL_ATTRIBUTES 176 | #endif 177 | )" 178 | QCL_PREPROCESSOR(define, get_query_id() tid) 179 | QCL_PREPROCESSOR(define, 180 | QUERY_NODE_LEVEL(node_values0, 181 | node_values1, 182 | effective_num_particles, 183 | effective_num_levels, 184 | current_node, 185 | num_covered_particles) 186 | { 187 | node_type0 current_node_values0; 188 | node_type1 current_node_values1; 189 | 190 | ulong node_idx = load_node(¤t_node, 191 | node_values0, 192 | node_values1, 193 | effective_num_levels, 194 | effective_num_particles, 195 | ¤t_node_values0, 196 | ¤t_node_values1); 197 | 198 | int node_selected = 0; 199 | dfs_node_selector(&node_selected, 200 | ¤t_node, 201 | node_idx, 202 | current_node_values0, 203 | current_node_values1); 204 | if(node_selected) 205 | { 206 | current_node = binary_tree_get_children_begin(¤t_node); 207 | } 208 | else 209 | { 210 | dfs_unique_node_discard_event(node_idx, 211 | current_node_values0, 212 | current_node_values1); 213 | 214 | num_covered_particles += BT_LEAVES_PER_NODE(current_node.level, 215 | effective_num_levels); 216 | 217 | if(binary_tree_is_right_child(¤t_node)) 218 | { 219 | // if we are at a right child node, go up to the parent's 220 | // sibling... 221 | current_node = NEXT_PARENT(current_node); 222 | current_node.local_node_id++; 223 | } 224 | else 225 | // otherwise, first investigate the sibling 226 | current_node.local_node_id++; 227 | } 228 | } 229 | ) 230 | QCL_PREPROCESSOR(define, 231 | QUERY_PARTICLE_LEVEL(particles, 232 | effective_num_particles, 233 | effective_num_levels, 234 | current_node, 235 | num_covered_particles) 236 | { 237 | particle_type current_particle; 238 | 239 | ulong particle_idx = load_particle(¤t_node, 240 | particles, 241 | effective_num_levels, 242 | effective_num_particles, 243 | ¤t_particle); 244 | 245 | int particle_selected = 0; 246 | dfs_particle_processor(&particle_selected, 247 | particle_idx, 248 | current_particle); 249 | if(particle_selected) 250 | { 251 | // Move to next particle 252 | current_node.local_node_id++; 253 | } 254 | else 255 | { 256 | if(binary_tree_is_right_child(¤t_node)) 257 | { 258 | // if we are at a right child node, go up to the parent's 259 | // sibling... 260 | current_node = NEXT_PARENT(current_node); 261 | current_node.local_node_id++; 262 | } 263 | else 264 | // otherwise, first investigate the sibling 265 | current_node.local_node_id++; 266 | } 267 | num_covered_particles++; 268 | } 269 | ) 270 | QCL_RAW( 271 | __kernel void query(__global particle_type* particles, 272 | __global node_type0* node_values0, 273 | __global node_type1* node_values1, 274 | ulong num_particles, 275 | ulong effective_num_particles, 276 | ulong effective_num_levels, 277 | declare_full_query_parameter_set()) 278 | KERNEL_ATTRIBUTES 279 | { 280 | for(size_t tid = get_global_id(0); 281 | tid < get_num_queries(); 282 | tid += get_global_size(0)) 283 | { 284 | at_query_init(); 285 | 286 | binary_tree_key_t current_node; 287 | current_node.level = 0; 288 | current_node.local_node_id = 0; 289 | 290 | ulong num_covered_particles = 0; 291 | while(num_covered_particles < num_particles) 292 | { 293 | int particle_level_reached = (current_node.level == effective_num_levels-1); 294 | 295 | if(particle_level_reached) 296 | { 297 | QUERY_PARTICLE_LEVEL(particles, 298 | effective_num_particles, 299 | effective_num_levels, 300 | current_node, 301 | num_covered_particles); 302 | } 303 | else 304 | { 305 | QUERY_NODE_LEVEL(node_values0, 306 | node_values1, 307 | effective_num_particles, 308 | effective_num_levels, 309 | current_node, 310 | num_covered_particles); 311 | } 312 | } 313 | 314 | at_query_exit(); 315 | } 316 | } 317 | ) 318 | ) 319 | }; 320 | 321 | } 322 | } 323 | } 324 | 325 | #endif 326 | -------------------------------------------------------------------------------- /SpatialCL/query/query_knn.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of SpatialCL, a library for the spatial processing of 3 | * particles. 4 | * 5 | * Copyright (c) 2017, 2018 Aksel Alpay 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions are met: 10 | * 11 | * 1. Redistributions of source code must retain the above copyright notice, this 12 | * list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright notice, 14 | * this list of conditions and the following disclaimer in the documentation 15 | * and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 21 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | */ 28 | 29 | #ifndef QUERY_KNN_HPP 30 | #define QUERY_KNN_HPP 31 | 32 | 33 | #include "../configuration.hpp" 34 | #include "../math/geometry.hpp" 35 | 36 | #include "query_base.hpp" 37 | 38 | namespace spatialcl { 39 | namespace query { 40 | 41 | 42 | template 43 | class knn_query : public basic_query 44 | { 45 | public: 46 | QCL_MAKE_MODULE(knn_query) 47 | 48 | static_assert(K > 0, "K must be non-zero, or do you really want" 49 | "to find the zero nearest neighbors?"); 50 | 51 | knn_query(const cl::Buffer& query_points, 52 | const cl::Buffer& results, 53 | std::size_t num_queries) 54 | : _query_points{query_points}, 55 | _results{results}, 56 | _num_queries{num_queries} 57 | { 58 | } 59 | 60 | virtual void push_full_arguments(qcl::kernel_call& call) override 61 | { 62 | call.partial_argument_list(_query_points, 63 | _results, 64 | static_cast(_num_queries)); 65 | } 66 | 67 | virtual std::size_t get_num_independent_queries() const override 68 | { 69 | return _num_queries; 70 | } 71 | 72 | virtual ~knn_query(){} 73 | 74 | static_assert(K > 0, "K must be non-zero"); 75 | 76 | private: 77 | cl::Buffer _query_points; 78 | cl::Buffer _results; 79 | std::size_t _num_queries; 80 | 81 | QCL_MAKE_SOURCE 82 | ( 83 | QCL_INCLUDE_MODULE(configuration) 84 | QCL_INCLUDE_MODULE(math::geometry) 85 | QCL_IMPORT_CONSTANT(K) 86 | 87 | QCL_RAW( 88 | void knn_update_max_distance(uint* max_distance_idx, 89 | scalar* candidate_distances2) 90 | { 91 | *max_distance_idx = 0; 92 | // Find new max distance index 93 | for(int i = 0; i < K; ++i) 94 | if(candidate_distances2[i] >= 95 | candidate_distances2[*max_distance_idx]) 96 | *max_distance_idx = i; 97 | } 98 | 99 | void knn_add_candidate_particle(particle_type particle, 100 | scalar particle_dist2, 101 | uint* max_distance_idx, 102 | scalar* candidate_distances2, 103 | particle_type* candidates) 104 | { 105 | candidate_distances2[*max_distance_idx] = particle_dist2; 106 | candidates[*max_distance_idx] = particle; 107 | knn_update_max_distance(max_distance_idx, candidate_distances2); 108 | }) 109 | // depth-first-serach for knn queries is currently 110 | // implemented rather inefficiently, because the DFS 111 | // query interface currently does not provide a means 112 | // to compare sibling nodes. This is however required 113 | // for efficient KNN queries to decide which path down 114 | // the tree is the better one. 115 | QCL_PREPROCESSOR(define, 116 | dfs_node_selector(selection_result_ptr, 117 | current_node_key_ptr, 118 | node_index, 119 | bbox_min_corner, 120 | bbox_max_corner) 121 | { 122 | scalar dist2 = 0.0f; 123 | if(!box_contains_particle(CLIP_TO_VECTOR(bbox_min_corner), 124 | CLIP_TO_VECTOR(bbox_max_corner), 125 | query_position)) 126 | { 127 | dist2 = box_distance2(query_position, 128 | CLIP_TO_VECTOR(bbox_min_corner), 129 | CLIP_TO_VECTOR(bbox_max_corner)); 130 | } 131 | 132 | *selection_result_ptr = 133 | dist2 < candidate_distances2[max_distance_idx]; 134 | } 135 | 136 | ) 137 | QCL_PREPROCESSOR(define, 138 | dfs_particle_processor(selection_result_ptr, 139 | particle_idx, 140 | current_particle) 141 | { 142 | vector_type delta = PARTICLE_POSITION(current_particle) 143 | - query_position; 144 | scalar dist2 = VECTOR_NORM2(delta); 145 | 146 | *selection_result_ptr = 147 | dist2 < candidate_distances2[max_distance_idx]; 148 | 149 | if(*selection_result_ptr) 150 | { 151 | knn_add_candidate_particle(current_particle, 152 | dist2, 153 | &max_distance_idx, 154 | candidate_distances2, 155 | candidates); 156 | } 157 | } 158 | ) 159 | QCL_PREPROCESSOR(define, 160 | dfs_unique_node_discard_event(node_idx, 161 | current_bbox_min_corner, 162 | current_bbox_max_corner) 163 | ) 164 | R"( 165 | #define declare_full_query_parameter_set() \ 166 | __global vector_type* query_positions, \ 167 | __global particle_type* query_result, \ 168 | ulong num_queries 169 | )" 170 | QCL_PREPROCESSOR(define, 171 | at_query_init() 172 | 173 | scalar candidate_distances2 [K]; 174 | particle_type candidates [K]; 175 | 176 | for(int i = 0; i < K; ++i) 177 | candidate_distances2[i] = FLT_MAX; 178 | 179 | uint max_distance_idx = 0; 180 | 181 | vector_type query_position; 182 | if(get_query_id() < num_queries) 183 | query_position = query_positions[get_query_id()]; 184 | ) 185 | QCL_PREPROCESSOR(define, 186 | at_query_exit() 187 | if(get_query_id() < num_queries) 188 | { 189 | for(int i = 0; i < K; ++i) 190 | query_result[get_query_id()*K + i] = candidates[i]; 191 | } 192 | ) 193 | QCL_PREPROCESSOR(define, 194 | get_num_queries() 195 | num_queries 196 | ) 197 | 198 | ) 199 | }; 200 | 201 | 202 | } 203 | } 204 | 205 | 206 | #endif 207 | -------------------------------------------------------------------------------- /SpatialCL/query/query_range.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of SpatialCL, a library for the spatial processing of 3 | * particles. 4 | * 5 | * Copyright (c) 2017, 2018 Aksel Alpay 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions are met: 10 | * 11 | * 1. Redistributions of source code must retain the above copyright notice, this 12 | * list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright notice, 14 | * this list of conditions and the following disclaimer in the documentation 15 | * and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 21 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | */ 28 | 29 | #ifndef QUERY_RANGE_HPP 30 | #define QUERY_RANGE_HPP 31 | 32 | #include "../configuration.hpp" 33 | #include "../math/geometry.hpp" 34 | 35 | #include "query_base.hpp" 36 | 37 | namespace spatialcl { 38 | namespace query { 39 | 40 | template 42 | class box_range_query : public basic_query 43 | { 44 | public: 45 | QCL_MAKE_MODULE(box_range_query) 46 | 47 | 48 | box_range_query(const cl::Buffer& query_ranges_min, 49 | const cl::Buffer& query_ranges_max, 50 | const cl::Buffer& result_retrieved_particles, 51 | const cl::Buffer& result_num_retrieved_particles, 52 | std::size_t num_queries) 53 | : _query_ranges_min{query_ranges_min}, 54 | _query_ranges_max{query_ranges_max}, 55 | _result{result_retrieved_particles}, 56 | _num_selected_particles{result_num_retrieved_particles}, 57 | _num_queries{num_queries} 58 | { 59 | assert(num_queries > 0); 60 | } 61 | 62 | virtual void push_full_arguments(qcl::kernel_call& call) override 63 | { 64 | call.partial_argument_list(_query_ranges_min, 65 | _query_ranges_max, 66 | _result, 67 | _num_selected_particles, 68 | static_cast(_num_queries)); 69 | } 70 | 71 | virtual std::size_t get_num_independent_queries() const override 72 | { 73 | return _num_queries; 74 | } 75 | 76 | virtual ~box_range_query(){} 77 | 78 | private: 79 | cl::Buffer _query_ranges_min; 80 | cl::Buffer _query_ranges_max; 81 | cl::Buffer _result; 82 | cl::Buffer _num_selected_particles; 83 | std::size_t _num_queries; 84 | 85 | QCL_MAKE_SOURCE 86 | ( 87 | QCL_INCLUDE_MODULE(configuration) 88 | QCL_INCLUDE_MODULE(math::geometry) 89 | QCL_IMPORT_CONSTANT(Max_retrieved_particles) 90 | QCL_PREPROCESSOR(define, 91 | dfs_node_selector(selection_result_ptr, 92 | current_node_key_ptr, 93 | node_index, 94 | bbox_min_corner, 95 | bbox_max_corner) 96 | *selection_result_ptr = box_box_intersection( 97 | CLIP_TO_VECTOR(bbox_min_corner), 98 | CLIP_TO_VECTOR(bbox_max_corner), 99 | query_range_min, 100 | query_range_max); 101 | ) 102 | QCL_PREPROCESSOR(define, 103 | dfs_particle_processor(selection_result_ptr, 104 | particle_idx, 105 | current_particle) 106 | { 107 | *selection_result_ptr = box_contains_particle(query_range_min, 108 | query_range_max, 109 | current_particle); 110 | if(*selection_result_ptr) 111 | { 112 | if(num_selected_particles < Max_retrieved_particles) 113 | { 114 | ulong result_pos = get_query_id()*Max_retrieved_particles 115 | + num_selected_particles; 116 | query_result[result_pos] = current_particle; 117 | ++num_selected_particles; 118 | } 119 | } 120 | } 121 | ) 122 | QCL_PREPROCESSOR(define, 123 | dfs_unique_node_discard_event(node_idx, 124 | current_bbox_min_corner, 125 | current_bbox_max_corner) 126 | ) 127 | R"( 128 | #define declare_full_query_parameter_set() \ 129 | __global vector_type* query_ranges_min, \ 130 | __global vector_type* query_ranges_max, \ 131 | __global particle_type* query_result, \ 132 | __global uint* num_retrieved_particles, \ 133 | ulong num_queries 134 | )" 135 | QCL_PREPROCESSOR(define, 136 | at_query_init() 137 | vector_type query_range_min; 138 | vector_type query_range_max; 139 | uint num_selected_particles = 0; 140 | 141 | if(get_query_id() < num_queries) 142 | { 143 | query_range_min = query_ranges_min[get_query_id()]; 144 | query_range_max = query_ranges_max[get_query_id()]; 145 | num_retrieved_particles[get_query_id()] = 0; 146 | } 147 | ) 148 | QCL_PREPROCESSOR(define, 149 | at_query_exit() 150 | if(get_query_id() < num_queries) 151 | { 152 | num_retrieved_particles[get_query_id()] = num_selected_particles; 153 | } 154 | ) 155 | QCL_PREPROCESSOR(define, 156 | get_num_queries() 157 | num_queries 158 | ) 159 | ) 160 | }; 161 | 162 | 163 | } 164 | } 165 | 166 | #endif 167 | -------------------------------------------------------------------------------- /SpatialCL/sfc_position_generator.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of SpatialCL, a library for the spatial processing of 3 | * particles. 4 | * 5 | * Copyright (c) 2017 Aksel Alpay 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions are met: 10 | * 11 | * 1. Redistributions of source code must retain the above copyright notice, this 12 | * list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright notice, 14 | * this list of conditions and the following disclaimer in the documentation 15 | * and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 21 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | */ 28 | 29 | #ifndef SFC_POSITION_GENERATOR_HPP 30 | #define SFC_POSITION_GENERATOR_HPP 31 | 32 | #include "configuration.hpp" 33 | 34 | 35 | namespace spatialcl { 36 | namespace space_filling_curve { 37 | 38 | template 39 | class position_generator 40 | { 41 | public: 42 | using vector_type = typename configuration::vector_type; 43 | 44 | virtual void operator()(const qcl::device_context_ptr& ctx, 45 | const vector_type& particles_min, 46 | const vector_type& particles_max, 47 | const cl::Buffer& particles, 48 | cl_ulong num_particles, 49 | const cl::Buffer& out) const = 0; 50 | }; 51 | 52 | } 53 | } 54 | 55 | #endif 56 | -------------------------------------------------------------------------------- /SpatialCL/tree.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of SpatialCL, a library for the spatial processing of 3 | * particles. 4 | * 5 | * Copyright (c) 2017 Aksel Alpay 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions are met: 10 | * 11 | * 1. Redistributions of source code must retain the above copyright notice, this 12 | * list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright notice, 14 | * this list of conditions and the following disclaimer in the documentation 15 | * and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 21 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | */ 28 | 29 | #ifndef TREE_HPP 30 | #define TREE_HPP 31 | 32 | #include "tree/particle_bvh_sfc_tree.hpp" 33 | 34 | namespace spatialcl { 35 | 36 | 37 | template 38 | using zcurve_bvh_tree = 39 | particle_bvh_tree>, Type_descriptor>; 40 | 41 | template 42 | using zcurve_bvh_sp2d_tree = zcurve_bvh_tree>; 43 | 44 | template 45 | using zcurve_bvh_sp3d_tree = zcurve_bvh_tree>; 46 | 47 | template 48 | using zcurve_bvh_dp2d_tree = zcurve_bvh_tree>; 49 | 50 | template 51 | using zcurve_bvh_dp3d_tree = zcurve_bvh_tree>; 52 | 53 | 54 | template 55 | using hilbert_bvh_tree = 56 | particle_bvh_tree>, Type_descriptor>; 57 | 58 | template 59 | using hilbert_bvh_sp2d_tree = hilbert_bvh_tree>; 60 | 61 | template 62 | using hilbert_bvh_sp3d_tree = hilbert_bvh_tree>; 63 | 64 | template 65 | using hilbert_bvh_dp2d_tree = hilbert_bvh_tree>; 66 | 67 | template 68 | using hilbert_bvh_dp3d_tree = hilbert_bvh_tree>; 69 | 70 | //using kd_bvh_tree = particle_bvh_tree>; 71 | 72 | 73 | } 74 | 75 | #endif 76 | -------------------------------------------------------------------------------- /SpatialCL/tree/binary_tree.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of SpatialCL, a library for the spatial processing of 3 | * particles. 4 | * 5 | * Copyright (c) 2017, 2018 Aksel Alpay 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions are met: 10 | * 11 | * 1. Redistributions of source code must retain the above copyright notice, this 12 | * list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright notice, 14 | * this list of conditions and the following disclaimer in the documentation 15 | * and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 21 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | */ 28 | 29 | #ifndef BINARY_TREE_HPP 30 | #define BINARY_TREE_HPP 31 | 32 | #include "../configuration.hpp" 33 | #include "../bit_manipulation.hpp" 34 | 35 | 36 | /** 37 | General binary tree layout 38 | ==================== 39 | 40 | Nodes for each level in the tree are laid out in continous order in global memory 41 | (this helps coalescing). The leaf level comes first, followed the first level of parent 42 | nodes, and so on, until the root, which is the last node in memory. 43 | 44 | For the indexing of nodes it is assumed that the number of leaves is always a number 45 | of 2, as it leads to convenient binary representations of node adresses/offsets 46 | (see below). The number of leaves can be non-powers of 2, in which case they will be 47 | rounded to the nearest power of 2 (=effective_num_leaves). 48 | It is not necessary that all the "padded" leaves actually exist in memory, as client code 49 | can just add (num_leaves-effective_num_leaves) to the indices to correct for this offset. 50 | 51 | For the indexing, levels are counted from the root (= Level 0) to the leaves. 52 | 53 | Example node layout for 32 particles (Offset is the index where the nodes for a given 54 | level would start in memory): 55 | N_nodes Level Offset 56 | 100000 = 32 l5 000000 57 | 010000 = 16 l4 100000 58 | 001000 = 8 l3 110000 59 | 000100 = 4 l2 111000 60 | 000010 = 2 l1 111100 61 | 000001 = 1 l0 111110 62 | 63 | This means that the level of a node and its position within the level can be identified 64 | by the number of leading 1s of its index. 65 | 66 | Terminology 67 | ============ 68 | 69 | * A global index/id is the overall index of a node, counted from the first leaf. 70 | If the number of leaves is a power of 2, a node's data can simply be accessed 71 | by reading the node array at the node's global index (If the number of leaves 72 | is not a power 2, the global index first needs to be corrected by the deviation 73 | from the power of 2, see above) 74 | * The level offset is the global index of the first node of a given level. 75 | * A local index/id is the index of a node relative to the level offset. 76 | (More intuitively, the local index is obtained by enumerating the nodes within 77 | one node level, while the global index is obtained by enumerating all nodes 78 | across all levels) 79 | 80 | The corresponding tree would look like this: 81 | | x | x | x | x | x | x | x | x | x | x | x | x | x | x | x | x | x | x | x | x | x | x | x | x | x | x | x | x | x | x | x | x | 82 | | x | x | x | x | x | x | x | x | x | x | x | x | x | x | x | x | 83 | | x | x | x | x | x | x | x | x | 84 | | x | x | x | x | 85 | | x | 86 | 87 | What if the number of leaves is not a power of 2? 88 | ================================================= 89 | 90 | Treating the case of non power of 2 numer of leaves may require additional effort, 91 | depending on the tree algorithm that one wishes to run. Let's consider the example 92 | of 6 leaves. For the indexing, this will be rounded to the nearest power of 2, 93 | i.e., 8. Hence, the effective tree will look like this (x denotes populated nodes): 94 | 95 | | x | x | x | x | x | x | | | 96 | | x | x | x | | 97 | | x | x | 98 | | x | 99 | 100 | As can be seen, non-power-of 2 numbers of leaves in general also imply that some 101 | nodes of the higher levels are invalid. It is important when constructing the tree 102 | and also for client code evaluating the tree (in whatever way this may be) to make sure 103 | these invalid nodes are ignored. 104 | The validity of a node can be checked by making sure that the index of the first leaf 105 | contained by a node is smaller than the number of leaves. The index of the first leaf 106 | can be easily obtained using binary_tree_get_leaves_begin(). A shortcut for this check 107 | exists in the form of the binary_tree_is_node_used() function. 108 | 109 | */ 110 | 111 | 112 | namespace spatialcl { 113 | 114 | class binary_tree 115 | { 116 | public: 117 | QCL_MAKE_MODULE(binary_tree) 118 | private: 119 | QCL_MAKE_SOURCE 120 | ( 121 | QCL_INCLUDE_MODULE(bit_manipulation) 122 | R"( 123 | #define BT_EFFECTIVE_NUM_LEAVES(num_leaves) get_next_power_of_two(num_leaves) 124 | #define BT_LEVEL_OFFSET_MASK(num_levels) n_bits_set(num_levels) 125 | #define BT_LEVEL_OFFSET(level, num_levels) (~n_bits_set(level+1) & BT_LEVEL_OFFSET_MASK(num_levels)) 126 | #define BT_LEAVES_PER_NODE(level, num_levels) (1ul << (num_levels - level - 1)) 127 | #define BT_NUM_NODES(level) (1ul << (level)) 128 | #define BT_LOCAL_NODE_ID_OF_LEFT_CHILD(parent_local_node_id) ((parent_local_node_id) << 1) 129 | )" 130 | QCL_RAW 131 | ( 132 | 133 | typedef ulong index_type; 134 | 135 | typedef struct 136 | { 137 | uint level; 138 | index_type local_node_id; 139 | } binary_tree_key_t; 140 | 141 | 142 | void binary_tree_key_init(binary_tree_key_t* ctx, 143 | uint level, 144 | index_type local_node_id) 145 | { 146 | ctx->level = level; 147 | ctx->local_node_id = local_node_id; 148 | } 149 | 150 | index_type binary_tree_key_encode_global_id(binary_tree_key_t* ctx, 151 | index_type num_levels) 152 | { 153 | index_type offset = BT_LEVEL_OFFSET(ctx->level, num_levels); 154 | return offset + ctx->local_node_id; 155 | } 156 | 157 | void binary_tree_key_decode_global_id(binary_tree_key_t* ctx, 158 | index_type global_node_id, 159 | index_type level, 160 | index_type num_levels) 161 | { 162 | ctx->level = level; 163 | 164 | index_type offset = BT_LEVEL_OFFSET(level, num_levels); 165 | ctx->local_node_id = global_node_id - offset; 166 | } 167 | 168 | index_type binary_tree_get_leaves_begin(binary_tree_key_t* node_key, 169 | index_type num_levels) 170 | { 171 | index_type leaves_per_node = BT_LEAVES_PER_NODE(node_key->level, num_levels); 172 | 173 | return node_key->local_node_id * leaves_per_node; 174 | } 175 | 176 | index_type binary_tree_get_leaves_end(binary_tree_key_t* node_key, 177 | index_type num_levels) 178 | { 179 | index_type BT_LEAVES_PER_NODE = BT_LEAVES_PER_NODE(node_key->level, num_levels); 180 | 181 | return (node_key->local_node_id + 1) * BT_LEAVES_PER_NODE; 182 | } 183 | 184 | int binary_tree_is_node_used(binary_tree_key_t* node_key, 185 | index_type num_levels, 186 | index_type num_leaves) 187 | { 188 | return binary_tree_get_leaves_begin(node_key, num_levels) < num_leaves; 189 | } 190 | 191 | binary_tree_key_t binary_tree_get_children_begin(binary_tree_key_t* node) 192 | { 193 | binary_tree_key_t child = *node; 194 | 195 | child.level++; 196 | child.local_node_id <<= 1; 197 | 198 | return child; 199 | } 200 | 201 | binary_tree_key_t binary_tree_get_children_last(binary_tree_key_t* node) 202 | { 203 | binary_tree_key_t child = binary_tree_get_children_begin(node); 204 | child.local_node_id++; 205 | 206 | return child; 207 | } 208 | 209 | binary_tree_key_t binary_tree_get_parent(binary_tree_key_t* ctx) 210 | { 211 | binary_tree_key_t result = *ctx; 212 | result.level--; 213 | result.local_node_id >>= 1; 214 | 215 | return result; 216 | } 217 | 218 | int binary_tree_is_left_child(binary_tree_key_t* ctx) 219 | { 220 | return (ctx->local_node_id & 1) == 0; 221 | } 222 | 223 | int binary_tree_is_right_child(binary_tree_key_t* ctx) 224 | { 225 | return ctx->local_node_id & 1; 226 | } 227 | 228 | index_type binary_tree_get_num_populated_nodes(uint level, 229 | uint num_levels, 230 | uint num_leaves) 231 | { 232 | index_type leaves_per_node = BT_LEAVES_PER_NODE(level, num_levels); 233 | return (num_leaves + leaves_per_node - 1) / leaves_per_node; 234 | } 235 | 236 | ) 237 | ) 238 | }; 239 | 240 | } 241 | 242 | #endif 243 | 244 | -------------------------------------------------------------------------------- /SpatialCL/tree/particle_bvh_sfc_tree.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of SpatialCL, a library for the spatial processing of 3 | * particles. 4 | * 5 | * Copyright (c) 2017 Aksel Alpay 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions are met: 10 | * 11 | * 1. Redistributions of source code must retain the above copyright notice, this 12 | * list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright notice, 14 | * this list of conditions and the following disclaimer in the documentation 15 | * and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 21 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | */ 28 | 29 | #ifndef PARTICLE_BVH_SFC_TREE 30 | #define PARTICLE_BVH_SFC_TREE 31 | 32 | #include 33 | #include 34 | #include 35 | #include 36 | 37 | #include 38 | #include 39 | 40 | #include "particle_bvh_tree.hpp" 41 | #include "../bit_manipulation.hpp" 42 | #include "../sfc_position_generator.hpp" 43 | #include "../zcurve.hpp" 44 | #include "../hilbert_curve.hpp" 45 | 46 | namespace spatialcl { 47 | 48 | template 49 | class sfc_sort_key_generator 50 | { 51 | public: 52 | using key_type = cl_ulong; 53 | using particle_type = typename configuration::particle_type; 54 | using vector_type = typename configuration::vector_type; 55 | static constexpr std::size_t particle_dimension = Type_descriptor::particle_dimension; 56 | static constexpr std::size_t dimension = Type_descriptor::dimension; 57 | 58 | 59 | 60 | void operator()(const qcl::device_context_ptr& ctx, 61 | const cl::Buffer& particles, 62 | std::size_t num_particles, 63 | const cl::Buffer& particle_sort_keys_out) const 64 | { 65 | vector_type min, max; 66 | this->get_particle_extent(ctx, particles, num_particles, min, max); 67 | 68 | Space_filling_curve curve; 69 | 70 | curve(ctx, 71 | min, 72 | max, 73 | particles, 74 | static_cast(num_particles), 75 | particle_sort_keys_out); 76 | } 77 | 78 | private: 79 | 80 | void get_particle_extent(const qcl::device_context_ptr& ctx, 81 | const cl::Buffer& particles, 82 | std::size_t num_particles, 83 | vector_type& min, 84 | vector_type& max) const 85 | { 86 | 87 | using boost_particle_type = typename qcl::to_boost_vector_type::type; 88 | 89 | 90 | BOOST_COMPUTE_FUNCTION(int, vector_less_x, (boost_particle_type a, boost_particle_type b), 91 | { return a.x < b.x; }); 92 | BOOST_COMPUTE_FUNCTION(int, vector_less_y, (boost_particle_type a, boost_particle_type b), 93 | { return a.y < b.y; }); 94 | BOOST_COMPUTE_FUNCTION(int, vector_less_z, (boost_particle_type a, boost_particle_type b), 95 | { return a.z < b.z; }); 96 | 97 | boost::compute::command_queue boost_queue{ctx->get_command_queue().get()}; 98 | auto begin = qcl::create_buffer_iterator(particles,0); 99 | auto end = qcl::create_buffer_iterator(particles,num_particles); 100 | 101 | auto result_x = boost::compute::minmax_element(begin, end, vector_less_x, 102 | boost_queue); 103 | auto result_y = boost::compute::minmax_element(begin, end, vector_less_y, 104 | boost_queue); 105 | auto result_z = result_y; 106 | if(dimension >= 3) 107 | result_z = boost::compute::minmax_element(begin, end, vector_less_z, 108 | boost_queue); 109 | 110 | min.s[0] = result_x.first.read(boost_queue)[0]; 111 | max.s[0] = result_x.second.read(boost_queue)[0]; 112 | 113 | min.s[1] = result_y.first.read(boost_queue)[1]; 114 | max.s[1] = result_y.second.read(boost_queue)[1]; 115 | 116 | if(dimension >= 3) 117 | { 118 | min.s[2] = result_z.first.read(boost_queue)[2]; 119 | max.s[2] = result_z.second.read(boost_queue)[2]; 120 | } 121 | 122 | } 123 | 124 | 125 | static constexpr std::size_t local_size = 256; 126 | }; 127 | 128 | template 129 | using zcurve_sort_key_generator = 130 | sfc_sort_key_generator>; 131 | 132 | template 133 | using hilbert_sort_key_generator = 134 | sfc_sort_key_generator>; 135 | 136 | 137 | 138 | 139 | template 140 | class key_based_sorter 141 | { 142 | public: 143 | 144 | using key_type = typename Key_generator::key_type; 145 | using particle_type = typename Key_generator::particle_type; 146 | using boost_particle_type = typename qcl::to_boost_vector_type::type; 147 | 148 | void operator()(const qcl::device_context_ptr& ctx, 149 | const cl::Buffer& particles, 150 | std::size_t num_particles) const 151 | { 152 | qcl::device_array sort_keys{ctx, num_particles}; 153 | 154 | Key_generator sort_key_generator; 155 | sort_key_generator(ctx, particles, num_particles, sort_keys.get_buffer()); 156 | 157 | boost::compute::command_queue boost_queue{ 158 | ctx->get_command_queue().get() 159 | }; 160 | 161 | auto keys_begin = qcl::create_buffer_iterator(sort_keys.get_buffer(),0); 162 | auto keys_end = qcl::create_buffer_iterator(sort_keys.get_buffer(),sort_keys.size()); 163 | auto values_begin = qcl::create_buffer_iterator(particles, 0); 164 | 165 | boost::compute::sort_by_key( 166 | keys_begin, 167 | keys_end, 168 | values_begin, 169 | boost_queue); 170 | 171 | } 172 | 173 | }; 174 | 175 | } 176 | /* 177 | class kd_sorter 178 | { 179 | public: 180 | void operator()(const qcl::device_context_ptr& ctx, 181 | const cl::Buffer& particles, 182 | std::size_t num_particles) const 183 | { 184 | BOOST_COMPUTE_FUNCTION(int, vector4_less_x, (boost_float4 a, boost_float4 b), 185 | { return a.x < b.x; }); 186 | BOOST_COMPUTE_FUNCTION(int, vector4_less_y, (boost_float4 a, boost_float4 b), 187 | { return a.y < b.y; }); 188 | BOOST_COMPUTE_FUNCTION(int, vector4_less_z, (boost_float4 a, boost_float4 b), 189 | { return a.z < b.z; }); 190 | 191 | boost::compute::command_queue boost_queue{ctx->get_command_queue().get()}; 192 | 193 | std::size_t current_num_particles_per_node = num_particles; 194 | 195 | while(current_num_particles_per_node > _group_size) 196 | { 197 | boost::compute::nth_element(qcl::create_buffer_iterator(particles, 0), 198 | qcl::create_buffer_iterator(particles, num_particles/2), 199 | qcl::create_buffer_iterator(particles, num_particles), 200 | vector4_less_x, 201 | boost_queue); 202 | } 203 | } 204 | 205 | private: 206 | static constexpr std::size_t _group_size = 512; 207 | }; 208 | 209 | class kd_sort_key_generator 210 | { 211 | public: 212 | using key_type = cl_ulong; 213 | 214 | void operator()(const qcl::device_context_ptr& ctx, 215 | const cl::Buffer& particles, 216 | std::size_t num_particles, 217 | cl::Buffer& particle_sort_keys_out) const 218 | { 219 | using boost_float4 = boost::compute::float4_; 220 | 221 | cl::Buffer x_sort_indices, y_sort_indices, z_sort_indices; 222 | 223 | BOOST_COMPUTE_FUNCTION(int, vector4_less_x, (boost_float4 a, boost_float4 b), 224 | { return a.x < b.x; }); 225 | BOOST_COMPUTE_FUNCTION(int, vector4_less_y, (boost_float4 a, boost_float4 b), 226 | { return a.y < b.y; }); 227 | BOOST_COMPUTE_FUNCTION(int, vector4_less_z, (boost_float4 a, boost_float4 b), 228 | { return a.z < b.z; }); 229 | 230 | // First, obtain the indices where a particle would be moved to in a sort along the 231 | // x,y and z axes 232 | this->obtain_sort_indices(ctx, particles, num_particles, vector4_less_x, x_sort_indices); 233 | this->obtain_sort_indices(ctx, particles, num_particles, vector4_less_y, y_sort_indices); 234 | this->obtain_sort_indices(ctx, particles, num_particles, vector4_less_z, z_sort_indices); 235 | // The final kd tree sort key can now be obtained by interleaving the x,y,z sort indices 236 | // in a bitwise manner. 237 | 238 | ctx->register_source_module( 239 | std::vector{"kd_tree_interleave_sort_indices"}); 240 | qcl::kernel_ptr kernel = ctx->get_kernel("kd_tree_interleave_sort_indices"); 241 | qcl::kernel_argument_list args{kernel}; 242 | 243 | args.push(x_sort_indices); 244 | args.push(y_sort_indices); 245 | args.push(z_sort_indices); 246 | args.push(static_cast(num_particles)); 247 | args.push(particle_sort_keys_out); 248 | 249 | cl_int err = ctx->enqueue_ndrange_kernel(kernel, 250 | cl::NDRange{num_particles}, 251 | cl::NDRange{128}); 252 | qcl::check_cl_error(err, "Could not enqueue kd_tree_interleave_sort_indices" 253 | " interleave kernel"); 254 | 255 | err = ctx->get_command_queue().finish(); 256 | qcl::check_cl_error(err, "Error while waiting for the kd_tree_interleave_sort_indices" 257 | " to finish."); 258 | 259 | } 260 | 261 | private: 262 | QCL_MODULE_BEGIN(kd_tree_interleave_sort_indices); 263 | QCL_INCLUDE_MODULE(bit_manipulation); 264 | cl_source( 265 | __kernel void kd_tree_interleave_sort_indices(__global ulong* x_sort_indices, 266 | __global ulong* y_sort_indices, 267 | __global ulong* z_sort_indices, 268 | ulong num_elements, 269 | __global ulong* out) 270 | { 271 | size_t gid = get_global_id(0); 272 | 273 | if(gid < num_elements) 274 | { 275 | ulong x_idx = x_sort_indices[gid]; 276 | ulong y_idx = y_sort_indices[gid]; 277 | ulong z_idx = z_sort_indices[gid]; 278 | out[gid] = interleave_bits3(x_idx, y_idx, z_idx); 279 | 280 | } 281 | } 282 | ); 283 | QCL_MODULE_END(); 284 | 285 | QCL_MODULE_BEGIN(kd_tree_generate_sequence); 286 | cl_source( 287 | __kernel void kd_tree_generate_sequence(__global ulong* out, ulong num_elements) 288 | { 289 | size_t gid = get_global_id(0); 290 | if(gid < num_elements) 291 | out[gid] = gid; 292 | } 293 | ); 294 | QCL_MODULE_END(); 295 | 296 | QCL_MODULE_BEGIN(kd_tree_invert_indices); 297 | cl_source( 298 | __kernel void kd_tree_invert_indices(__global ulong* sorted_indices, 299 | ulong num_elements, 300 | __global ulong* out) 301 | { 302 | size_t gid = get_global_id(0); 303 | if(gid < num_elements) 304 | { 305 | ulong idx = sorted_indices[gid]; 306 | out[idx] = gid; 307 | } 308 | } 309 | ); 310 | QCL_MODULE_END(); 311 | 312 | void run_generate_sequence_kernel(const qcl::device_context_ptr& ctx, 313 | const cl::Buffer& out, 314 | std::size_t num_elements) const 315 | { 316 | qcl::kernel_ptr kernel = ctx->get_kernel("kd_tree_generate_sequence"); 317 | qcl::kernel_argument_list args{kernel}; 318 | args.push(out); 319 | args.push(static_cast(num_elements)); 320 | 321 | cl_int err = ctx->enqueue_ndrange_kernel(kernel, 322 | cl::NDRange{num_elements}, 323 | cl::NDRange{128}); 324 | qcl::check_cl_error(err, "Could not enqueue kd_tree_generate_sequence kernel"); 325 | } 326 | 327 | void run_invert_indices_kernel(const qcl::device_context_ptr& ctx, 328 | const cl::Buffer& sorted_indices, 329 | std::size_t num_elements, 330 | cl::Buffer& out) const 331 | { 332 | qcl::kernel_ptr kernel = ctx->get_kernel("kd_tree_invert_indices"); 333 | qcl::kernel_argument_list args{kernel}; 334 | 335 | args.push(sorted_indices); 336 | args.push(static_cast(num_elements)); 337 | args.push(out); 338 | 339 | cl_int err = ctx->enqueue_ndrange_kernel(kernel, cl::NDRange{num_elements}, cl::NDRange{128}); 340 | qcl::check_cl_error(err, "Could not enqueue kd_tree_invert_indices kernel"); 341 | } 342 | 343 | template 344 | void obtain_sort_indices(const qcl::device_context_ptr& ctx, 345 | const cl::Buffer& particles, 346 | std::size_t num_particles, 347 | Comparator compare, 348 | cl::Buffer& out) const 349 | { 350 | // Make sure source module is compiled 351 | ctx->register_source_module( 352 | std::vector{"kd_tree_generate_sequence"}); 353 | 354 | using boost_float4 = boost::compute::float4_; 355 | 356 | cl::Buffer sorted_indices; 357 | ctx->create_buffer(sorted_indices, num_particles); 358 | run_generate_sequence_kernel(ctx, sorted_indices, num_particles); 359 | 360 | // Sort indices 361 | boost::compute::command_queue boost_queue{ctx->get_command_queue().get()}; 362 | 363 | cl::Buffer temp_particles; 364 | ctx->create_buffer(temp_particles, num_particles); 365 | ctx->get_command_queue().enqueueCopyBuffer(particles, temp_particles, 0, 0, num_particles*sizeof(cl_float4)); 366 | 367 | boost::compute::sort_by_key(qcl::create_buffer_iterator(temp_particles, 0), 368 | qcl::create_buffer_iterator(temp_particles, num_particles), 369 | qcl::create_buffer_iterator(sorted_indices, 0), 370 | compare, 371 | boost_queue); 372 | 373 | // Map indices back to obtain sorted position 374 | ctx->register_source_module(std::vector{"kd_tree_invert_indices"}); 375 | 376 | ctx->create_buffer(out, num_particles); 377 | run_invert_indices_kernel(ctx, sorted_indices, num_particles, out); 378 | 379 | } 380 | 381 | 382 | };*/ 383 | 384 | 385 | #endif 386 | -------------------------------------------------------------------------------- /SpatialCL/tree/particle_bvh_tree.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of SpatialCL, a library for the spatial processing of 3 | * particles. 4 | * 5 | * Copyright (c) 2017, 2018 Aksel Alpay 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions are met: 10 | * 11 | * 1. Redistributions of source code must retain the above copyright notice, this 12 | * list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright notice, 14 | * this list of conditions and the following disclaimer in the documentation 15 | * and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 21 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | */ 28 | 29 | #ifndef PARTICLE_BVH_TREE 30 | #define PARTICLE_BVH_TREE 31 | 32 | #include 33 | #include 34 | #include 35 | #include "particle_tree.hpp" 36 | 37 | namespace spatialcl { 38 | 39 | template 41 | class particle_bvh_tree : public particle_tree::vector_type, 44 | typename configuration::vector_type> 45 | { 46 | public: 47 | QCL_MAKE_MODULE(particle_bvh_tree) 48 | 49 | using particle_type = typename configuration::particle_type; 50 | using vector_type = typename configuration::vector_type; 51 | 52 | using base_type = particle_tree< 53 | Particle_sorter, 54 | Type_descriptor, 55 | typename configuration::vector_type, 56 | typename configuration::vector_type 57 | >; 58 | 59 | particle_bvh_tree(const qcl::device_context_ptr& ctx, 60 | const std::vector& particles, 61 | const Particle_sorter& sorter = Particle_sorter{}) 62 | : base_type{ctx, particles, sorter} 63 | { 64 | this->rebuild_bounding_boxes(); 65 | } 66 | 67 | particle_bvh_tree(const qcl::device_context_ptr& ctx, 68 | const cl::Buffer& particles, 69 | std::size_t num_particles, 70 | const Particle_sorter& sorter = Particle_sorter{}) 71 | : base_type{ctx, particles, num_particles, sorter} 72 | { 73 | this->rebuild_bounding_boxes(); 74 | } 75 | 76 | particle_bvh_tree(const qcl::device_context_ptr& ctx, 77 | const qcl::device_array& particles, 78 | const Particle_sorter& sorter = Particle_sorter{}) 79 | : base_type{ctx, particles, sorter} 80 | { 81 | this->rebuild_bounding_boxes(); 82 | } 83 | 84 | virtual ~particle_bvh_tree(){} 85 | 86 | void rebuild_bounding_boxes() 87 | { 88 | // Build the lowest layer of nodes. Because the particles 89 | // themselves are the lowest level, this is actually the second level. 90 | this->build_lowest_level_bboxes(); 91 | 92 | // Build the next layers. We start from num_levels-3, because num_levels-1 corresponds 93 | // to the particle layer, and num_levels-2 is the lowest node layer that was already 94 | // built by build_lowest_level_bboxes(). 95 | for(int i = static_cast(this->get_effective_num_levels())-3; i >= 0; --i) 96 | { 97 | #ifndef NODEBUG 98 | std::cout << "Building level " << i << std::endl; 99 | #endif 100 | build_higher_level_bboxes(static_cast(i)); 101 | } 102 | } 103 | 104 | const cl::Buffer& get_bbox_min_corners() const 105 | { 106 | return this->get_node_values0(); 107 | } 108 | 109 | const cl::Buffer& get_bbox_max_corners() const 110 | { 111 | return this->get_node_values1(); 112 | } 113 | 114 | private: 115 | 116 | static constexpr std::size_t local_size = 256; 117 | 118 | void build_lowest_level_bboxes() const 119 | { 120 | cl::NDRange global_size {this->get_effective_num_particles()/2}; 121 | cl::NDRange local_size {this->local_size}; 122 | 123 | cl_int err = bvh_tree_build_ll_bbox(this->get_device_context(), global_size, local_size)( 124 | this->get_sorted_particles(), 125 | static_cast(this->get_num_particles()), 126 | this->get_node_values0(), 127 | this->get_node_values1()); 128 | 129 | qcl::check_cl_error(err, "Could not enqueue bvh_tree_build_ll_bbox kernel"); 130 | } 131 | 132 | void build_higher_level_bboxes(unsigned level_id) const 133 | { 134 | assert(level_id < this->get_effective_num_levels()); 135 | 136 | 137 | cl::NDRange global_size {1ull << level_id}; 138 | cl::NDRange local_size {local_size}; 139 | 140 | cl_int err = bvh_tree_build_bbox(this->get_device_context(), global_size, local_size)( 141 | this->get_node_values0(), 142 | this->get_node_values1(), 143 | static_cast(level_id), 144 | static_cast(this->get_effective_num_levels()), 145 | static_cast(this->get_num_particles())); 146 | 147 | qcl::check_cl_error(err, "Could not enqueue bvh_tree_build_bbox kernel."); 148 | } 149 | 150 | QCL_ENTRYPOINT(bvh_tree_build_ll_bbox) 151 | QCL_ENTRYPOINT(bvh_tree_build_bbox) 152 | QCL_MAKE_SOURCE 153 | ( 154 | QCL_INCLUDE_MODULE(configuration) 155 | QCL_INCLUDE_MODULE(binary_tree) 156 | QCL_INCLUDE_MODULE(cl_utils::debug) 157 | QCL_RAW 158 | ( 159 | 160 | __kernel void bvh_tree_build_ll_bbox(__global particle_type* particles, 161 | index_type num_particles, 162 | __global vector_type* nodes_min_corner, 163 | __global vector_type* nodes_max_corner) 164 | { 165 | index_type num_threads = get_global_size(0); 166 | 167 | index_type effective_num_particles = BT_EFFECTIVE_NUM_LEAVES(num_particles); 168 | uint num_levels = find_most_significant_bit(effective_num_particles) + 1; 169 | 170 | for(index_type tid = get_global_id(0); 171 | tid < effective_num_particles/2; 172 | tid += num_threads) 173 | { 174 | index_type particle0_idx = 2 * tid; 175 | index_type particle1_idx = 2 * tid + 1; 176 | 177 | particle_type particle0 = (particle_type)(0.0f); 178 | 179 | if(particle0_idx < num_particles) 180 | particle0 = particles[particle0_idx]; 181 | 182 | particle_type particle1 = particle0; 183 | 184 | if(particle1_idx < num_particles) 185 | particle1 = particles[particle1_idx]; 186 | 187 | vector_type bb_min_position = fmin(PARTICLE_POSITION(particle0), 188 | PARTICLE_POSITION(particle1)); 189 | 190 | 191 | vector_type bb_max_position = fmax(PARTICLE_POSITION(particle0), 192 | PARTICLE_POSITION(particle1)); 193 | 194 | nodes_min_corner[tid] = bb_min_position; 195 | nodes_max_corner[tid] = bb_max_position; 196 | } 197 | } 198 | 199 | /// Builds bounding boxed for higher nodes - assumes that the lowest 200 | /// level of nodes has already been constructed using 201 | /// bvh_tree_build_ll_bbox() 202 | __kernel void bvh_tree_build_bbox(__global vector_type* nodes_min_corner, 203 | __global vector_type* nodes_max_corner, 204 | uint level, 205 | uint num_levels, 206 | index_type num_particles) 207 | { 208 | index_type num_threads = get_global_size(0); 209 | 210 | index_type num_nodes = BT_NUM_NODES(level); 211 | //index_type offset_correction = BT_EFFECTIVE_NUM_LEAVES(num_particles) - num_particles; 212 | index_type offset_correction = BT_EFFECTIVE_NUM_LEAVES(num_particles); 213 | 214 | for(index_type tid = get_global_id(0); 215 | tid < num_nodes; 216 | tid += num_threads) 217 | { 218 | binary_tree_key_t node_key; 219 | binary_tree_key_init(&node_key, level, tid); 220 | 221 | int node_exists = binary_tree_is_node_used(&node_key, 222 | num_levels, 223 | num_particles); 224 | 225 | vector_type bb_min = (vector_type)(0.0f); 226 | vector_type bb_max = (vector_type)(0.0f); 227 | 228 | if(node_exists) 229 | { 230 | binary_tree_key_t children_begin = binary_tree_get_children_begin(&node_key); 231 | binary_tree_key_t children_last = binary_tree_get_children_last (&node_key); 232 | 233 | int right_child_exists = binary_tree_is_node_used(&children_last, 234 | num_levels, 235 | num_particles); 236 | 237 | index_type child_idx = binary_tree_key_encode_global_id(&children_begin, 238 | num_levels); 239 | 240 | // We already know that the new parent node exists, so at least the left 241 | // child must exist as well. We therefore do not need to check the existence 242 | // of the left child, and can directly access its data. 243 | ASSERT(child_idx >= offset_correction); 244 | bb_min = nodes_min_corner[child_idx - offset_correction]; 245 | bb_max = nodes_max_corner[child_idx - offset_correction]; 246 | 247 | if(right_child_exists) 248 | { 249 | child_idx = binary_tree_key_encode_global_id(&children_last, num_levels); 250 | ASSERT(child_idx >= offset_correction); 251 | bb_min = fmin(bb_min, nodes_min_corner[child_idx - offset_correction]); 252 | bb_max = fmax(bb_max, nodes_max_corner[child_idx - offset_correction]); 253 | } 254 | } 255 | 256 | index_type node_idx = binary_tree_key_encode_global_id(&node_key, num_levels); 257 | nodes_min_corner[node_idx - offset_correction] = bb_min; 258 | nodes_max_corner[node_idx - offset_correction] = bb_max; 259 | } 260 | } 261 | ) 262 | ) 263 | }; 264 | 265 | 266 | } 267 | #endif 268 | -------------------------------------------------------------------------------- /SpatialCL/tree/particle_tree.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of SpatialCL, a library for the spatial processing of 3 | * particles. 4 | * 5 | * Copyright (c) 2017, 2018 Aksel Alpay 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions are met: 10 | * 11 | * 1. Redistributions of source code must retain the above copyright notice, this 12 | * list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright notice, 14 | * this list of conditions and the following disclaimer in the documentation 15 | * and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 21 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | */ 28 | 29 | #ifndef PARTICLE_TREE 30 | #define PARTICLE_TREE 31 | 32 | #include 33 | #include 34 | 35 | #include 36 | #include 37 | #include 38 | #include "binary_tree.hpp" 39 | #include "../configuration.hpp" 40 | #include "../cl_utils.hpp" 41 | 42 | 43 | namespace spatialcl { 44 | 45 | /// Base class for particle trees. Does not calculate 46 | /// the content of the tree nodes (this should be done 47 | /// by derived classes) 48 | /// \tparam Particle_sorter A functor to sort the particles 49 | /// spatially 50 | /// \tparam Type_descriptor The type descriptor for the desired 51 | /// type system (dimensionality, single/double precision etc) 52 | /// \tparam Node_data_type0 Data type of the first value of the data 53 | /// per node 54 | /// \tparam Node_data_type1 Data type of the second value of the data 55 | /// per node 56 | template 60 | class particle_tree 61 | { 62 | public: 63 | 64 | using particle_type = typename configuration::particle_type; 65 | using vector_type = typename configuration::vector_type; 66 | using boost_particle = typename qcl::to_boost_vector_type::type; 67 | using node_type0 = Node_data_type0; 68 | using node_type1 = Node_data_type1; 69 | 70 | using type_system = Type_descriptor; 71 | 72 | particle_tree(const qcl::device_context_ptr& ctx, 73 | const std::vector& particles, 74 | const Particle_sorter& sorter = Particle_sorter{}) 75 | : _ctx{ctx}, 76 | _num_particles{particles.size()} 77 | { 78 | _ctx->create_buffer(this->_sorted_particles, particles.size()); 79 | _ctx->memcpy_h2d(this->_sorted_particles, particles.data(), particles.size()); 80 | 81 | this->init_tree(sorter); 82 | } 83 | 84 | particle_tree(const qcl::device_context_ptr& ctx, 85 | const cl::Buffer& particles, 86 | std::size_t num_particles, 87 | const Particle_sorter& sorter = Particle_sorter{}) 88 | : _ctx{ctx}, 89 | _sorted_particles{particles}, 90 | _num_particles{num_particles} 91 | { 92 | this->init_tree(sorter); 93 | } 94 | 95 | particle_tree(const qcl::device_context_ptr& ctx, 96 | const qcl::device_array& particles, 97 | const Particle_sorter& sorter = Particle_sorter{}) 98 | : particle_tree{ctx, particles.get_buffer(), particles.size(), sorter} 99 | {} 100 | 101 | virtual ~particle_tree(){} 102 | 103 | std::size_t get_num_nodes() const 104 | { 105 | return this->_effective_num_particles-1; 106 | } 107 | 108 | std::size_t get_effective_num_levels() const 109 | { 110 | return this->_num_levels; 111 | } 112 | 113 | std::size_t get_num_node_levels() const 114 | { 115 | return get_effective_num_levels() - 1; 116 | } 117 | 118 | const cl::Buffer& get_sorted_particles() const 119 | { 120 | return this->_sorted_particles; 121 | } 122 | 123 | std::size_t get_num_particles() const 124 | { 125 | return this->_num_particles; 126 | } 127 | 128 | std::size_t get_effective_num_particles() const 129 | { 130 | return this->_effective_num_particles; 131 | } 132 | 133 | const qcl::device_context_ptr& get_device_context() const 134 | { 135 | return _ctx; 136 | } 137 | 138 | const cl::Buffer& get_node_values0() const 139 | { 140 | return _nodes0.get_buffer(); 141 | } 142 | 143 | const cl::Buffer& get_node_values1() const 144 | { 145 | return _nodes1.get_buffer(); 146 | } 147 | 148 | 149 | private: 150 | void init_tree(const Particle_sorter& sorter) 151 | { 152 | // First sort the particles spatially 153 | sorter(_ctx, _sorted_particles, _num_particles); 154 | 155 | // Calculate the required number of levels 156 | // and the effective number of particles 157 | _effective_num_particles = get_next_power_of_two(_num_particles); 158 | _num_levels = get_highest_set_bit(_effective_num_particles)+1; 159 | #ifndef NODEBUG 160 | std::cout << "Building tree with " 161 | << _num_levels << " levels over " 162 | << _effective_num_particles << " effective particles and " 163 | << _num_particles << " real particles." << std::endl; 164 | #endif 165 | 166 | _nodes0 = qcl::device_array{_ctx, _effective_num_particles}; 167 | _nodes1 = qcl::device_array{_ctx, _effective_num_particles}; 168 | 169 | } 170 | 171 | static unsigned get_highest_set_bit(uint64_t x) 172 | { 173 | unsigned result = 0; 174 | 175 | for(int i = 0; i < 64; ++i) 176 | if(x & (1ull << i)) 177 | result = i; 178 | 179 | return result; 180 | } 181 | 182 | static uint64_t get_next_power_of_two(uint64_t x) 183 | { 184 | x--; 185 | x |= x >> 1; 186 | x |= x >> 2; 187 | x |= x >> 4; 188 | x |= x >> 8; 189 | x |= x >> 16; 190 | x |= x >> 32; 191 | x++; 192 | 193 | return x; 194 | } 195 | 196 | qcl::device_context_ptr _ctx; 197 | 198 | cl::Buffer _sorted_particles; 199 | 200 | unsigned _num_levels; 201 | 202 | std::size_t _num_particles; 203 | std::size_t _effective_num_particles; 204 | 205 | qcl::device_array _nodes0; 206 | qcl::device_array _nodes1; 207 | }; 208 | 209 | 210 | 211 | 212 | } 213 | #endif 214 | -------------------------------------------------------------------------------- /SpatialCL/types.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of SpatialCL, a library for the spatial processing of 3 | * particles. 4 | * 5 | * Copyright (c) 2017 Aksel Alpay 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions are met: 10 | * 11 | * 1. Redistributions of source code must retain the above copyright notice, this 12 | * list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright notice, 14 | * this list of conditions and the following disclaimer in the documentation 15 | * and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 21 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | */ 28 | 29 | #ifndef TYPES_HPP 30 | #define TYPES_HPP 31 | 32 | #include 33 | 34 | namespace spatialcl { 35 | 36 | template 37 | struct cl_vector_type 38 | { 39 | }; 40 | 41 | #define DECLARE_VECTOR_TYPE(type, dimension, val) \ 42 | template<> struct cl_vector_type{ using value = val; } 43 | 44 | DECLARE_VECTOR_TYPE(float, 2, cl_float2); 45 | DECLARE_VECTOR_TYPE(float, 3, cl_float4); 46 | DECLARE_VECTOR_TYPE(float, 4, cl_float4); 47 | DECLARE_VECTOR_TYPE(float, 8, cl_float8); 48 | DECLARE_VECTOR_TYPE(float, 16, cl_float16); 49 | 50 | DECLARE_VECTOR_TYPE(double, 2, cl_double2); 51 | DECLARE_VECTOR_TYPE(double, 3, cl_double4); 52 | DECLARE_VECTOR_TYPE(double, 4, cl_double4); 53 | DECLARE_VECTOR_TYPE(double, 8, cl_double8); 54 | DECLARE_VECTOR_TYPE(double, 16, cl_double16); 55 | 56 | DECLARE_VECTOR_TYPE(int, 2, cl_int2); 57 | DECLARE_VECTOR_TYPE(int, 3, cl_int4); 58 | DECLARE_VECTOR_TYPE(int, 4, cl_int4); 59 | DECLARE_VECTOR_TYPE(int, 8, cl_int8); 60 | DECLARE_VECTOR_TYPE(int, 16, cl_int16); 61 | 62 | DECLARE_VECTOR_TYPE(unsigned, 2, cl_uint2); 63 | DECLARE_VECTOR_TYPE(unsigned, 3, cl_uint4); 64 | DECLARE_VECTOR_TYPE(unsigned, 4, cl_uint4); 65 | DECLARE_VECTOR_TYPE(unsigned, 8, cl_uint8); 66 | DECLARE_VECTOR_TYPE(unsigned, 16, cl_uint16); 67 | 68 | } 69 | 70 | #endif 71 | -------------------------------------------------------------------------------- /SpatialCL/zcurve.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of SpatialCL, a library for the spatial processing of 3 | * particles. 4 | * 5 | * Copyright (c) 2017 Aksel Alpay 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions are met: 10 | * 11 | * 1. Redistributions of source code must retain the above copyright notice, this 12 | * list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright notice, 14 | * this list of conditions and the following disclaimer in the documentation 15 | * and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 21 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | */ 28 | 29 | #ifndef ZCURVE_HPP 30 | #define ZCURVE_HPP 31 | 32 | 33 | #include "bit_manipulation.hpp" 34 | #include "grid.hpp" 35 | #include "configuration.hpp" 36 | #include "sfc_position_generator.hpp" 37 | 38 | namespace spatialcl { 39 | namespace space_filling_curve { 40 | 41 | template 42 | class zcurve : public position_generator 43 | { 44 | public: 45 | QCL_MAKE_MODULE(zcurve) 46 | 47 | 48 | using vector_type = typename configuration::vector_type; 49 | 50 | virtual void operator()(const qcl::device_context_ptr& ctx, 51 | const vector_type& particles_min, 52 | const vector_type& particles_max, 53 | const cl::Buffer& particles, 54 | cl_ulong num_particles, 55 | const cl::Buffer& out) const override 56 | { 57 | cl::NDRange global_size{num_particles}; 58 | cl::NDRange local_size{128}; 59 | 60 | qcl::kernel_call gen_position = this->generate_zcurve_position(ctx,global_size,local_size); 61 | cl_int err = gen_position(particles_min, particles_max, particles, num_particles, out); 62 | 63 | qcl::check_cl_error(err, "Could not enqueue generate_zcurve_position kernel"); 64 | } 65 | 66 | // Number of cells resolved in 2d along each axis (32 bits set) 67 | static constexpr unsigned zcurve_num_cells2d = 0xffffffff; 68 | static constexpr unsigned zcurve_num_resolved_levels2d = 32; 69 | 70 | static constexpr unsigned zcurve_num_cells3d = 0x1fffff; //21 bits set 71 | static constexpr unsigned zcurve_num_resolved_levels3d = 21; 72 | 73 | private: 74 | QCL_ENTRYPOINT(generate_zcurve_position) 75 | QCL_MAKE_SOURCE 76 | ( 77 | QCL_INCLUDE_MODULE(bit_manipulation) 78 | QCL_INCLUDE_MODULE(configuration) 79 | QCL_INCLUDE_MODULE(grid) 80 | QCL_IMPORT_CONSTANT(zcurve_num_cells2d) 81 | QCL_IMPORT_CONSTANT(zcurve_num_resolved_levels2d) 82 | QCL_IMPORT_CONSTANT(zcurve_num_cells3d) 83 | QCL_IMPORT_CONSTANT(zcurve_num_resolved_levels3d) 84 | QCL_RAW 85 | ( 86 | typedef ulong zcurve_key; 87 | 88 | typedef uint2 zcurve_cell_indices2d; 89 | typedef uint4 zcurve_cell_indices3d; 90 | 91 | // level 0 is the leaf level 92 | zcurve_key zcurve_min_key(zcurve_key leaf, uint level) 93 | { 94 | return leaf & ~n_bits_set(dimension * (level + 1)); 95 | } 96 | 97 | zcurve_key zcurve_max_key(zcurve_key leaf, uint level) 98 | { 99 | return leaf | n_bits_set(dimension * (level + 1)); 100 | } 101 | 102 | zcurve_key zcurve_position2d(zcurve_cell_indices2d pos) 103 | { 104 | return interleave_bits2(pos.x, pos.y); 105 | } 106 | 107 | zcurve_key zcurve_position3d(zcurve_cell_indices3d pos) 108 | { 109 | return interleave_bits3(pos.x, pos.y, pos.z); 110 | } 111 | 112 | __kernel void generate_zcurve_position(vector_type particles_min, 113 | vector_type particles_max, 114 | __global particle_type* particles, 115 | ulong num_particles, 116 | __global zcurve_key* out) 117 | { 118 | ulong num_cells = 0; 119 | 120 | if(dimension == 2) 121 | num_cells = zcurve_num_cells2d; 122 | else if(dimension == 3) 123 | num_cells = zcurve_num_cells3d; 124 | 125 | grid_t grid; 126 | grid_init(&grid, particles_min, particles_max, num_cells); 127 | 128 | size_t num_threads = get_global_size(0); 129 | 130 | for(size_t tid = get_global_id(0); 131 | tid < num_particles; 132 | tid += num_threads) 133 | { 134 | particle_type particle = particles[tid]; 135 | vector_type position = PARTICLE_POSITION(particle); 136 | 137 | cell_index_type idx = grid_get_cell(&grid, position, num_cells); 138 | 139 | out[tid] = DIMENSIONALITY_SWITCH( 140 | interleave_bits2(idx.x, idx.y), 141 | interleave_bits3(idx.x, idx.y, idx.z)); 142 | } 143 | } 144 | 145 | ) 146 | ) 147 | }; 148 | 149 | } 150 | } 151 | 152 | #endif 153 | -------------------------------------------------------------------------------- /benchmarks/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | subdirs(range_query_on_grid) 2 | -------------------------------------------------------------------------------- /benchmarks/range_query_on_grid/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(range_query_on_grid range_query_on_grid.cpp) 2 | target_link_libraries (range_query_on_grid ${OpenCL_LIBRARIES}) 3 | -------------------------------------------------------------------------------- /benchmarks/range_query_on_grid/range_query_on_grid.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of SpatialCL, a library for the spatial processing of 3 | * particles. 4 | * 5 | * Copyright (c) 2018 Aksel Alpay 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions are met: 10 | * 11 | * 1. Redistributions of source code must retain the above copyright notice, this 12 | * list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright notice, 14 | * this list of conditions and the following disclaimer in the documentation 15 | * and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 21 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | */ 28 | 29 | #include 30 | #include 31 | #include 32 | 33 | #include 34 | 35 | #include 36 | #include 37 | 38 | #include 39 | #include 40 | 41 | #include "../../common/environment.hpp" 42 | #include "../../common/random_vectors.hpp" 43 | #include "../../common/timer.hpp" 44 | 45 | 46 | constexpr std::size_t particle_dimension = 3; 47 | constexpr std::size_t num_runs = 10; 48 | 49 | const std::size_t num_particles = 700000; 50 | const std::size_t num_query_groups_xy = 128; 51 | const std::size_t query_group_size_xy = 8; 52 | 53 | constexpr std::size_t max_retrieved_particles = 8; 54 | 55 | using tree_type = spatialcl::hilbert_bvh_sp3d_tree; 56 | using type_system = tree_type::type_system; 57 | using scalar = type_system::scalar; 58 | 59 | // Define queries 60 | using strict_dfs_range_engine = 61 | spatialcl::query::strict_dfs_range_query_engine; 63 | 64 | using relaxed_dfs_range_engine = 65 | spatialcl::query::relaxed_dfs_range_query_engine; 67 | 68 | template 69 | using grouped_dfs_range_engine = 70 | spatialcl::query::grouped_dfs_range_query_engine; 73 | 74 | 75 | using particle_type = spatialcl::configuration::particle_type; 76 | using vector_type = spatialcl::configuration::vector_type; 77 | constexpr std::size_t dimension = type_system::dimension; 78 | 79 | template 80 | void run_benchmark(const std::string& name, 81 | const qcl::device_context_ptr& ctx, 82 | const tree_type& tree, 83 | const qcl::device_array& queries_min, 84 | const qcl::device_array& queries_max, 85 | qcl::device_array& result, 86 | qcl::device_array& num_results) 87 | { 88 | Query_engine query_engine; 89 | 90 | typename Query_engine::handler_type query_handler { 91 | queries_min.get_buffer(), 92 | queries_max.get_buffer(), 93 | result.get_buffer(), 94 | num_results.get_buffer(), 95 | queries_min.size() 96 | }; 97 | 98 | common::timer t; 99 | // Execute the query once before measuring to make sure 100 | // we do not take into account kernel compilation times. 101 | query_engine(tree, query_handler); 102 | 103 | cl_int err = ctx->get_command_queue().finish(); 104 | qcl::check_cl_error(err, "Error while executing range query"); 105 | 106 | t.start(); 107 | for (std::size_t run = 0; run < num_runs; ++run) 108 | { 109 | query_engine(tree, query_handler); 110 | 111 | err = ctx->get_command_queue().finish(); 112 | qcl::check_cl_error(err, "Error while executing range query"); 113 | } 114 | double time = t.stop(); 115 | 116 | std::size_t total_num_retrieved_particles = 0; 117 | std::vector host_num_particles; 118 | num_results.read(host_num_particles); 119 | 120 | for(const cl_uint n: host_num_particles) 121 | total_num_retrieved_particles += n; 122 | 123 | 124 | std::cout << "Benchmark " << name << " completed in " 125 | << time << "s => " << num_runs * queries_min.size() / time << " Queries/s" 126 | << " (retrieved: " << total_num_retrieved_particles << " particles)" << std::endl; 127 | } 128 | 129 | int main() 130 | { 131 | common::environment env; 132 | qcl::device_context_ptr ctx = env.get_device_context(); 133 | 134 | // Setup tree 135 | 136 | std::vector particles; 137 | common::random_vectors rnd; 138 | rnd(num_particles, particles); 139 | 140 | tree_type gpu_tree{ctx, particles}; 141 | 142 | // Create queries 143 | std::size_t total_num_queries = 144 | num_query_groups_xy * num_query_groups_xy * query_group_size_xy * query_group_size_xy; 145 | 146 | std::vector query_ranges_min; 147 | std::vector query_ranges_max; 148 | 149 | double query_stepsize = 1.0 / static_cast(num_query_groups_xy * query_group_size_xy); 150 | scalar query_diameter = static_cast(3.0 * query_stepsize); 151 | 152 | for (std::size_t x = 0; x < num_query_groups_xy; ++x) 153 | { 154 | for (std::size_t y = 0; y < num_query_groups_xy; ++y) 155 | { 156 | for (std::size_t tile_x = 0; tile_x < query_group_size_xy; ++tile_x) 157 | { 158 | for (std::size_t tile_y = 0; tile_y < query_group_size_xy; ++tile_y) 159 | { 160 | std::size_t query_id_x = x * query_group_size_xy + tile_x; 161 | std::size_t query_id_y = y * query_group_size_xy + tile_y; 162 | 163 | scalar x_position = static_cast(query_id_x * query_stepsize); 164 | scalar y_position = static_cast(query_id_y * query_stepsize); 165 | 166 | vector_type query_min, query_max; 167 | 168 | query_min.s[0] = x_position; 169 | query_min.s[1] = y_position; 170 | query_min.s[2] = 0.0f; 171 | 172 | query_max.s[0] = x_position + query_diameter; 173 | query_max.s[1] = y_position + query_diameter; 174 | query_max.s[2] = query_diameter; 175 | 176 | query_ranges_min.push_back(query_min); 177 | query_ranges_max.push_back(query_max); 178 | 179 | } 180 | } 181 | } 182 | } 183 | 184 | qcl::device_array device_ranges_min{ctx, query_ranges_min}; 185 | qcl::device_array device_ranges_max{ctx, query_ranges_max}; 186 | qcl::device_array result_particles{ctx, 187 | total_num_queries*max_retrieved_particles}; 188 | qcl::device_array result_num_retrieved_particles{ctx, 189 | total_num_queries}; 190 | 191 | #define RUN_BENCHMARK(engine) \ 192 | run_benchmark(BOOST_PP_STRINGIZE(engine), \ 193 | ctx, \ 194 | gpu_tree, \ 195 | device_ranges_min, \ 196 | device_ranges_max, \ 197 | result_particles, \ 198 | result_num_retrieved_particles) 199 | 200 | RUN_BENCHMARK(strict_dfs_range_engine); 201 | RUN_BENCHMARK(relaxed_dfs_range_engine); 202 | //RUN_BENCHMARK(grouped_dfs_range_engine<16>); 203 | RUN_BENCHMARK(grouped_dfs_range_engine<32>); 204 | RUN_BENCHMARK(grouped_dfs_range_engine<64>); 205 | RUN_BENCHMARK(grouped_dfs_range_engine<128>); 206 | RUN_BENCHMARK(grouped_dfs_range_engine<256>); 207 | RUN_BENCHMARK(grouped_dfs_range_engine<512>); 208 | 209 | return 0; 210 | } 211 | -------------------------------------------------------------------------------- /common/environment.hpp: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | * This file is part of SpatialCL, a library for the spatial processing of 4 | * particles. 5 | * 6 | * Copyright (c) 2017 Aksel Alpay 7 | * All rights reserved. 8 | * 9 | * Redistribution and use in source and binary forms, with or without 10 | * modification, are permitted provided that the following conditions are met: 11 | * 12 | * 1. Redistributions of source code must retain the above copyright notice, this 13 | * list of conditions and the following disclaimer. 14 | * 2. Redistributions in binary form must reproduce the above copyright notice, 15 | * this list of conditions and the following disclaimer in the documentation 16 | * and/or other materials provided with the distribution. 17 | * 18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 19 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 20 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 22 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 23 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 25 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 27 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | */ 29 | 30 | #ifndef COMMON_ENVIRONMENT_HPP 31 | #define COMMON_ENVIRONMENT_HPP 32 | 33 | #include 34 | #include 35 | 36 | namespace common { 37 | 38 | class environment 39 | { 40 | public: 41 | environment() 42 | { 43 | const cl::Platform& platform = _env.get_platform_by_preference({"NVIDIA", 44 | "AMD", 45 | "Intel"}); 46 | qcl::global_context_ptr global_ctx = _env.create_global_context(platform, 47 | CL_DEVICE_TYPE_GPU); 48 | if(global_ctx->get_num_devices() == 0) 49 | throw std::runtime_error("No available OpenCL devices!"); 50 | 51 | _ctx = global_ctx->device(); 52 | 53 | std::cout << "Using OpenCL device:\n"; 54 | std::cout << " Vendor: " << _ctx->get_device_vendor() << std::endl; 55 | std::cout << " Device name: " << _ctx->get_device_name() << std::endl; 56 | std::cout << "via Platform:\n"; 57 | std::cout << " Vendor: " << _env.get_platform_vendor(platform) << std::endl; 58 | std::cout << " Name: " << _env.get_platform_name(platform) << std::endl; 59 | 60 | } 61 | 62 | const qcl::device_context_ptr& get_device_context() const 63 | { 64 | return _ctx; 65 | } 66 | 67 | private: 68 | qcl::environment _env; 69 | qcl::device_context_ptr _ctx; 70 | }; 71 | 72 | } 73 | 74 | #endif 75 | -------------------------------------------------------------------------------- /common/random_vectors.hpp: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | * This file is part of SpatialCL, a library for the spatial processing of 4 | * particles. 5 | * 6 | * Copyright (c) 2017 Aksel Alpay 7 | * All rights reserved. 8 | * 9 | * Redistribution and use in source and binary forms, with or without 10 | * modification, are permitted provided that the following conditions are met: 11 | * 12 | * 1. Redistributions of source code must retain the above copyright notice, this 13 | * list of conditions and the following disclaimer. 14 | * 2. Redistributions in binary form must reproduce the above copyright notice, 15 | * this list of conditions and the following disclaimer in the documentation 16 | * and/or other materials provided with the distribution. 17 | * 18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 19 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 20 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 22 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 23 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 25 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 27 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | */ 29 | 30 | #ifndef RANDOM_VECTORS_HPP 31 | #define RANDOM_VECTORS_HPP 32 | 33 | #include 34 | #include 35 | 36 | namespace common { 37 | 38 | template 40 | class random_vectors 41 | { 42 | public: 43 | using vector_type = typename spatialcl::cl_vector_type 44 | < 45 | Scalar_type, 46 | Num_dimensions 47 | >::value; 48 | random_vectors(std::size_t seed = 1245) 49 | : _generator{seed} 50 | {} 51 | 52 | 53 | void operator()(std::size_t num_particles, 54 | std::vector& out, 55 | Scalar_type min_coordinates = 0.0f, 56 | Scalar_type max_coordinates = 1.0f) 57 | { 58 | out.clear(); 59 | 60 | std::uniform_real_distribution distribution{ 61 | min_coordinates, max_coordinates 62 | }; 63 | 64 | for(std::size_t i = 0; i < num_particles; ++i) 65 | { 66 | vector_type v = {}; 67 | 68 | for(std::size_t j = 0; j < Num_dimensions; ++j) 69 | v.s[j] = distribution(_generator); 70 | out.push_back(v); 71 | } 72 | } 73 | private: 74 | std::mt19937 _generator; 75 | }; 76 | 77 | } 78 | 79 | #endif 80 | -------------------------------------------------------------------------------- /common/timer.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of SpatialCL, a library for the spatial processing of 3 | * particles. 4 | * 5 | * Copyright (c) 2018 Aksel Alpay 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions are met: 10 | * 11 | * 1. Redistributions of source code must retain the above copyright notice, this 12 | * list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright notice, 14 | * this list of conditions and the following disclaimer in the documentation 15 | * and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 21 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | */ 28 | 29 | #ifndef TIMER_HPP 30 | #define TIMER_HPP 31 | 32 | #include 33 | #include 34 | 35 | namespace common { 36 | 37 | class timer 38 | { 39 | public: 40 | timer() 41 | : _is_running{false} 42 | {} 43 | 44 | inline 45 | bool is_running() const 46 | {return _is_running;} 47 | 48 | void start() 49 | { 50 | _is_running = true; 51 | _start = std::chrono::high_resolution_clock::now(); 52 | } 53 | 54 | double stop() 55 | { 56 | if(!_is_running) 57 | return 0.0; 58 | 59 | _stop = std::chrono::high_resolution_clock::now(); 60 | _is_running = false; 61 | 62 | auto ticks = std::chrono::duration_cast(_stop - _start).count(); 63 | return static_cast(ticks) * 1.e-9; 64 | } 65 | 66 | private: 67 | using time_point_type = 68 | std::chrono::time_point; 69 | time_point_type _start; 70 | time_point_type _stop; 71 | 72 | bool _is_running; 73 | }; 74 | 75 | class cumulative_timer 76 | { 77 | public: 78 | cumulative_timer() 79 | { 80 | reset(); 81 | } 82 | 83 | void reset() 84 | { 85 | _num_runs = 0; 86 | _total_runtime = 0.0; 87 | } 88 | 89 | double get_average_runtime() const 90 | { 91 | if(_num_runs == 0) 92 | return 0.0; 93 | 94 | return _total_runtime / static_cast(_num_runs); 95 | } 96 | 97 | double get_total_runtime() const 98 | { 99 | return _total_runtime; 100 | } 101 | 102 | unsigned get_num_runs() const 103 | { 104 | return _num_runs; 105 | } 106 | 107 | void start() 108 | { 109 | _timer.start(); 110 | } 111 | 112 | void stop() 113 | { 114 | _total_runtime += _timer.stop(); 115 | _num_runs++; 116 | } 117 | 118 | private: 119 | double _total_runtime; 120 | unsigned _num_runs; 121 | timer _timer; 122 | }; 123 | 124 | } 125 | 126 | #endif 127 | -------------------------------------------------------------------------------- /common/verification_knn.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of SpatialCL, a library for the spatial processing of 3 | * particles. 4 | * 5 | * Copyright (c) 2017, 2018 Aksel Alpay 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions are met: 10 | * 11 | * 1. Redistributions of source code must retain the above copyright notice, this 12 | * list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright notice, 14 | * this list of conditions and the following disclaimer in the documentation 15 | * and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 21 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | */ 28 | 29 | #ifndef VERIFICATION_KNN_HPP 30 | #define VERIFICATION_KNN_HPP 31 | 32 | #include 33 | 34 | #include 35 | #include 36 | #include 37 | 38 | // This is necessary to use std::find with cl_float4's 39 | // which is required in the verification process 40 | bool operator==(const cl_float4& a, 41 | const cl_float4& b) 42 | { 43 | return a.s[0] == b.s[0] && 44 | a.s[1] == b.s[1] && 45 | a.s[2] == b.s[2]; 46 | } 47 | 48 | // This is necessary to use std::find with cl_float4's 49 | // which is required in the verification process 50 | bool operator==(const cl_float2& a, 51 | const cl_float2& b) 52 | { 53 | return a.s[0] == b.s[0] && 54 | a.s[1] == b.s[1]; 55 | } 56 | 57 | namespace common { 58 | namespace verification { 59 | 60 | /// Verifies KNN queries by comparing results to the query results 61 | /// obtained from a naive (and inefficient) algorithm run on the CPU. 62 | template 63 | class naive_cpu_knn_verifier 64 | { 65 | public: 66 | using particle_type = 67 | typename spatialcl::configuration::particle_type; 68 | using vector_type = 69 | typename spatialcl::configuration::vector_type; 70 | using scalar = 71 | typename spatialcl::configuration::scalar; 72 | 73 | static constexpr std::size_t dimension = Type_descriptor::dimension; 74 | 75 | naive_cpu_knn_verifier(const std::vector& query_points) 76 | : _query_points{query_points} 77 | { 78 | } 79 | 80 | /// Verifies the result of a KNN query 81 | /// \return The number of detected wrong results 82 | /// \param particles The particle set of which the KNN should be found 83 | /// \param results The results of KNN queries described by the set of 84 | /// query points supplied in the constructor. Assumes that the memory layout 85 | /// is such that the i-th result of query j is located at 86 | /// \c result[j*K + i]. 87 | std::size_t operator()(const std::vector& particles, 88 | const std::vector& results) const 89 | { 90 | assert(results.size() == this->_query_points.size() * K); 91 | 92 | std::size_t num_errors = 0; 93 | for(std::size_t i = 0; i < _query_points.size(); ++i) 94 | { 95 | vector_type query = _query_points[i]; 96 | 97 | std::array knn = 98 | this->naive_knn_query(particles, query); 99 | 100 | auto results_begin = results.begin() + i * K; 101 | auto results_end = results_begin + K; 102 | 103 | for(particle_type p : knn) 104 | if(std::find(results_begin, results_end, p) == results_end) 105 | ++num_errors; 106 | 107 | } 108 | return num_errors; 109 | } 110 | 111 | private: 112 | 113 | std::array 114 | naive_knn_query(const std::vector& particles, 115 | vector_type query_point) const 116 | { 117 | 118 | std::array result; 119 | if(particles.size() < K) 120 | { 121 | std::copy(particles.begin(), particles.end(), result.begin()); 122 | return result; 123 | } 124 | 125 | std::array distances2; 126 | unsigned max_distance_idx = 0; 127 | 128 | for(std::size_t i = 0; i < K; ++i) 129 | distances2[i] = std::numeric_limits::max(); 130 | 131 | for(const auto p: particles) 132 | { 133 | particle_type delta = query_point; 134 | for(std::size_t i = 0; i < dimension; ++i) 135 | delta.s[i] -= p.s[i]; 136 | 137 | scalar dist2 = 0.0f; 138 | for(std::size_t i = 0; i < dimension; ++i) 139 | dist2 += delta.s[i] * delta.s[i]; 140 | 141 | if(dist2 < distances2[max_distance_idx]) 142 | { 143 | result[max_distance_idx] = p; 144 | distances2[max_distance_idx] = dist2; 145 | 146 | max_distance_idx = std::distance(distances2.begin(), 147 | std::max_element(distances2.begin(), 148 | distances2.end())); 149 | } 150 | } 151 | 152 | return result; 153 | } 154 | 155 | std::vector _query_points; 156 | }; 157 | 158 | } 159 | } 160 | 161 | #endif 162 | -------------------------------------------------------------------------------- /common/verification_range.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of SpatialCL, a library for the spatial processing of 3 | * particles. 4 | * 5 | * Copyright (c) 2017, 2018 Aksel Alpay 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions are met: 10 | * 11 | * 1. Redistributions of source code must retain the above copyright notice, this 12 | * list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright notice, 14 | * this list of conditions and the following disclaimer in the documentation 15 | * and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 21 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | */ 28 | 29 | #ifndef VERIFICATION_RANGE_HPP 30 | #define VERIFICATION_RANGE_HPP 31 | 32 | #include 33 | 34 | #include 35 | #include 36 | 37 | namespace common { 38 | namespace verification { 39 | 40 | template 41 | class naive_cpu_range_verifier 42 | { 43 | public: 44 | using particle_type = 45 | typename spatialcl::configuration::particle_type; 46 | using vector_type = 47 | typename spatialcl::configuration::vector_type; 48 | using scalar = 49 | typename spatialcl::configuration::scalar; 50 | 51 | static constexpr std::size_t dimension = Type_descriptor::dimension; 52 | 53 | naive_cpu_range_verifier(const std::vector& queries_min, 54 | const std::vector& queries_max, 55 | const std::size_t max_num_retrieved_particles) 56 | : _queries_min{queries_min}, 57 | _queries_max{queries_max}, 58 | _max_retrieved_particles{max_num_retrieved_particles} 59 | { 60 | assert(_queries_min.size() == _queries_max.size()); 61 | } 62 | 63 | /// Verifies the result of a range query 64 | /// \return The number of detected wrong results 65 | /// (both incomplete and incorrect results are counted) 66 | /// \param particles The particle set 67 | /// \param results The results of range queries described by the set of 68 | /// ranges supplied in the constructor. Assumes that the memory layout 69 | /// is such that the i-th result of query j is located at 70 | /// \c result[j*K + i]. 71 | /// \param A vector in which the i-th entry is the number of found 72 | /// particles in the given range. 73 | std::size_t operator()(const std::vector& particles, 74 | const std::vector& results, 75 | const std::vector& num_results) const 76 | { 77 | std::size_t num_errors = 0; 78 | assert(_queries_min.size() == num_results.size()); 79 | assert(results.size() == _queries_min.size()*_max_retrieved_particles); 80 | 81 | for(std::size_t i = 0; i < _queries_min.size(); ++i) 82 | { 83 | std::size_t num_particles = num_results[i]; 84 | for(std::size_t j = 0; j < num_particles; ++j) 85 | { 86 | particle_type particle = results[i*_max_retrieved_particles + j]; 87 | if(!is_particle_within_box(particle, _queries_min[i], _queries_max[i])) 88 | ++num_errors; 89 | } 90 | std::size_t correct_num_particles = 91 | get_num_particles_in_range(particles, 92 | _queries_min[i], 93 | _queries_max[i]); 94 | correct_num_particles = std::min(correct_num_particles, 95 | _max_retrieved_particles); 96 | if(num_particles != correct_num_particles) 97 | { 98 | ++num_errors; 99 | } 100 | } 101 | return num_errors; 102 | } 103 | private: 104 | bool is_particle_within_box(particle_type particle, 105 | vector_type box_min, 106 | vector_type box_max) const 107 | { 108 | for(std::size_t k = 0; k < dimension; ++k) 109 | if( particle.s[k] < box_min.s[k] 110 | || particle.s[k] > box_max.s[k]) 111 | return false; 112 | return true; 113 | } 114 | 115 | std::size_t get_num_particles_in_range(const std::vector& particles, 116 | vector_type query_range_min, 117 | vector_type query_range_max) const 118 | { 119 | std::size_t counter = 0; 120 | 121 | for(std::size_t i = 0; i < particles.size(); ++i) 122 | { 123 | if(is_particle_within_box(particles[i], 124 | query_range_min, 125 | query_range_max)) 126 | ++counter; 127 | } 128 | 129 | return counter; 130 | } 131 | 132 | std::vector _queries_min; 133 | std::vector _queries_max; 134 | 135 | const std::size_t _max_retrieved_particles; 136 | }; 137 | 138 | } 139 | } 140 | 141 | #endif 142 | -------------------------------------------------------------------------------- /examples/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | subdirs(nbody) 2 | -------------------------------------------------------------------------------- /examples/nbody/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(nbody nbody.cpp) 2 | target_link_libraries (nbody ${OpenCL_LIBRARIES} -lpng) 3 | -------------------------------------------------------------------------------- /examples/nbody/model.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of SpatialCL, a library for the spatial processing of 3 | * particles. 4 | * 5 | * Copyright (c) 2017, 2018 Aksel Alpay 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions are met: 10 | * 11 | * 1. Redistributions of source code must retain the above copyright notice, this 12 | * list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright notice, 14 | * this list of conditions and the following disclaimer in the documentation 15 | * and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 21 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | */ 28 | 29 | #ifndef NBODY_MODEL_HPP 30 | #define NBODY_MODEL_HPP 31 | 32 | #include 33 | #include 34 | #include 35 | 36 | #include "nbody.hpp" 37 | 38 | #include 39 | 40 | namespace nbody { 41 | namespace model { 42 | 43 | template 44 | class random_particle_cloud 45 | { 46 | public: 47 | using host_vector3d = std::array; 48 | 49 | random_particle_cloud(const host_vector3d& position, 50 | const host_vector3d& width, 51 | Scalar mean_mass, 52 | Scalar mass_distribution_width, 53 | const host_vector3d& mean_velocity, 54 | const host_vector3d& velocity_distribution_width) 55 | : _generator{generate_seed()} 56 | { 57 | for(std::size_t i = 0; i < 3; ++i) 58 | { 59 | _means[i] = position[i]; 60 | _stddevs[i] = width[i]; 61 | } 62 | 63 | _means[3] = mean_mass; 64 | _stddevs[3] = mass_distribution_width; 65 | 66 | for(std::size_t i = 0; i < 3; ++i) 67 | { 68 | _means[i+4] = mean_velocity[i]; 69 | _stddevs[i+4] = velocity_distribution_width[i]; 70 | } 71 | } 72 | 73 | using particle_type = typename nbody_simulation::particle_type; 74 | 75 | void sample(std::size_t n, 76 | std::vector& out) 77 | { 78 | out.resize(n); 79 | 80 | std::array, 7> distributions; 81 | for(std::size_t i = 0; i < distributions.size(); ++i) 82 | { 83 | distributions[i] = std::normal_distribution{ 84 | _means[i], 85 | _stddevs[i] 86 | }; 87 | } 88 | 89 | for(std::size_t i = 0; i < n; ++i) 90 | { 91 | particle_type sampled_particle; 92 | for(std::size_t j = 0; j < distributions.size(); ++j) 93 | { 94 | sampled_particle.s[j] = distributions[j](_generator); 95 | } 96 | // Make sure the mass is positive 97 | sampled_particle.s[3] = std::abs(sampled_particle.s[3]); 98 | out[i] = sampled_particle; 99 | } 100 | } 101 | 102 | private: 103 | static std::size_t generate_seed() 104 | { 105 | std::random_device rd; 106 | return rd(); 107 | } 108 | 109 | std::array _means; 110 | std::array _stddevs; 111 | 112 | std::mt19937 _generator; 113 | }; 114 | 115 | } 116 | } 117 | 118 | #endif 119 | -------------------------------------------------------------------------------- /examples/nbody/nbody.cpp: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | * This file is part of SpatialCL, a library for the spatial processing of 4 | * particles. 5 | * 6 | * Copyright (c) 2017, 2018 Aksel Alpay 7 | * All rights reserved. 8 | * 9 | * Redistribution and use in source and binary forms, with or without 10 | * modification, are permitted provided that the following conditions are met: 11 | * 12 | * 1. Redistributions of source code must retain the above copyright notice, this 13 | * list of conditions and the following disclaimer. 14 | * 2. Redistributions in binary form must reproduce the above copyright notice, 15 | * this list of conditions and the following disclaimer in the documentation 16 | * and/or other materials provided with the distribution. 17 | * 18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 19 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 20 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 22 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 23 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 25 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 27 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | */ 29 | 30 | 31 | #include 32 | #include 33 | #include 34 | #include 35 | 36 | #include 37 | 38 | #include "model.hpp" 39 | #include "nbody.hpp" 40 | #include "particle_renderer.hpp" 41 | 42 | #include "../../common/timer.hpp" 43 | 44 | using scalar = float; 45 | using particle_type = 46 | typename nbody::nbody_simulation::particle_type; 47 | 48 | 49 | constexpr scalar final_time = 100.f; 50 | constexpr scalar dt = 0.1f; 51 | constexpr scalar opening_angle = 0.5f; 52 | 53 | constexpr std::array viewport_center{0.0f, 0.0f, 0.0f}; 54 | constexpr std::array viewport_width{400.f, 400.f}; 55 | 56 | /// Creates the initial model: two random particle clouds 57 | void create_model(std::vector& particles) 58 | { 59 | std::vector cloud0, cloud1; 60 | 61 | std::size_t cloud0_num_particles = 100000; 62 | std::size_t cloud1_num_particles = 150000; 63 | 64 | nbody::model::random_particle_cloud cloud0_sampler{ 65 | {0.0f, 100.0f, 0.0f}, // position 66 | {10.0f, 15.0f, 11.0f}, // stddev position 67 | 1.0f*100000.f/static_cast(cloud0_num_particles), // particle mass 68 | 0.1f*100000.f/static_cast(cloud0_num_particles), // stddev particle mass 69 | {0.0f, -26.0f, 5.0f}, // velocity 70 | {5.0f, 20.0f, 12.f} // stddev velocity 71 | }; 72 | 73 | nbody::model::random_particle_cloud cloud1_sampler{ 74 | {50.0f, 5.0f, 0.0f}, // position 75 | {17.0f, 7.0f, 5.0f}, // stddev position 76 | 1.3f*150000.f/static_cast(cloud1_num_particles), // particle mass 77 | 0.2f*150000.f/static_cast(cloud1_num_particles), // stddev particle mass 78 | {-5.f, 20.0f, 1.0f}, // velocity 79 | {18.0f, 11.f, 8.f} // stddev velocity 80 | }; 81 | 82 | cloud0_sampler.sample(cloud0_num_particles, cloud0); 83 | cloud1_sampler.sample(cloud1_num_particles, cloud1); 84 | // Concatenate data 85 | particles.clear(); 86 | for(const auto& p : cloud0) 87 | { 88 | particles.push_back(p); 89 | } 90 | for(const auto& p : cloud1) 91 | { 92 | particles.push_back(p); 93 | } 94 | } 95 | 96 | void save_state(const qcl::device_array& particles, 97 | std::size_t step_id) 98 | { 99 | std::string filename = "nbody_"+std::to_string(step_id)+".dat"; 100 | 101 | std::vector data; 102 | particles.read(data); 103 | 104 | std::ofstream file{filename.c_str(), std::ios::trunc}; 105 | 106 | if(file.is_open()) 107 | { 108 | file << "# Format: x y z mass\n"; 109 | for(const particle_type& p : data) 110 | { 111 | file << p.s[0] << "\t " 112 | << p.s[1] << "\t " 113 | << p.s[2] << "\t " 114 | << p.s[3] << "\n"; 115 | } 116 | } 117 | else 118 | std::cout << "Warning: could not save state" << std::endl; 119 | } 120 | 121 | int main() 122 | { 123 | try 124 | { 125 | qcl::environment env; 126 | const cl::Platform& platform = 127 | env.get_platform_by_preference({"NVIDIA", 128 | "AMD", 129 | "Intel"}); 130 | 131 | qcl::global_context_ptr global_ctx = 132 | env.create_global_context(platform, 133 | CL_DEVICE_TYPE_GPU); 134 | 135 | if(global_ctx->get_num_devices() == 0) 136 | throw std::runtime_error("No available OpenCL devices!"); 137 | 138 | qcl::device_context_ptr ctx = global_ctx->device(); 139 | 140 | std::cout << "Using device: " << ctx->get_device_name() 141 | << " (Vendor: " << ctx->get_device_vendor() 142 | << ")" << std::endl; 143 | 144 | // Create model: two random particle clouds 145 | std::vector particles; 146 | create_model(particles); 147 | std::size_t total_num_particles = particles.size(); 148 | std::size_t num_interactions = total_num_particles * total_num_particles; 149 | 150 | // Copy particles to the GPU 151 | qcl::device_array device_particles{ctx, particles}; 152 | 153 | nbody::nbody_simulation simulation{ctx, device_particles}; 154 | nbody::particle_renderer renderer{ctx, 512, 512}; 155 | 156 | common::timer t; 157 | 158 | std::size_t step_id = 0; 159 | for(;simulation.get_current_time() < final_time; 160 | ++step_id) 161 | { 162 | std::cout << "t = " << simulation.get_current_time() << std::endl; 163 | std::cout << " Time step..." << std::endl; 164 | 165 | t.start(); 166 | simulation.time_step(opening_angle, dt); 167 | ctx->get_command_queue().finish(); 168 | double time = t.stop(); 169 | std::cout << " Current performance: " << num_interactions / time * 1.e-9 170 | << " Ginteractions/s" << std::endl; 171 | 172 | std::cout << " Rendering particles..." << std::endl; 173 | renderer.render(device_particles, 174 | viewport_center, viewport_width); 175 | std::cout << " Saving image..." << std::endl; 176 | 177 | renderer.save_png(std::string("nbody_") 178 | +std::to_string(step_id) 179 | +".png"); 180 | 181 | if(step_id % 100 == 0) 182 | { 183 | // Save state as text file 184 | std::cout << " Saving state..." << std::endl; 185 | save_state(device_particles, step_id); 186 | } 187 | 188 | std::cout << " Step complete." << std::endl; 189 | 190 | } 191 | std::cout << "Saving final state..." << std::endl; 192 | save_state(device_particles, step_id); 193 | 194 | } 195 | catch(std::exception& e) 196 | { 197 | std::cout << "Fatal error: " << e.what() << std::endl; 198 | } 199 | return 0; 200 | } 201 | 202 | -------------------------------------------------------------------------------- /examples/nbody/nbody.hpp: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | * This file is part of SpatialCL, a library for the spatial processing of 4 | * particles. 5 | * 6 | * Copyright (c) 2017, 2018 Aksel Alpay 7 | * All rights reserved. 8 | * 9 | * Redistribution and use in source and binary forms, with or without 10 | * modification, are permitted provided that the following conditions are met: 11 | * 12 | * 1. Redistributions of source code must retain the above copyright notice, this 13 | * list of conditions and the following disclaimer. 14 | * 2. Redistributions in binary form must reproduce the above copyright notice, 15 | * this list of conditions and the following disclaimer in the documentation 16 | * and/or other materials provided with the distribution. 17 | * 18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 19 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 20 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 22 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 23 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 25 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 27 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | */ 29 | 30 | #ifndef NBODY_HPP 31 | #define NBODY_HPP 32 | 33 | #include 34 | #include 35 | #include 36 | 37 | #include 38 | #include 39 | #include 40 | 41 | #include 42 | #include 43 | 44 | #include "nbody_tree.hpp" 45 | 46 | 47 | namespace nbody { 48 | 49 | template 50 | class nbody_query_handler : public spatialcl::query::basic_query 51 | { 52 | public: 53 | QCL_MAKE_MODULE(nbody_query_handler) 54 | 55 | /// \param evaluation_particles particles at whose location the 56 | /// acceleration should be calculated 57 | /// \param accelerations output buffer for the accelerations, 58 | /// must have \c num_particles entries of \c vector_type 59 | /// \param num_particles The number of particles in the \c 60 | /// evaluation_particles buffer. 61 | /// \param The opening angle for the tree walk 62 | nbody_query_handler(const cl::Buffer& evaluation_particles, 63 | std::size_t num_particles, 64 | Scalar opening_angle, 65 | const cl::Buffer& accelerations) 66 | : _eval_particles{evaluation_particles}, 67 | _accelerations{accelerations}, 68 | _eval_num_particles{num_particles}, 69 | _opening_angle_squared{opening_angle * opening_angle} 70 | { 71 | assert(opening_angle > 0.0f); 72 | } 73 | 74 | virtual void push_full_arguments(qcl::kernel_call& call) override 75 | { 76 | call.partial_argument_list(_eval_particles, 77 | _accelerations, 78 | static_cast(_eval_num_particles), 79 | _opening_angle_squared); 80 | } 81 | 82 | virtual std::size_t get_num_independent_queries() const override 83 | { 84 | return _eval_num_particles; 85 | } 86 | 87 | virtual ~nbody_query_handler(){} 88 | private: 89 | cl::Buffer _eval_particles; 90 | cl::Buffer _accelerations; 91 | 92 | std::size_t _eval_num_particles; 93 | Scalar _opening_angle_squared; 94 | public: 95 | QCL_MAKE_SOURCE( 96 | QCL_INCLUDE_MODULE(spatialcl::configuration>) 97 | QCL_PREPROCESSOR(define, gravitational_softening_squared 1.e-4f) 98 | QCL_PREPROCESSOR(define, 99 | dfs_node_selector(selection_result_ptr, 100 | current_node_key_ptr, 101 | node_index, 102 | bbox_min_corner, 103 | bbox_max_corner) 104 | { 105 | scalar node_width = bbox_max_corner.w; 106 | vector_type delta = bbox_min_corner - evaluation_position; 107 | scalar r2 = VECTOR_NORM2(delta); 108 | *selection_result_ptr = (node_width*node_width > r2*opening_angle_squared); 109 | } 110 | ) 111 | QCL_PREPROCESSOR(define, 112 | dfs_unique_node_discard_event(node_idx, 113 | bbox_min_corner, 114 | bbox_max_corner) 115 | { 116 | // Evaluate monopole 117 | vector_type delta = bbox_min_corner - evaluation_position; 118 | scalar r2 = VECTOR_NORM2(delta); 119 | 120 | acceleration.s012 += 121 | native_divide(bbox_min_corner.w * fast_normalize(delta.s012), r2+gravitational_softening_squared); 122 | } 123 | ) 124 | QCL_PREPROCESSOR(define, 125 | dfs_particle_processor(selection_result_ptr, 126 | particle_idx, 127 | current_particle) 128 | { 129 | // Add particle's contribution 130 | vector_type delta = PARTICLE_POSITION(current_particle)-evaluation_position; 131 | scalar r2 = VECTOR_NORM2(delta); 132 | vector_type contribution; 133 | contribution.s012 = 134 | current_particle.s3 * delta.s012 * rsqrt(r2) / 135 | (r2 + gravitational_softening_squared); 136 | // This ensures that the force is not calculated from 137 | // the particle to itself 138 | if(particle_idx != get_query_id()) 139 | acceleration += contribution; 140 | 141 | // For the relaxed and grouped dfs engine, we need to avoid remaining 142 | // at the lowest node indefinitely. We ensure this by 143 | // deselecting particles. This causes the query engine to go 144 | // up again if we are at a right node. 145 | *selection_result_ptr = 0; 146 | } 147 | ) 148 | R"( 149 | #define declare_full_query_parameter_set() \ 150 | __global particle_type* evaluated_particles, \ 151 | __global vector_type* accelerations, \ 152 | ulong num_evaluated_particles, \ 153 | scalar opening_angle_squared 154 | )" 155 | QCL_PREPROCESSOR(define, 156 | at_query_init() 157 | vector_type evaluation_position; 158 | if(get_query_id() < num_evaluated_particles) 159 | evaluation_position = PARTICLE_POSITION(evaluated_particles[get_query_id()]); 160 | vector_type acceleration = (vector_type)0.0f; 161 | ) 162 | QCL_PREPROCESSOR(define, 163 | at_query_exit() 164 | if(get_query_id() < num_evaluated_particles) 165 | accelerations[get_query_id()] = acceleration; 166 | ) 167 | QCL_PREPROCESSOR(define, 168 | get_num_queries() 169 | num_evaluated_particles 170 | ) 171 | ) 172 | }; 173 | 174 | template 175 | class nbody_integrator 176 | { 177 | public: 178 | nbody_integrator(const qcl::device_context_ptr& ctx) 179 | : _ctx{ctx}, _previous_dt{0.0f}, _t{0.0f} 180 | {} 181 | 182 | void advance(const cl::Buffer& particles, 183 | const cl::Buffer& accelerations, 184 | std::size_t num_particles, 185 | Scalar dt) 186 | { 187 | cl_int err = 188 | leapfrog_advance(_ctx, 189 | cl::NDRange{num_particles}, 190 | cl::NDRange{256})( 191 | particles, 192 | accelerations, 193 | static_cast(num_particles), 194 | _previous_dt, 195 | dt); 196 | qcl::check_cl_error(err, "Could not enqueue leapfrog_advance kernel"); 197 | 198 | err = _ctx->get_command_queue().finish(); 199 | qcl::check_cl_error(err, "Error while waiting for leapfrog_advance kernel" 200 | " to finish."); 201 | 202 | _t += dt; 203 | 204 | _previous_dt = dt; 205 | } 206 | 207 | Scalar get_current_time() const 208 | { 209 | return _t; 210 | } 211 | 212 | private: 213 | qcl::device_context_ptr _ctx; 214 | Scalar _previous_dt; 215 | Scalar _t; 216 | 217 | QCL_MAKE_MODULE(nbody_integrator) 218 | QCL_ENTRYPOINT(leapfrog_advance) 219 | QCL_MAKE_SOURCE( 220 | QCL_INCLUDE_MODULE(spatialcl::configuration>) 221 | QCL_RAW( 222 | __kernel void leapfrog_advance(__global particle_type* particles, 223 | __global vector_type* accelerations, 224 | ulong num_particles, 225 | scalar previous_dt, 226 | scalar dt) 227 | { 228 | size_t tid = get_global_id(0); 229 | 230 | if(tid < num_particles) 231 | { 232 | particle_type p = particles[tid]; 233 | vector_type acceleration = accelerations[tid]; 234 | 235 | // Bring v to the current state 236 | p.s456 += acceleration.s012 * previous_dt * 0.5f; 237 | 238 | // Calculate v_i+1/2 239 | p.s456 += acceleration.s012 * dt * 0.5f; 240 | // Update position 241 | p.s012 += p.s456 * dt; 242 | // We need to update the velocities to i+1, 243 | // but for performance reasons we do this always 244 | // at the beginning of a step 245 | 246 | // Save result 247 | particles[tid] = p; 248 | } 249 | } 250 | 251 | ) 252 | ) 253 | }; 254 | 255 | 256 | 257 | template 258 | class nbody_simulation 259 | { 260 | public: 261 | using nbody_tree_ptr = std::unique_ptr>; 262 | 263 | using particle_type = 264 | typename spatialcl::configuration>::particle_type; 265 | using vector_type = 266 | typename spatialcl::configuration>::vector_type; 267 | using host_vector3d = std::array; 268 | 269 | static particle_type encode_particle(vector_type position, 270 | Scalar mass, 271 | vector_type velocity) 272 | { 273 | particle_type result; 274 | for(std::size_t i = 0; i < 3; ++i) 275 | result.s[i] = position.s[i]; 276 | 277 | result.s[3] = mass; 278 | 279 | for(std::size_t i = 0; i < 3; ++i) 280 | result.s[i+4] = velocity.s[i]; 281 | 282 | return result; 283 | } 284 | 285 | 286 | nbody_simulation(const qcl::device_context_ptr& ctx, 287 | const qcl::device_array& initial_particles) 288 | : _ctx{ctx}, 289 | _particles{initial_particles}, 290 | _integrator{ctx}, 291 | _acceleration{ctx, initial_particles.size()} 292 | {} 293 | 294 | void time_step(Scalar opening_angle, 295 | Scalar dt = 0.1f) 296 | { 297 | // Calculate acceleration 298 | // - Build tree 299 | // -- Discard old tree 300 | _tree.reset(nullptr); 301 | // -- Create new one 302 | _tree = nbody_tree_ptr{new nbody_tree{_ctx, _particles}}; 303 | 304 | // - Query tree to obtain accelerations 305 | // -- Define queries 306 | using query_handler = nbody_query_handler; 307 | using query_engine = 308 | spatialcl::query::grouped_dfs_query_engine< 309 | nbody_tree, 310 | query_handler, 311 | 32 312 | >; 313 | 314 | query_engine engine; 315 | query_handler handler{ 316 | _particles.get_buffer(), 317 | _particles.size(), 318 | opening_angle, 319 | _acceleration.get_buffer() 320 | }; 321 | // -- Execute query to obtain accelerations 322 | cl_int err = engine(*_tree, handler); 323 | qcl::check_cl_error(err, "Error during tree query!"); 324 | 325 | // Perform time integration 326 | _integrator.advance(_particles.get_buffer(), 327 | _acceleration.get_buffer(), 328 | _particles.size(), 329 | dt); 330 | } 331 | 332 | Scalar get_current_time() const 333 | { 334 | return _integrator.get_current_time(); 335 | } 336 | 337 | void retrieve_results(std::vector& positions, 338 | std::vector& velocities, 339 | std::vector& masses) const 340 | { 341 | std::vector raw_data; 342 | _particles.read(raw_data); 343 | 344 | positions.resize(_particles.size()); 345 | velocities.resize(_particles.size()); 346 | masses.resize(_particles.size()); 347 | 348 | assert(raw_data.size() == _particles.size()); 349 | for(std::size_t i = 0; i < raw_data.size(); ++i) 350 | { 351 | for(std::size_t j = 0; j < 3; ++j) 352 | positions[i][j] = raw_data[i].s[j]; 353 | 354 | masses[i] = raw_data[i].s[3]; 355 | 356 | for(std::size_t j = 0; j < 3; ++j) 357 | velocities[i][j] = raw_data[i].s[4+j]; 358 | } 359 | } 360 | private: 361 | qcl::device_context_ptr _ctx; 362 | nbody_tree_ptr _tree; 363 | qcl::device_array _particles; 364 | 365 | nbody_integrator _integrator; 366 | 367 | qcl::device_array _acceleration; 368 | }; 369 | 370 | } 371 | 372 | #endif 373 | -------------------------------------------------------------------------------- /examples/nbody/nbody_tree.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of SpatialCL, a library for the spatial processing of 3 | * particles. 4 | * 5 | * Copyright (c) 2017, 2018 Aksel Alpay 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions are met: 10 | * 11 | * 1. Redistributions of source code must retain the above copyright notice, this 12 | * list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright notice, 14 | * this list of conditions and the following disclaimer in the documentation 15 | * and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 21 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | */ 28 | 29 | #ifndef NBODY_TREE 30 | #define NBODY_TREE 31 | 32 | #include 33 | 34 | #include 35 | #include 36 | #include 37 | 38 | #include 39 | #include 40 | #include 41 | 42 | namespace nbody { 43 | 44 | // We treat particles as having 8 components in order to save the particle 45 | // velocity directly with the particle 46 | template 47 | using nbody_type_descriptor = spatialcl::type_descriptor::generic; 48 | 49 | template 50 | using hilbert_sorter = 51 | spatialcl::key_based_sorter< 52 | spatialcl::hilbert_sort_key_generator< 53 | nbody_type_descriptor 54 | > 55 | >; 56 | 57 | template 58 | using nbody_basic_tree = spatialcl::particle_tree 59 | < 60 | hilbert_sorter, 61 | nbody_type_descriptor, 62 | typename spatialcl::configuration>::vector_type, 63 | typename spatialcl::configuration>::vector_type 64 | >; 65 | 66 | template 67 | class nbody_tree : public nbody_basic_tree 68 | { 69 | public: 70 | QCL_MAKE_MODULE(nbody_tree) 71 | 72 | using particle_type = 73 | typename spatialcl::configuration>::particle_type; 74 | using vector_type = 75 | typename spatialcl::configuration>::vector_type; 76 | 77 | 78 | nbody_tree(const qcl::device_context_ptr& ctx, 79 | const qcl::device_array& particles) 80 | : nbody_basic_tree{ctx, particles}, _ctx{ctx} 81 | { 82 | // Need at least two particles for the tree 83 | assert(particles.size() > 2); 84 | this->init_multipoles(); 85 | } 86 | 87 | private: 88 | void init_multipoles() 89 | { 90 | assert(this->get_num_node_levels() > 0); 91 | // Build monopoles for lowest level. 92 | 93 | // First, build lowest level 94 | cl_int err = this->build_ll_monopoles(_ctx, 95 | cl::NDRange{this->get_num_particles()}, 96 | cl::NDRange{256}) 97 | (this->get_node_values0(), 98 | this->get_node_values1(), 99 | this->get_sorted_particles(), 100 | static_cast(this->get_num_particles())); 101 | 102 | qcl::check_cl_error(err, "Could not enqueue build_ll_monopoles kernel"); 103 | 104 | for(int level = this->get_num_node_levels()-2; level >= 0; --level) 105 | { 106 | // Build higher levels 107 | err = this->build_monopoles(_ctx, 108 | cl::NDRange{1ul << level}, 109 | cl::NDRange{256}) 110 | (this->get_node_values0(), 111 | this->get_node_values1(), 112 | static_cast(level), 113 | static_cast(this->get_num_particles()), 114 | static_cast(this->get_effective_num_particles()), 115 | static_cast(this->get_effective_num_levels())); 116 | qcl::check_cl_error(err, "Could not enqueue build_monopoles kernel"); 117 | } 118 | err = _ctx->get_command_queue().finish(); 119 | qcl::check_cl_error(err,"Error while waiting for build_monopoles kernel to finish"); 120 | } 121 | 122 | qcl::device_context_ptr _ctx; 123 | 124 | 125 | QCL_ENTRYPOINT(build_ll_monopoles) 126 | QCL_ENTRYPOINT(build_monopoles) 127 | QCL_MAKE_SOURCE( 128 | QCL_INCLUDE_MODULE(spatialcl::configuration>) 129 | QCL_INCLUDE_MODULE(spatialcl::binary_tree) 130 | QCL_RAW ( 131 | // Build monopoles on lowest level 132 | __kernel void build_ll_monopoles(__global vector_type* monopoles, 133 | __global vector_type* node_widths, 134 | __global particle_type* particles, 135 | ulong num_particles) 136 | { 137 | ulong num_nodes = num_particles >> 1; 138 | if(num_particles & 1) 139 | ++num_nodes; 140 | 141 | for(ulong tid = get_global_id(0); 142 | tid < num_nodes; 143 | tid += get_global_size(0)) 144 | { 145 | ulong left_particle_idx = tid << 1; 146 | ulong right_particle_idx = left_particle_idx + 1; 147 | 148 | particle_type left_particle = particles[left_particle_idx]; 149 | particle_type right_particle = left_particle; 150 | if(right_particle_idx < num_particles) 151 | right_particle = particles[right_particle_idx]; 152 | 153 | vector_type monopole; 154 | monopole.xyz = left_particle.s3 * left_particle.s012; 155 | monopole.xyz += right_particle.s3 * right_particle.s012; 156 | 157 | scalar total_mass = left_particle.s3 + right_particle.s3; 158 | 159 | monopole.xyz /= total_mass; 160 | monopole.w = total_mass; 161 | 162 | vector_type node_extent; 163 | node_extent.xyz = fabs(left_particle.xyz - right_particle.xyz); 164 | node_extent.w = 0.33f * (node_extent.x + node_extent.y + node_extent.z); 165 | 166 | node_widths[tid] = node_extent; 167 | monopoles [tid] = monopole; 168 | 169 | } 170 | } 171 | // Build monopoles on higher levels 172 | __kernel void build_monopoles(__global vector_type* monopoles, 173 | __global vector_type* node_widths, 174 | uint current_level, 175 | ulong num_particles, 176 | ulong effective_num_particles, 177 | ulong effective_num_levels) 178 | { 179 | ulong num_nodes = BT_NUM_NODES(current_level); 180 | 181 | for(ulong tid = get_global_id(0); 182 | tid < num_nodes; 183 | tid += get_global_size(0)) 184 | { 185 | binary_tree_key_t node_key; 186 | binary_tree_key_init(&node_key, current_level, tid); 187 | 188 | if(binary_tree_is_node_used(&node_key, 189 | effective_num_levels, 190 | num_particles)) 191 | { 192 | binary_tree_key_t children_begin = binary_tree_get_children_begin(&node_key); 193 | binary_tree_key_t children_last = binary_tree_get_children_last (&node_key); 194 | 195 | int right_child_exists = binary_tree_is_node_used(&children_last, 196 | effective_num_levels, 197 | num_particles); 198 | 199 | ulong effective_child_idx = binary_tree_key_encode_global_id(&children_begin, 200 | effective_num_levels) 201 | - effective_num_particles; 202 | 203 | vector_type left_child_monopole = monopoles[effective_child_idx]; 204 | vector_type right_child_monopole = left_child_monopole; 205 | 206 | vector_type left_child_node_extent = node_widths[effective_child_idx]; 207 | vector_type right_child_node_extent = (vector_type)0.0f; 208 | 209 | if(right_child_exists) 210 | { 211 | right_child_monopole = monopoles[effective_child_idx + 1]; 212 | right_child_node_extent = node_widths[effective_child_idx + 1]; 213 | } 214 | scalar left_mass = left_child_monopole.w; 215 | scalar right_mass = right_child_monopole.w; 216 | // Calculate center of mass 217 | vector_type parent_monopole = 218 | left_mass * left_child_monopole + right_mass * right_child_monopole; 219 | scalar total_mass = left_mass + right_mass; 220 | parent_monopole.xyz /= total_mass; 221 | // Set total mass 222 | parent_monopole.w = total_mass; 223 | 224 | ulong effective_node_idx = binary_tree_key_encode_global_id(&node_key, 225 | effective_num_levels) 226 | - effective_num_particles; 227 | // Set result 228 | monopoles[effective_node_idx] = parent_monopole; 229 | 230 | vector_type node_extent; 231 | node_extent.xyz = fmax(left_child_monopole.xyz + 0.5f * left_child_node_extent.xyz, 232 | right_child_monopole.xyz + 0.5f * right_child_node_extent.xyz) 233 | - fmin(left_child_monopole.xyz - 0.5f * left_child_node_extent.xyz, 234 | right_child_monopole.xyz - 0.5f * right_child_node_extent.xyz); 235 | 236 | //node_extent.w = 3.f/(4.f * M_PI) * cbrt(node_extent.x * node_extent.y * node_extent.z); 237 | node_extent.w = fmax(node_extent.x, fmax(node_extent.y, node_extent.z)); 238 | node_widths[effective_node_idx] = node_extent; 239 | } 240 | } 241 | } 242 | ) 243 | ) 244 | }; 245 | 246 | 247 | } 248 | 249 | #endif 250 | -------------------------------------------------------------------------------- /examples/nbody/particle_renderer.hpp: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | * This file is part of SpatialCL, a library for the spatial processing of 4 | * particles. 5 | * 6 | * Copyright (c) 2017, 2018 Aksel Alpay 7 | * All rights reserved. 8 | * 9 | * Redistribution and use in source and binary forms, with or without 10 | * modification, are permitted provided that the following conditions are met: 11 | * 12 | * 1. Redistributions of source code must retain the above copyright notice, this 13 | * list of conditions and the following disclaimer. 14 | * 2. Redistributions in binary form must reproduce the above copyright notice, 15 | * this list of conditions and the following disclaimer in the documentation 16 | * and/or other materials provided with the distribution. 17 | * 18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 19 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 20 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 22 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 23 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 25 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 27 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | */ 29 | 30 | #ifndef PARTICLE_RENDERER_HPP 31 | #define PARTICLE_RENDERER_HPP 32 | 33 | #include 34 | #include 35 | 36 | #include 37 | #include 38 | #include 39 | #include 40 | 41 | #include 42 | 43 | #include 44 | 45 | #include 46 | 47 | #include "nbody_tree.hpp" 48 | 49 | namespace nbody { 50 | 51 | template 52 | class particle_renderer 53 | { 54 | public: 55 | QCL_MAKE_MODULE(particle_renderer) 56 | 57 | using particle_type = 58 | typename spatialcl::configuration>::particle_type; 59 | 60 | 61 | particle_renderer(const qcl::device_context_ptr& ctx, 62 | std::size_t bins_x, 63 | std::size_t bins_y) 64 | : _ctx{ctx}, 65 | _bins_x{bins_x}, 66 | _bins_y{bins_y}, 67 | _rgb_histogram{ctx, bins_x * bins_y} 68 | { 69 | assert(bins_x > 0); 70 | assert(bins_y > 0); 71 | } 72 | 73 | void render(const qcl::device_array& particles, 74 | const std::array& render_plane_center, 75 | const std::array& render_plane_size) 76 | { 77 | // We do not use enqueueFillBuffer to reset the buffer to 0 78 | // because the pocl cuda backend currently does not support it. 79 | cl_int err = reset_buffer(_ctx, 80 | cl::NDRange{_bins_x * _bins_y}, 81 | cl::NDRange{128})(_rgb_histogram, 82 | static_cast(_bins_x*_bins_y)); 83 | 84 | qcl::check_cl_error(err, "Could not reset histogram buffer"); 85 | 86 | Scalar cell_size_x = render_plane_size[0] / static_cast(_bins_x); 87 | Scalar cell_size_y = render_plane_size[1] / static_cast(_bins_y); 88 | 89 | Scalar min_x, min_y; 90 | min_x = render_plane_center[0] - render_plane_size[0] / 2.0f; 91 | min_y = render_plane_center[1] - render_plane_size[1] / 2.0f; 92 | 93 | err = count_masses_per_bin(_ctx, 94 | cl::NDRange{particles.size()}, 95 | cl::NDRange{128})( 96 | particles, 97 | particles.size(), 98 | min_x, min_y, 99 | cell_size_x, cell_size_y, 100 | static_cast(_bins_x), static_cast(_bins_y), 101 | _rgb_histogram); 102 | 103 | qcl::check_cl_error(err, "Could not enqueue count_masses_per_bin kernel"); 104 | 105 | // Determine maximum bin value 106 | boost::compute::command_queue boost_queue{_ctx->get_command_queue().get()}; 107 | auto max_iterator = boost::compute::max_element( 108 | qcl::create_buffer_iterator(_rgb_histogram.get_buffer(), 0), 109 | qcl::create_buffer_iterator(_rgb_histogram.get_buffer(), _bins_x*_bins_y), 110 | boost_queue); 111 | 112 | cl_uint maximum_mass = max_iterator.read(boost_queue); 113 | if(maximum_mass == 0) 114 | maximum_mass = 1; 115 | // Calculate rgb values 116 | err = mass_to_rgb(_ctx, 117 | cl::NDRange{_bins_x * _bins_y}, 118 | cl::NDRange{128})( 119 | _rgb_histogram, 120 | static_cast(_bins_x * _bins_y), 121 | maximum_mass); 122 | 123 | qcl::check_cl_error(err, "Could not enqueue mass_to_rgb kernel"); 124 | 125 | err = _ctx->get_command_queue().finish(); 126 | 127 | qcl::check_cl_error(err, "Error while waiting for mass_to_rgb " 128 | "kernel to finish"); 129 | } 130 | 131 | void read_rendered_image(std::vector& result) const 132 | { 133 | _rgb_histogram.read(result); 134 | } 135 | 136 | void save_png(const std::string& filename) const 137 | { 138 | png::image image{ 139 | static_cast(_bins_x), 140 | static_cast(_bins_y) 141 | }; 142 | 143 | std::vector raw_data; 144 | read_rendered_image(raw_data); 145 | 146 | for (std::size_t y = 0; y < image.get_height(); ++y) 147 | { 148 | for (std::size_t x = 0; x < image.get_width(); ++x) 149 | { 150 | cl_uint pixel = raw_data[y * _bins_x + x]; 151 | cl_uchar* pixel_bytes = 152 | reinterpret_cast(&pixel); 153 | 154 | 155 | image[y][x] = png::rgb_pixel(pixel_bytes[0], 156 | pixel_bytes[1], 157 | pixel_bytes[2]); 158 | } 159 | } 160 | 161 | image.write(filename.c_str()); 162 | } 163 | 164 | 165 | std::size_t get_num_bins_x() const 166 | { 167 | return _bins_x; 168 | } 169 | 170 | std::size_t get_num_bins_y() const 171 | { 172 | return _bins_y; 173 | } 174 | 175 | static constexpr Scalar mass_quantum = 0.01f; 176 | 177 | private: 178 | QCL_ENTRYPOINT(reset_buffer) 179 | QCL_ENTRYPOINT(count_masses_per_bin) 180 | QCL_ENTRYPOINT(mass_to_rgb) 181 | QCL_MAKE_SOURCE( 182 | QCL_IMPORT_TYPE(particle_type) 183 | QCL_IMPORT_TYPE(Scalar) 184 | QCL_IMPORT_CONSTANT(mass_quantum) 185 | QCL_RAW( 186 | __kernel void reset_buffer(__global uint* buffer, 187 | ulong num_entries) 188 | { 189 | size_t tid = get_global_id(0); 190 | 191 | if(tid < num_entries) 192 | buffer[tid] = 0; 193 | } 194 | 195 | 196 | __kernel void count_masses_per_bin(__global particle_type* particles, 197 | ulong num_particles, 198 | Scalar grid_min_x, 199 | Scalar grid_min_y, 200 | Scalar cell_size_x, 201 | Scalar cell_size_y, 202 | ulong num_bins_x, 203 | ulong num_bins_y, 204 | __global volatile uint* output) 205 | { 206 | size_t tid = get_global_id(0); 207 | 208 | for(; tid < num_particles; 209 | tid += get_global_size(0)) 210 | { 211 | particle_type p = particles[tid]; 212 | 213 | uint discretized_mass = convert_uint_rte(p.s3 / mass_quantum); 214 | 215 | int bin_x = convert_int_rtn((p.s0 - grid_min_x) / cell_size_x); 216 | int bin_y = convert_int_rtn((p.s1 - grid_min_y) / cell_size_y); 217 | 218 | if(bin_x >= 0 && bin_x < num_bins_x) 219 | { 220 | if(bin_y >= 0 && bin_y < num_bins_y) 221 | { 222 | ulong bin_index = bin_y * num_bins_x + bin_x; 223 | atomic_add(&(output[bin_index]), discretized_mass); 224 | } 225 | } 226 | } 227 | } 228 | 229 | __kernel void mass_to_rgb(__global uint* discretized_masses, 230 | ulong total_num_bins, 231 | uint maximum_mass) 232 | { 233 | size_t tid = get_global_id(0); 234 | 235 | if(tid < total_num_bins) 236 | { 237 | 238 | // We adopt a sqrt scale for better visibility of 239 | // low density structures 240 | Scalar relative_mass = sqrt((Scalar)(discretized_masses[tid])) / 241 | sqrt((Scalar)maximum_mass); 242 | 243 | uint rgb = 0; 244 | 245 | uchar color_value = (uchar)(255 * clamp(relative_mass, 0.0f, 1.0f)); 246 | for(int i = 0; i < 4; ++i) 247 | { 248 | rgb <<= 8; 249 | rgb |= (uint)color_value; 250 | } 251 | 252 | discretized_masses[tid] = rgb; 253 | } 254 | } 255 | 256 | ) 257 | ) 258 | 259 | qcl::device_context_ptr _ctx; 260 | 261 | std::size_t _bins_x; 262 | std::size_t _bins_y; 263 | 264 | qcl::device_array _rgb_histogram; 265 | }; 266 | 267 | } 268 | 269 | #endif 270 | -------------------------------------------------------------------------------- /tests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | subdirs(construction range_query knn_query) 2 | -------------------------------------------------------------------------------- /tests/construction/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | add_executable(basic basic.cpp) 4 | target_link_libraries (basic ${OpenCL_LIBRARIES}) 5 | -------------------------------------------------------------------------------- /tests/construction/basic.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of SpatialCL, a library for the spatial processing of 3 | * particles. 4 | * 5 | * Copyright (c) 2017 Aksel Alpay 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions are met: 10 | * 11 | * 1. Redistributions of source code must retain the above copyright notice, this 12 | * list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright notice, 14 | * this list of conditions and the following disclaimer in the documentation 15 | * and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 21 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | */ 28 | 29 | #include 30 | #include 31 | 32 | 33 | #include 34 | #include "../../common/environment.hpp" 35 | #include "../../common/random_vectors.hpp" 36 | 37 | void print_vector(const std::vector& data, std::size_t begin, std::size_t size) 38 | { 39 | std::cout << "["; 40 | for(std::size_t i = begin; i < data.size() && (i < begin+size); ++i) 41 | { 42 | std::cout << "(" 43 | << data[i].s[0] << ", " 44 | << data[i].s[1] << ", " 45 | << data[i].s[2] << ")"; 46 | if(i != data.size() - 1 && i != (begin+size-1)) 47 | std::cout << ","; 48 | std::cout << std::endl; 49 | } 50 | std::cout << "]" << std::endl; 51 | } 52 | 53 | int main(int argc, char** argv) 54 | { 55 | // Setup environment 56 | common::environment env; 57 | qcl::device_context_ptr ctx = env.get_device_context(); 58 | 59 | // Create random particles 60 | std::vector particles; 61 | common::random_vectors rnd; 62 | rnd(128, particles); 63 | 64 | // Construct tree from particles 65 | spatialcl::hilbert_bvh_sp3d_tree<3> gpu_tree{ctx, particles}; 66 | 67 | // Retrieve bounding boxes of the tree 68 | cl::Buffer bbox_min_corners = gpu_tree.get_bbox_min_corners(); 69 | cl::Buffer bbox_max_corners = gpu_tree.get_bbox_max_corners(); 70 | 71 | std::vector host_min_corners(gpu_tree.get_num_nodes()); 72 | std::vector host_max_corners(gpu_tree.get_num_nodes()); 73 | std::vector sorted_particles(particles.size()); 74 | 75 | ctx->memcpy_d2h(host_min_corners.data(), bbox_min_corners, gpu_tree.get_num_nodes()); 76 | ctx->memcpy_d2h(host_max_corners.data(), bbox_max_corners, gpu_tree.get_num_nodes()); 77 | ctx->memcpy_d2h(sorted_particles.data(), gpu_tree.get_sorted_particles(), particles.size()); 78 | 79 | // Print nodes at the different tree levels 80 | 81 | std::cout << "l6min="; 82 | print_vector(host_min_corners,0,64); 83 | std::cout << "l5min="; 84 | print_vector(host_min_corners,64,32); 85 | std::cout << "l4min="; 86 | print_vector(host_min_corners,64+32,16); 87 | std::cout << "l3min="; 88 | print_vector(host_min_corners,64+32+16,8); 89 | std::cout << "l2min="; 90 | print_vector(host_min_corners,64+32+16+8,4); 91 | std::cout << "l1min="; 92 | print_vector(host_min_corners,64+32+16+8+4,2); 93 | std::cout << "l0min="; 94 | print_vector(host_min_corners,64+32+16+8+4+2,1); 95 | 96 | std::cout << "l6max="; 97 | print_vector(host_max_corners,0,64); 98 | std::cout << "l5max="; 99 | print_vector(host_max_corners,64,32); 100 | std::cout << "l4max="; 101 | print_vector(host_max_corners,64+32,16); 102 | std::cout << "l3max="; 103 | print_vector(host_max_corners,64+32+16,8); 104 | std::cout << "l2max="; 105 | print_vector(host_max_corners,64+32+16+8,4); 106 | std::cout << "l1max="; 107 | print_vector(host_max_corners,64+32+16+8+4,2); 108 | std::cout << "l0max="; 109 | print_vector(host_max_corners,64+32+16+8+4+2,1); 110 | 111 | std::cout << "particles="; 112 | print_vector(sorted_particles, 0, sorted_particles.size()); 113 | } 114 | -------------------------------------------------------------------------------- /tests/knn_query/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(knn_query knn_query.cpp) 2 | target_link_libraries (knn_query ${OpenCL_LIBRARIES}) 3 | -------------------------------------------------------------------------------- /tests/knn_query/knn_query.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of SpatialCL, a library for the spatial processing of 3 | * particles. 4 | * 5 | * Copyright (c) 2017 Aksel Alpay 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions are met: 10 | * 11 | * 1. Redistributions of source code must retain the above copyright notice, this 12 | * list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright notice, 14 | * this list of conditions and the following disclaimer in the documentation 15 | * and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 21 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | */ 28 | 29 | #include 30 | #include 31 | 32 | #include 33 | 34 | #include 35 | #include 36 | 37 | #include 38 | #include 39 | 40 | #include "../../common/environment.hpp" 41 | #include "../../common/random_vectors.hpp" 42 | #include "../../common/verification_knn.hpp" 43 | 44 | constexpr std::size_t particle_dimension = 3; 45 | 46 | using tree_type = spatialcl::hilbert_bvh_sp3d_tree; 47 | using type_system = tree_type::type_system; 48 | using scalar = type_system::scalar; 49 | 50 | const std::size_t num_particles = 32000; 51 | const std::size_t num_queries = 2000; 52 | constexpr std::size_t K = 8; 53 | 54 | using particle_type = spatialcl::configuration::particle_type; 55 | using vector_type = spatialcl::configuration::vector_type; 56 | constexpr std::size_t dimension = type_system::dimension; 57 | 58 | using strict_dfs_knn_engine = 59 | spatialcl::query::strict_dfs_knn_query_engine; 60 | 61 | using relaxed_dfs_knn_engine = 62 | spatialcl::query::relaxed_dfs_knn_query_engine; 63 | 64 | template 65 | using grouped_dfs_knn_engine = 66 | spatialcl::query::grouped_dfs_knn_query_engine; 67 | 68 | template 69 | std::size_t execute_knn_query_test(const qcl::device_context_ptr& ctx, 70 | const tree_type& tree, 71 | const std::vector& host_queries, 72 | const qcl::device_array& queries, 73 | const std::vector& particles, 74 | qcl::device_array& result) 75 | { 76 | // ToDo: Reset results 77 | 78 | Query_engine query_engine; 79 | typename Query_engine::handler_type query_handler { 80 | queries.get_buffer(), 81 | result.get_buffer(), 82 | queries.size() 83 | }; 84 | 85 | std::cout << "Executing query..." << std::endl; 86 | // Executy query 87 | query_engine(tree, query_handler); 88 | 89 | cl_int err = ctx->get_command_queue().finish(); 90 | qcl::check_cl_error(err, "Error while executing KNN query"); 91 | 92 | // Retrieve results 93 | std::vector host_results; 94 | result.read(host_results); 95 | 96 | std::cout << "Verifying results, please wait..." << std::endl; 97 | // Verify results 98 | common::verification::naive_cpu_knn_verifier verifier{ 99 | host_queries 100 | }; 101 | 102 | return verifier(particles, host_results); 103 | } 104 | 105 | int main(int argc, char* argv[]) 106 | { 107 | common::environment env; 108 | qcl::device_context_ptr ctx = env.get_device_context(); 109 | 110 | std::vector particles; 111 | // Create random 3D positions 112 | common::random_vectors rnd; 113 | rnd(num_particles, particles); 114 | 115 | tree_type gpu_tree{ctx, particles}; 116 | 117 | std::vector query_points; 118 | rnd(num_queries, query_points); 119 | 120 | qcl::device_array queries{ctx, query_points}; 121 | qcl::device_array result{ctx, K * queries.size()}; 122 | 123 | std::size_t num_errors = 0; 124 | 125 | #define RUN_TEST(test_name) \ 126 | num_errors = \ 127 | execute_knn_query_test(ctx, \ 128 | gpu_tree, \ 129 | query_points, \ 130 | queries, \ 131 | particles, \ 132 | result); \ 133 | std::cout << BOOST_PP_STRINGIZE(test_name) <<" completed queries with " \ 134 | << num_errors << " errors." << std::endl 135 | 136 | RUN_TEST(strict_dfs_knn_engine); 137 | RUN_TEST(relaxed_dfs_knn_engine); 138 | RUN_TEST(grouped_dfs_knn_engine<16>); 139 | RUN_TEST(grouped_dfs_knn_engine<32>); 140 | RUN_TEST(grouped_dfs_knn_engine<64>); 141 | 142 | return 0; 143 | } 144 | -------------------------------------------------------------------------------- /tests/range_query/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(range_query range_query.cpp) 2 | target_link_libraries (range_query ${OpenCL_LIBRARIES}) 3 | -------------------------------------------------------------------------------- /tests/range_query/range_query.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of SpatialCL, a library for the spatial processing of 3 | * particles. 4 | * 5 | * Copyright (c) 2017, 2018 Aksel Alpay 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions are met: 10 | * 11 | * 1. Redistributions of source code must retain the above copyright notice, this 12 | * list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright notice, 14 | * this list of conditions and the following disclaimer in the documentation 15 | * and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 18 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 21 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | */ 28 | 29 | #include 30 | 31 | #include 32 | 33 | #include 34 | #include 35 | 36 | #include 37 | #include 38 | 39 | #include "../../common/environment.hpp" 40 | #include "../../common/random_vectors.hpp" 41 | #include "../../common/verification_range.hpp" 42 | 43 | constexpr std::size_t particle_dimension = 3; 44 | 45 | const std::size_t num_particles = 32000; 46 | const std::size_t num_queries = 2000; 47 | 48 | constexpr std::size_t max_retrieved_particles = 8; 49 | 50 | using tree_type = spatialcl::hilbert_bvh_sp3d_tree; 51 | using type_system = tree_type::type_system; 52 | using scalar = type_system::scalar; 53 | 54 | constexpr scalar query_diameter = 0.05f; 55 | 56 | // Define queries 57 | using strict_dfs_range_engine = 58 | spatialcl::query::strict_dfs_range_query_engine; 60 | 61 | using relaxed_dfs_range_engine = 62 | spatialcl::query::relaxed_dfs_range_query_engine; 64 | 65 | template 66 | using grouped_dfs_range_engine = 67 | spatialcl::query::grouped_dfs_range_query_engine; 70 | 71 | 72 | using particle_type = spatialcl::configuration::particle_type; 73 | using vector_type = spatialcl::configuration::vector_type; 74 | constexpr std::size_t dimension = type_system::dimension; 75 | 76 | template 77 | std::size_t execute_range_query_test(const qcl::device_context_ptr& ctx, 78 | const tree_type& tree, 79 | const std::vector& host_queries_min, 80 | const std::vector& host_queries_max, 81 | const qcl::device_array& queries_min, 82 | const qcl::device_array& queries_max, 83 | const std::vector& particles, 84 | qcl::device_array& result, 85 | qcl::device_array& num_results) 86 | { 87 | Query_engine query_engine; 88 | 89 | typename Query_engine::handler_type query_handler { 90 | queries_min.get_buffer(), 91 | queries_max.get_buffer(), 92 | result.get_buffer(), 93 | num_results.get_buffer(), 94 | queries_min.size() 95 | }; 96 | 97 | std::cout << "Executing query..." << std::endl; 98 | 99 | query_engine(tree, query_handler); 100 | 101 | cl_int err = ctx->get_command_queue().finish(); 102 | qcl::check_cl_error(err, "Error while executing range query"); 103 | 104 | // Retrieve results 105 | std::vector host_results; 106 | std::vector host_num_results; 107 | result.read(host_results); 108 | num_results.read(host_num_results); 109 | 110 | std::cout << "Verifying results, please wait..." << std::endl; 111 | // Verify results 112 | common::verification::naive_cpu_range_verifier verifier{ 113 | host_queries_min, 114 | host_queries_max, 115 | max_retrieved_particles 116 | }; 117 | 118 | return verifier(particles, host_results, host_num_results); 119 | } 120 | 121 | int main() 122 | { 123 | // Setup particle tree 124 | common::environment env; 125 | qcl::device_context_ptr ctx = env.get_device_context(); 126 | 127 | std::vector particles; 128 | common::random_vectors rnd; 129 | rnd(num_particles, particles); 130 | 131 | tree_type gpu_tree{ctx, particles}; 132 | 133 | // Create random ranges for the queries 134 | std::vector query_points; 135 | rnd(num_queries, query_points); 136 | 137 | std::vector host_ranges_min = query_points; 138 | std::vector host_ranges_max = query_points; 139 | 140 | std::transform(host_ranges_min.begin(), host_ranges_min.end(), 141 | host_ranges_min.begin(), 142 | [&](vector_type current){ 143 | for(std::size_t i = 0; i < dimension; ++i) 144 | current.s[i] -= query_diameter / 2; 145 | return current; 146 | }); 147 | 148 | std::transform(host_ranges_max.begin(), host_ranges_max.end(), 149 | host_ranges_max.begin(), 150 | [&](vector_type current){ 151 | for(std::size_t i = 0; i < dimension; ++i) 152 | current.s[i] += query_diameter / 2; 153 | return current; 154 | }); 155 | 156 | // Create buffers on the compute device 157 | qcl::device_array ranges_min{ctx, host_ranges_min}; 158 | qcl::device_array ranges_max{ctx, host_ranges_max}; 159 | qcl::device_array result_particles{ctx, 160 | num_queries*max_retrieved_particles}; 161 | qcl::device_array result_num_retrieved_particles{ctx, 162 | num_queries}; 163 | 164 | 165 | std::size_t num_errors = 0; 166 | 167 | #define RUN_TEST(test_name) \ 168 | num_errors = \ 169 | execute_range_query_test(ctx, \ 170 | gpu_tree, \ 171 | host_ranges_min, \ 172 | host_ranges_max, \ 173 | ranges_min, \ 174 | ranges_max, \ 175 | particles, \ 176 | result_particles, \ 177 | result_num_retrieved_particles); \ 178 | std::cout << BOOST_PP_STRINGIZE(test_name) <<" completed queries with " \ 179 | << num_errors << " errors." << std::endl 180 | 181 | RUN_TEST(strict_dfs_range_engine); 182 | RUN_TEST(relaxed_dfs_range_engine); 183 | //RUN_TEST(grouped_dfs_range_engine<16>); 184 | RUN_TEST(grouped_dfs_range_engine<32>); 185 | RUN_TEST(grouped_dfs_range_engine<64>); 186 | 187 | return 0; 188 | } 189 | --------------------------------------------------------------------------------