├── glsl ├── cbt_Dispatcher.glsl ├── cbt_SumReduction.glsl ├── cbt_SumReductionPrepass.glsl └── cbt.glsl ├── LICENSE.txt ├── README.md ├── hlsl └── ConcurrentBinaryTree.hlsl └── cbt.h /glsl/cbt_Dispatcher.glsl: -------------------------------------------------------------------------------- 1 | // requires cbt.glsl 2 | uniform int u_CbtID = 0; 3 | layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; 4 | layout(std430, binding = CBT_DISPATCHER_BUFFER_BINDING) 5 | buffer DispatchIndirectCommandBuffer { 6 | uint u_CbtDispatchBuffer[]; 7 | }; 8 | /* Writes the number of workgroups needed to cover every leaf node of the CBT into the first element of the indirect dispatch buffer. */ 9 | void main() 10 | { 11 | const int cbtID = u_CbtID; 12 | uint nodeCount = cbt_NodeCount(cbtID); 13 | /* ceil(nodeCount / 256): round up so that a partially filled last group is still dispatched, and always dispatch at least one group. NOTE(review): 256 presumably matches the workgroup size of the consuming kernel -- confirm. */ 14 | u_CbtDispatchBuffer[0] = max((nodeCount + 255u) >> 8, 1u); 15 | } 16 | -------------------------------------------------------------------------------- /glsl/cbt_SumReduction.glsl: -------------------------------------------------------------------------------- 1 | // requires cbt.glsl 2 | #ifndef CBT_LOCAL_SIZE_X 3 | # define CBT_LOCAL_SIZE_X 256 4 | #endif 5 | uniform int u_CbtID = 0; 6 | uniform int u_PassID; 7 | layout (local_size_x = CBT_LOCAL_SIZE_X, 8 | local_size_y = 1, 9 | local_size_z = 1) in; 10 | /* One sum-reduction pass: every node at depth u_PassID receives the sum of the values stored for its two children at depth u_PassID + 1. */ 11 | void main(void) 12 | { 13 | const int cbtID = u_CbtID; 14 | uint cnt = (1u << u_PassID); 15 | uint threadID = gl_GlobalInvocationID.x; 16 | 17 | if (threadID < cnt) { 18 | uint nodeID = threadID + cnt; 19 | uint x0 = cbt_HeapRead(cbtID, cbt_CreateNode(nodeID << 1u , u_PassID + 1)); 20 | uint x1 = cbt_HeapRead(cbtID, cbt_CreateNode(nodeID << 1u | 1u, u_PassID + 1)); 21 | 22 | cbt__HeapWrite(cbtID, cbt_CreateNode(nodeID, u_PassID), x0 + x1); 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /glsl/cbt_SumReductionPrepass.glsl: -------------------------------------------------------------------------------- 1 | // requires cbt.glsl 2 | #ifndef CBT_LOCAL_SIZE_X 3 | # define CBT_LOCAL_SIZE_X 256 4 | #endif 5 | uniform int u_CbtID = 0; 6 | uniform int u_PassID; 7 | 
layout (local_size_x = CBT_LOCAL_SIZE_X, 8 | local_size_y = 1, 9 | local_size_z = 1) in; 10 | /* Sum-reduction prepass: each active invocation loads one 32-bit heap word at depth u_PassID and stores the 2-, 3-, 4-, 5- and 6-bit partial sums derived from it. */ 11 | void main(void) 12 | { 13 | const int cbtID = u_CbtID; 14 | uint cnt = (1u << u_PassID); 15 | uint threadID = gl_GlobalInvocationID.x << 5; /* invocations are spaced 32 node indices apart */ 16 | 17 | if (threadID < cnt) { 18 | uint nodeID = threadID + cnt; 19 | uint alignedBitOffset = cbt__NodeBitID(cbtID, cbt_CreateNode(nodeID, u_PassID)); 20 | uint bitField = u_CbtBuffers[cbtID].heap[alignedBitOffset >> 5u]; 21 | uint bitData = 0u; 22 | 23 | // 2-bits: add adjacent bits so that each 2-bit field holds the sum of a bit pair; stored with a direct word write 24 | bitField = (bitField & 0x55555555u) + ((bitField >> 1u) & 0x55555555u); 25 | bitData = bitField; 26 | u_CbtBuffers[cbtID].heap[(alignedBitOffset - cnt) >> 5u] = bitData; 27 | 28 | // 3-bits: add adjacent 2-bit sums, then repack the eight 4-bit fields into 3 bits each (24 bits) 29 | bitField = (bitField & 0x33333333u) + ((bitField >> 2u) & 0x33333333u); 30 | bitData = ((bitField >> 0u) & (7u << 0u)) 31 | | ((bitField >> 1u) & (7u << 3u)) 32 | | ((bitField >> 2u) & (7u << 6u)) 33 | | ((bitField >> 3u) & (7u << 9u)) 34 | | ((bitField >> 4u) & (7u << 12u)) 35 | | ((bitField >> 5u) & (7u << 15u)) 36 | | ((bitField >> 6u) & (7u << 18u)) 37 | | ((bitField >> 7u) & (7u << 21u)); 38 | cbt__HeapWriteExplicit(cbtID, cbt_CreateNode(nodeID >> 2u, u_PassID - 2), 24, bitData); 39 | 40 | // 4-bits: add adjacent fields, then repack the four 8-bit fields into 4 bits each (16 bits) 41 | bitField = (bitField & 0x0F0F0F0Fu) + ((bitField >> 4u) & 0x0F0F0F0Fu); 42 | bitData = ((bitField >> 0u) & (15u << 0u)) 43 | | ((bitField >> 4u) & (15u << 4u)) 44 | | ((bitField >> 8u) & (15u << 8u)) 45 | | ((bitField >> 12u) & (15u << 12u)); 46 | cbt__HeapWriteExplicit(cbtID, cbt_CreateNode(nodeID >> 3u, u_PassID - 3), 16, bitData); 47 | 48 | // 5-bits: add adjacent fields, then repack the two 16-bit fields into 5 bits each (10 bits) 49 | bitField = (bitField & 0x00FF00FFu) + ((bitField >> 8u) & 0x00FF00FFu); 50 | bitData = ((bitField >> 0u) & (31u << 0u)) 51 | | ((bitField >> 11u) & (31u << 5u)); 52 | cbt__HeapWriteExplicit(cbtID, cbt_CreateNode(nodeID >> 4u, u_PassID - 4), 10, bitData); 53 | 54 | // 6-bits: add both 16-bit halves; the result is the sum over the whole word, written as one 6-bit value 55 | bitField = (bitField & 0x0000FFFFu) + ((bitField >> 16u) & 0x0000FFFFu); 56 | bitData = bitField; 57 | cbt__HeapWriteExplicit(cbtID, 
cbt_CreateNode(nodeID >> 5u, u_PassID - 5), 6, bitData); 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------------ 2 | This software is available under 2 licenses -- choose whichever you prefer. 3 | ------------------------------------------------------------------------------ 4 | ALTERNATIVE A - MIT License 5 | Copyright (c) 2019 Jonathan Dupuy 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of 7 | this software and associated documentation files (the "Software"), to deal in 8 | the Software without restriction, including without limitation the rights to 9 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 10 | of the Software, and to permit persons to whom the Software is furnished to do 11 | so, subject to the following conditions: 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 20 | SOFTWARE. 21 | ------------------------------------------------------------------------------ 22 | ALTERNATIVE B - Public Domain (www.unlicense.org) 23 | This is free and unencumbered software released into the public domain. 
24 | Anyone is free to copy, modify, publish, use, compile, sell, or distribute this 25 | software, either in source code form or as a compiled binary, for any purpose, 26 | commercial or non-commercial, and by any means. 27 | In jurisdictions that recognize copyright laws, the author or authors of this 28 | software dedicate any and all copyright interest in the software to the public 29 | domain. We make this dedication for the benefit of the public at large and to 30 | the detriment of our heirs and successors. We intend this dedication to be an 31 | overt act of relinquishment in perpetuity of all present and future rights to 32 | this software under copyright law. 33 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 34 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 35 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 36 | AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 37 | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 38 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 39 | ------------------------------------------------------------------------------ 40 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Concurrent Binary Tree Library 2 | 3 | This library provides a concurrent binary tree data-structure suitable for accelerating the processing of subdivision algorithms on multicore processors, including GPUs. More details are available in my paper ["Concurrent Binary Trees (with application to Longest Edge Bisection)"](https://onrendering.com/). 4 | 5 | ### Usage 6 | 7 | **Initialization** 8 | A CBT requires a maximum depth (typically the maximum depth of the subdivision algorithm you're interested in accelerating). 
9 | ```c 10 | cbt_Tree *cbt = cbt_Create(myMaximumDepth); 11 | ``` 12 | Once this depth is chosen, it cannot be changed throughout the lifetime of the CBT instance. You can query the maximum depth of the CBT as follows: 13 | ```c 14 | maximumDepth = cbt_MaxDepth(cbt); 15 | ``` 16 | By default, the CBT will be initialized at the root node. You can also initialize it at an explicit subdivision depth as follows: 17 | ```c 18 | cbt_Tree *cbt = cbt_CreateAtDepth(myMaximumDepth, myInitializationDepth); 19 | ``` 20 | Note that the initialization depth must be less than or equal to the maximum depth of the CBT. 21 | Always remember to release the memory once you're done with your CBT: 22 | ```c 23 | cbt_Release(cbt); 24 | ``` 25 | 26 | **Resetting the tree** 27 | Additionally, you can reset the subdivision by using any of the following routines: 28 | ```c 29 | cbt_ResetToRoot(cbt); // resets the CBT to its root 30 | cbt_ResetToCeil(cbt); // resets the CBT to its maximum depth 31 | cbt_ResetToDepth(cbt, myInitializationDepth); // resets the CBT to a custom depth 32 | ``` 33 | 34 | **Updating the tree in parallel** 35 | The main advantage of CBTs is their ability to update their topology in parallel. Nodes can be split or merged using `cbt_SplitNode(cbt, node)` and `cbt_MergeNode(cbt, node)`, respectively. In order to process the operations in parallel, you can provide a custom callback that will be executed in parallel within an OpenMP parallel for loop. 
Here is a simple example that splits or merges nodes if their index is even: 36 | ```c 37 | // update callback 38 | void UpdateCallback(cbt_Tree *cbt, const cbt_Node node, const void *userData) 39 | { 40 | if ((node.id & 1) == 0) { 41 | #ifdef SPLIT 42 | cbt_SplitNode(cbt, node); 43 | #else 44 | cbt_MergeNode(cbt, node); 45 | #endif 46 | } 47 | } 48 | 49 | // execute the update callback in parallel 50 | cbt_Update(cbt, &UpdateCallback, NULL); 51 | ``` 52 | For a more complex example, see [this repo](https://github.com/jdupuy/LongestEdgeBisection2D). 53 | 54 | **Queries** 55 | You can query the number of leaf nodes in the CBT using 56 | ```c 57 | int64_t nodeCount = cbt_NodeCount(cbt); 58 | ``` 59 | You can retrieve the i-th leaf node using 60 | ```c 61 | cbt_Node node = cbt_DecodeNode(cbt, i); 62 | ``` 63 | Conversely, you can retrieve the index of an existing leaf node using 64 | ```c 65 | int64_t nodeID = cbt_EncodeNode(cbt, node); 66 | ``` 67 | 68 | 69 | **Serialization** 70 | Internally, the CBT uses a compact binary heap data-structure, i.e., a 1D array. This makes the CBT trivial to serialize. To access the heap, use 71 | ```c 72 | int64_t cbtByteSize = cbt_HeapByteSize(cbt); // size in Bytes of the CBT 73 | char *cbtMemory = cbt_GetHeap(cbt); // CBT raw-data 74 | ``` 75 | 76 | 77 | **GPU implementation** 78 | The GLSL folder provides a GLSL implementation of the library. An HLSL port of the library would also be welcome. 79 | For a GPU implementation example, see [this repo](https://github.com/jdupuy/LongestEdgeBisection2D). 80 | 81 | 82 | ### License 83 | 84 | The code from this repository is released into the public domain. You can do anything you want with it. You have no legal obligation to do anything else, although I appreciate attribution. 85 | 86 | It is also licensed under the MIT open source license, if you have lawyers who are unhappy with the public domain. 
87 | 88 | -------------------------------------------------------------------------------- /hlsl/ConcurrentBinaryTree.hlsl: -------------------------------------------------------------------------------- 1 | /* public domain library for building binary trees in parallel 2 | by Jonathan Dupuy 3 | */ 4 | 5 | #ifndef CBT_HEAP_BUFFER_BINDING 6 | #ifdef CBT_FLAG_WRITE 7 | #define CBT_HEAP_BUFFER_BINDING register(u0) 8 | #else 9 | #define CBT_HEAP_BUFFER_BINDING register(t0) 10 | #endif 11 | #endif 12 | /* heap storage of the CBT, addressed as an array of 32-bit words; writable only when CBT_FLAG_WRITE is defined */ 13 | #ifdef CBT_FLAG_WRITE 14 | RWStructuredBuffer<uint> u_CbtBuffer : CBT_HEAP_BUFFER_BINDING; 15 | #else 16 | StructuredBuffer<uint> u_CbtBuffer : CBT_HEAP_BUFFER_BINDING; 17 | #endif 18 | 19 | // data structures 20 | struct cbt_Node { 21 | uint id; // heapID 22 | int depth; // findMSB(heapID) := node depth 23 | }; 24 | 25 | // manipulation 26 | #ifdef CBT_FLAG_WRITE 27 | void cbt_SplitNode_Fast(in const cbt_Node node); 28 | void cbt_SplitNode(in const cbt_Node node); 29 | void cbt_MergeNode_Fast(in const cbt_Node node); 30 | void cbt_MergeNode(in const cbt_Node node); 31 | #endif 32 | 33 | // O(1) queries 34 | int cbt_MaxDepth(); 35 | uint cbt_NodeCount(); 36 | uint cbt_HeapRead(in const cbt_Node node); 37 | bool cbt_IsLeafNode(in const cbt_Node node); 38 | bool cbt_IsCeilNode(in const cbt_Node node); 39 | bool cbt_IsRootNode(in const cbt_Node node); 40 | bool cbt_IsNullNode(in const cbt_Node node); 41 | 42 | // O(depth) queries 43 | uint cbt_EncodeNode(in const cbt_Node node); 44 | cbt_Node cbt_DecodeNode(uint nodeID); 45 | 46 | // node constructors 47 | cbt_Node cbt_CreateNode(uint id); 48 | cbt_Node cbt_CreateNode(uint id, int depth); 49 | cbt_Node cbt_ParentNode_Fast(in const cbt_Node node); 50 | cbt_Node cbt_ParentNode (in const cbt_Node node); 51 | cbt_Node cbt_SiblingNode_Fast(in const cbt_Node node); 52 | cbt_Node cbt_SiblingNode (in const cbt_Node node); 53 | cbt_Node cbt_LeftSiblingNode_Fast(in const cbt_Node node); 54 | cbt_Node cbt_LeftSiblingNode (in const cbt_Node 
node); 55 | cbt_Node cbt_RightSiblingNode_Fast(in const cbt_Node node); 56 | cbt_Node cbt_RightSiblingNode (in const cbt_Node node); 57 | cbt_Node cbt_LeftChildNode_Fast(in const cbt_Node node); 58 | cbt_Node cbt_LeftChildNode (in const cbt_Node node); 59 | cbt_Node cbt_RightChildNode_Fast(in const cbt_Node node); 60 | cbt_Node cbt_RightChildNode (in const cbt_Node node); 61 | 62 | // ----------------------------------------------------------------------------- 63 | // ----------------------------------------------------------------------------- 64 | // ----------------------------------------------------------------------------- 65 | 66 | /******************************************************************************* 67 | * GetBitValue -- Returns the value of a bit stored in a 32-bit word 68 | * 69 | */ 70 | uint cbt__GetBitValue(uint bitField, uint bitID) 71 | { 72 | return ((bitField >> bitID) & 1u); 73 | } 74 | 75 | 76 | /******************************************************************************* 77 | * SetBitValue -- Sets the value of a bit stored in a 32-bit word 78 | * The bit is first cleared with InterlockedAnd, then set to bitValue with InterlockedOr. 79 | */ 80 | #ifdef CBT_FLAG_WRITE 81 | void cbt__SetBitValue(uint bufferID, uint bitID, uint bitValue) 82 | { 83 | const uint bitMask = ~(1u << bitID); 84 | 85 | InterlockedAnd(u_CbtBuffer[bufferID], bitMask); 86 | InterlockedOr(u_CbtBuffer[bufferID], bitValue << bitID); 87 | } 88 | 89 | 90 | /******************************************************************************* 91 | * BitFieldInsert -- Inserts some bit data in range 92 | * [bitOffset, bitOffset + bitCount - 1] of the heap word u_CbtBuffer[bufferID] 93 | * (the range is cleared with InterlockedAnd, then bitData is merged in with InterlockedOr) 94 | */ 95 | void 96 | cbt__BitFieldInsert(uint bufferID, uint bitOffset, uint bitCount, uint bitData) 97 | { 98 | uint bitMask = ~(~(0xFFFFFFFFu << bitCount) << bitOffset); 99 | 100 | InterlockedAnd(u_CbtBuffer[bufferID], bitMask); 101 | InterlockedOr(u_CbtBuffer[bufferID], bitData << bitOffset); 102 | } 103 | #endif // CBT_FLAG_WRITE 104 | 105 | 106 | 
/******************************************************************************* 107 | * BitFieldExtract -- Extracts bits [bitOffset, bitOffset + bitCount - 1] from 108 | * a bit field, returning them in the least significant bits of the result. 109 | * NOTE(review): the mask is built by shifting by bitCount, so bitCount == 32 relies on the target's full-width shift behaviour -- confirm it is never requested. 110 | */ 111 | uint cbt__BitFieldExtract(uint bitField, uint bitOffset, uint bitCount) 112 | { 113 | uint bitMask = ~(0xFFFFFFFFu << bitCount); 114 | 115 | return (bitField >> bitOffset) & bitMask; 116 | } 117 | 118 | 119 | /******************************************************************************* 120 | * IsCeilNode -- Checks if a node is a ceil node, i.e., that can not split further 121 | * (its depth equals the maximum depth of the tree) 122 | */ 123 | bool cbt_IsCeilNode(in const cbt_Node node) 124 | { 125 | return (node.depth == cbt_MaxDepth()); 126 | } 127 | 128 | 129 | /******************************************************************************* 130 | * IsRootNode -- Checks if a node is a root node 131 | * (its id equals 1) 132 | */ 133 | bool cbt_IsRootNode(in const cbt_Node node) 134 | { 135 | return (node.id == 1u); 136 | } 137 | 138 | 139 | /******************************************************************************* 140 | * IsNullNode -- Checks if a node is a null node 141 | * (its id equals 0) 142 | */ 143 | bool cbt_IsNullNode(in const cbt_Node node) 144 | { 145 | return (node.id == 0u); 146 | } 147 | 148 | 149 | /******************************************************************************* 150 | * CreateNode -- Constructor for the Node data structure 151 | * When the depth is omitted, it is computed as firstbithigh(id). 152 | */ 153 | cbt_Node cbt_CreateNode(uint id) 154 | { 155 | return cbt_CreateNode(id, firstbithigh(id)); 156 | } 157 | 158 | cbt_Node cbt_CreateNode(uint id, int depth) 159 | { 160 | cbt_Node node; 161 | 162 | node.id = id; 163 | node.depth = depth; 164 | 165 | return node; 166 | } 167 | 168 | 169 | /******************************************************************************* 170 | * ParentNode -- Computes the parent of the input node 171 | * The _Fast variant performs no check; the regular variant returns null nodes unchanged. 172 | */ 173 | cbt_Node cbt_ParentNode_Fast(in const cbt_Node node) 174 
| { 175 | return cbt_CreateNode(node.id >> 1, node.depth - 1); 176 | } 177 | 178 | cbt_Node cbt_ParentNode(in const cbt_Node node) 179 | { 180 | if (cbt_IsNullNode(node)) 181 | return node; 182 | else 183 | return cbt_ParentNode_Fast(node); 184 | } 185 | 186 | 187 | /******************************************************************************* 188 | * CeilNode -- Returns the associated ceil node, i.e., the deepest possible leaf 189 | * (the id is shifted left by the number of levels separating the node from the maximum depth) 190 | */ 191 | cbt_Node cbt__CeilNode_Fast(in const cbt_Node node) 192 | { 193 | int maxDepth = cbt_MaxDepth(); 194 | return cbt_CreateNode(node.id << (maxDepth - node.depth), maxDepth); 195 | } 196 | 197 | cbt_Node cbt__CeilNode(in const cbt_Node node) 198 | { 199 | if (cbt_IsNullNode(node)) 200 | return node; 201 | else 202 | return cbt__CeilNode_Fast(node); 203 | } 204 | 205 | 206 | /******************************************************************************* 207 | * SiblingNode -- Computes the sibling of the input node 208 | * (flips the least significant bit of the id; the depth is unchanged) 209 | */ 210 | cbt_Node cbt_SiblingNode_Fast(in const cbt_Node node) 211 | { 212 | return cbt_CreateNode(node.id ^ 1u, node.depth); 213 | } 214 | 215 | cbt_Node cbt_SiblingNode(in const cbt_Node node) 216 | { 217 | if (cbt_IsNullNode(node)) 218 | return node; 219 | else 220 | return cbt_SiblingNode_Fast(node); 221 | } 222 | 223 | 224 | /******************************************************************************* 225 | * RightSiblingNode -- Computes the right sibling of the input node 226 | * (sets the least significant bit of the id; the depth is unchanged) 227 | */ 228 | cbt_Node cbt_RightSiblingNode_Fast(in const cbt_Node node) 229 | { 230 | return cbt_CreateNode(node.id | 1u, node.depth); 231 | } 232 | 233 | cbt_Node cbt_RightSiblingNode(in const cbt_Node node) 234 | { 235 | if (cbt_IsNullNode(node)) 236 | return node; 237 | else 238 | return cbt_RightSiblingNode_Fast(node); 239 | } 240 | 241 | 242 | /******************************************************************************* 243 | * LeftSiblingNode -- Computes the left sibling of the input node 244 | * (clears the least significant bit of the id; the depth is unchanged) 245 
| */ 246 | cbt_Node cbt_LeftSiblingNode_Fast(in const cbt_Node node) 247 | { 248 | return cbt_CreateNode(node.id & (~1u), node.depth); 249 | } 250 | 251 | cbt_Node cbt_LeftSiblingNode(in const cbt_Node node) 252 | { 253 | if (cbt_IsNullNode(node)) 254 | return node; 255 | else 256 | return cbt_LeftSiblingNode_Fast(node); 257 | } 258 | 259 | 260 | /******************************************************************************* 261 | * RightChildNode -- Computes the right child of the input node 262 | * (id * 2 + 1, one level deeper) 263 | */ 264 | cbt_Node cbt_RightChildNode_Fast(in const cbt_Node node) 265 | { 266 | return cbt_CreateNode((node.id << 1) | 1u, node.depth + 1); 267 | } 268 | 269 | cbt_Node cbt_RightChildNode(in const cbt_Node node) 270 | { 271 | if (cbt_IsNullNode(node)) 272 | return node; 273 | else 274 | return cbt_RightChildNode_Fast(node); 275 | } 276 | 277 | 278 | /******************************************************************************* 279 | * LeftChildNode -- Computes the left child of the input node 280 | * (id * 2, one level deeper) 281 | */ 282 | cbt_Node cbt_LeftChildNode_Fast(in const cbt_Node node) 283 | { 284 | return cbt_CreateNode(node.id << 1, node.depth + 1); 285 | } 286 | 287 | cbt_Node cbt_LeftChildNode(in const cbt_Node node) 288 | { 289 | if (cbt_IsNullNode(node)) 290 | return node; 291 | else 292 | return cbt_LeftChildNode_Fast(node); 293 | } 294 | 295 | 296 | /******************************************************************************* 297 | * HeapByteSize -- Computes the number of Bytes to allocate for the bitfield 298 | * 299 | * For a tree of max depth D, the number of Bytes is 2^(D-1). 300 | * Note that 2 bits are "wasted" in the sense that they only serve 301 | * to round the required number of bytes to a power of two. 
302 | * 303 | */ 304 | uint cbt__HeapByteSize(uint maxDepth) 305 | { 306 | return 1u << (maxDepth - 1); 307 | } 308 | 309 | 310 | /******************************************************************************* 311 | * HeapUint32Size -- Computes the number of uints to allocate for the bitfield 312 | * (the byte size divided by 4) 313 | */ 314 | uint cbt__HeapUint32Size(uint maxDepth) 315 | { 316 | return cbt__HeapByteSize(maxDepth) >> 2; 317 | } 318 | 319 | 320 | /******************************************************************************* 321 | * NodeBitID -- Returns the bit index that stores data associated with a given node 322 | * 323 | * For a LEB of max depth D and given an index in [0, 2^(D+1) - 1], this 324 | * function is used to emulate the behaviour of a lookup in an array, i.e., 325 | * uint32_t[nodeID]. It provides the first bit in memory that stores 326 | * information associated with the element of index nodeID. 327 | * 328 | * For data located at level d, the bit offset is 2^d x (3 - d + D) 329 | * We then offset this quantity by (nodeID - 2^d) x (D + 1 - d) 330 | * Note that the null index (nodeID = 0) is also supported. 
331 | * The two terms add up to 2^(d+1) + nodeID x (D + 1 - d), which is the expression evaluated in the body. 332 | */ 333 | uint cbt__NodeBitID(in const cbt_Node node) 334 | { 335 | uint tmp1 = 2u << node.depth; 336 | uint tmp2 = uint(1 + cbt_MaxDepth() - node.depth); 337 | 338 | return tmp1 + node.id * tmp2; 339 | } 340 | 341 | 342 | /******************************************************************************* 343 | * NodeBitID_BitField -- Computes the bitfield bit location associated to a node 344 | * 345 | * Here, the node is converted into a final (ceil) node and its bit offset is 346 | * returned, which is finalNodeID + 2^{D + 1} 347 | */ 348 | uint cbt__NodeBitID_BitField(in const cbt_Node node) 349 | { 350 | return cbt__NodeBitID(cbt__CeilNode(node)); 351 | } 352 | 353 | 354 | /******************************************************************************* 355 | * NodeBitSize -- Returns the number of bits associated with a given node 356 | * (D - d + 1 bits for a node at depth d in a tree of maximum depth D) 357 | */ 358 | int cbt__NodeBitSize(in const cbt_Node node) 359 | { 360 | return cbt_MaxDepth() - node.depth + 1; 361 | } 362 | 363 | 364 | /******************************************************************************* 365 | * HeapArgs 366 | * 367 | * The LEB heap data structure uses an array of 32-bit words to store its data. 368 | * Whenever we need to access a certain bit range, we need to query two such 369 | * words (because sometimes the requested bit range overlaps two 32-bit words). 370 | * The HeapArg data structure provides arguments for reading from and/or 371 | * writing to the two 32-bit words that bound the queried range. 
372 | * heapIndexLSB / heapIndexMSB index the two words, bitOffsetLSB is the position of the first bit inside the LSB word, and bitCountLSB / bitCountMSB give the number of bits held by each word. 373 | */ 374 | struct cbt__HeapArgs { 375 | uint heapIndexLSB, heapIndexMSB; 376 | uint bitOffsetLSB; 377 | uint bitCountLSB, bitCountMSB; 378 | }; 379 | /* fills a cbt__HeapArgs for the bitCount bits that start at the bit location of node */ 380 | cbt__HeapArgs cbt__CreateHeapArgs(in const cbt_Node node, int bitCount) 381 | { 382 | uint alignedBitOffset = cbt__NodeBitID(node); 383 | uint maxHeapIndex = cbt__HeapUint32Size(cbt_MaxDepth()) - 1u; 384 | uint heapIndexLSB = (alignedBitOffset >> 5u); 385 | uint heapIndexMSB = min(heapIndexLSB + 1, maxHeapIndex); /* clamped to the last word of the heap */ 386 | cbt__HeapArgs args; 387 | 388 | args.bitOffsetLSB = alignedBitOffset & 31u; 389 | args.bitCountLSB = min(32u - args.bitOffsetLSB, bitCount); 390 | args.bitCountMSB = bitCount - args.bitCountLSB; 391 | args.heapIndexLSB = heapIndexLSB; 392 | args.heapIndexMSB = heapIndexMSB; 393 | 394 | return args; 395 | } 396 | 397 | 398 | /******************************************************************************* 399 | * HeapWrite -- Sets bitCount bits located at nodeID to bitData 400 | * 401 | * Note that this procedure writes to at most two uint32 elements. 402 | * Two elements are relevant whenever the specified interval overflows 32-bit 403 | * words. 
404 | * The non-explicit variant writes cbt__NodeBitSize(node) bits. 405 | */ 406 | #ifdef CBT_FLAG_WRITE 407 | void cbt__HeapWriteExplicit(in const cbt_Node node, int bitCount, uint bitData) 408 | { 409 | cbt__HeapArgs args = cbt__CreateHeapArgs(node, bitCount); 410 | 411 | cbt__BitFieldInsert(args.heapIndexLSB, 412 | args.bitOffsetLSB, 413 | args.bitCountLSB, 414 | bitData); 415 | cbt__BitFieldInsert(args.heapIndexMSB, 416 | 0u, 417 | args.bitCountMSB, 418 | bitData >> args.bitCountLSB); 419 | } 420 | 421 | void cbt__HeapWrite(in const cbt_Node node, uint bitData) 422 | { 423 | cbt__HeapWriteExplicit(node, cbt__NodeBitSize(node), bitData); 424 | } 425 | #endif // CBT_FLAG_WRITE 426 | 427 | 428 | /******************************************************************************* 429 | * HeapRead -- Returns bitCount bits located at nodeID 430 | * 431 | * Note that this procedure reads from at most two uint32 elements. 432 | * Two elements are relevant whenever the specified interval overflows 32-bit 433 | * words. 434 | * The non-explicit variant reads cbt__NodeBitSize(node) bits. 435 | */ 436 | uint cbt__HeapReadExplicit(in const cbt_Node node, int bitCount) 437 | { 438 | cbt__HeapArgs args = cbt__CreateHeapArgs(node, bitCount); 439 | uint lsb = cbt__BitFieldExtract(u_CbtBuffer[args.heapIndexLSB], 440 | args.bitOffsetLSB, 441 | args.bitCountLSB); 442 | uint msb = cbt__BitFieldExtract(u_CbtBuffer[args.heapIndexMSB], 443 | 0u, 444 | args.bitCountMSB); 445 | 446 | return (lsb | (msb << args.bitCountLSB)); 447 | } 448 | 449 | uint cbt_HeapRead(in const cbt_Node node) 450 | { 451 | return cbt__HeapReadExplicit(node, cbt__NodeBitSize(node)); 452 | } 453 | 454 | 455 | /******************************************************************************* 456 | * HeapWrite_BitField -- Sets the bit associated to a leaf node to bitValue 457 | * 458 | * This is a dedicated routine to write directly to the bitfield. 
459 | * The bit index is split into a word index (bitID >> 5) and a bit position inside that word (bitID & 31). 460 | */ 461 | #ifdef CBT_FLAG_WRITE 462 | void cbt__HeapWrite_BitField(in const cbt_Node node, uint bitValue) 463 | { 464 | uint bitID = cbt__NodeBitID_BitField(node); 465 | 466 | cbt__SetBitValue(bitID >> 5u, bitID & 31u, bitValue); 467 | } 468 | #endif // CBT_FLAG_WRITE 469 | 470 | 471 | /******************************************************************************* 472 | * HeapRead_BitField -- Returns the value of the bit associated to a leaf node 473 | * 474 | * This is a dedicated routine to read directly from the bitfield. 475 | * 476 | */ 477 | uint cbt__HeapRead_BitField(in const cbt_Node node) 478 | { 479 | uint bitID = cbt__NodeBitID_BitField(node); 480 | 481 | return cbt__GetBitValue(u_CbtBuffer[bitID >> 5u], bitID & 31u); 482 | } 483 | 484 | 485 | /******************************************************************************* 486 | * IsLeafNode -- Checks if a node is a leaf node 487 | * (the value stored in the heap for the node equals 1) 488 | */ 489 | bool cbt_IsLeafNode(in const cbt_Node node) 490 | { 491 | return (cbt_HeapRead(node) == 1u); 492 | } 493 | 494 | 495 | /******************************************************************************* 496 | * Split -- Subdivides a node in two 497 | * The bitfield bit of the right child is set to 1; cbt_SplitNode ignores ceil nodes. 498 | */ 499 | #ifdef CBT_FLAG_WRITE 500 | void cbt_SplitNode_Fast(in const cbt_Node node) 501 | { 502 | cbt__HeapWrite_BitField(cbt_RightChildNode(node), 1u); 503 | } 504 | void cbt_SplitNode(in const cbt_Node node) 505 | { 506 | if (!cbt_IsCeilNode(node)) 507 | cbt_SplitNode_Fast(node); 508 | } 509 | 510 | 511 | /******************************************************************************* 512 | * Merge -- Merges the node with its neighbour 513 | * The bitfield bit of the right sibling is set to 0; cbt_MergeNode ignores the root node. 514 | */ 515 | void cbt_MergeNode_Fast(in const cbt_Node node) 516 | { 517 | cbt__HeapWrite_BitField(cbt_RightSiblingNode(node), 0u); 518 | } 519 | void cbt_MergeNode(in const cbt_Node node) 520 | { 521 | if (!cbt_IsRootNode(node)) 522 | cbt_MergeNode_Fast(node); 523 | } 524 | #endif // CBT_FLAG_WRITE 525 | 526 | 527 | 
/******************************************************************************* 528 | * MaxDepth -- Returns the maximum depth 529 | * (index of the least significant set bit of the first heap word) 530 | */ 531 | int cbt_MaxDepth() 532 | { 533 | return firstbitlow(u_CbtBuffer[0]); 534 | } 535 | 536 | 537 | /******************************************************************************* 538 | * NodeCount -- Returns the number of leaf nodes in the CBT 539 | * (the value stored in the heap for the root node) 540 | */ 541 | uint cbt_NodeCount() 542 | { 543 | return cbt_HeapRead(cbt_CreateNode(1u, 0)); 544 | } 545 | 546 | 547 | /******************************************************************************* 548 | * DecodeNode -- Decodes the node associated to an index 549 | * Descends from the root, moving to the right child whenever nodeID is not smaller than the value stored for the left child (that value is then subtracted from nodeID). 550 | */ 551 | cbt_Node cbt_DecodeNode(uint nodeID) 552 | { 553 | cbt_Node node = cbt_CreateNode(1u, 0); 554 | 555 | while (cbt_HeapRead(node) > 1u) { 556 | cbt_Node leftChild = cbt_LeftChildNode_Fast(node); 557 | uint cmp = cbt_HeapRead(leftChild); 558 | uint b = nodeID < cmp ? 0u : 1u; 559 | 560 | node = leftChild; 561 | node.id |= b; 562 | nodeID -= cmp * b; 563 | } 564 | 565 | return node; 566 | } 567 | 568 | 569 | /******************************************************************************* 570 | * EncodeNode -- Returns the bit index associated with the Node 571 | * 572 | * This does the inverse of the DecodeNode routine. 
573 | * Walking up to the root, the value stored for the left sibling is accumulated each time the current node is a right child (odd id). 574 | */ 575 | uint cbt_EncodeNode(in const cbt_Node node) 576 | { 577 | uint nodeID = 0u; 578 | cbt_Node nodeIterator = node; 579 | 580 | while (nodeIterator.id > 1u) { 581 | cbt_Node sibling = cbt_LeftSiblingNode_Fast(nodeIterator); 582 | uint nodeCount = cbt_HeapRead(sibling); 583 | 584 | nodeID += (nodeIterator.id & 1u) * nodeCount; 585 | nodeIterator = cbt_ParentNode(nodeIterator); 586 | } 587 | 588 | return nodeID; 589 | } 590 | -------------------------------------------------------------------------------- /glsl/cbt.glsl: -------------------------------------------------------------------------------- 1 | /* cbt.glsl - public domain library for building binary trees in parallel (GLSL port) 2 | by Jonathan Dupuy 3 | 4 | */ 5 | 6 | // buffer binding (allows for simultaneous use of multiple CBTs) 7 | #ifndef CBT_HEAP_BUFFER_BINDING 8 | # error User must specify the binding of the CBT heap buffer 9 | #endif 10 | #ifndef CBT_HEAP_BUFFER_COUNT 11 | # define CBT_HEAP_BUFFER_COUNT 1 12 | #endif 13 | layout(std430, binding = CBT_HEAP_BUFFER_BINDING) 14 | #ifndef CBT_READ_ONLY 15 | buffer cbt_Buffer { 16 | #else 17 | readonly buffer cbt_Buffer { 18 | #endif 19 | uint heap[]; 20 | } u_CbtBuffers[CBT_HEAP_BUFFER_COUNT]; 21 | 22 | // data structures 23 | struct cbt_Node { 24 | uint id; // heapID 25 | int depth; // findMSB(heapID) := node depth 26 | }; 27 | 28 | // manipulation 29 | void cbt_SplitNode_Fast(const int cbtID, in const cbt_Node node); 30 | void cbt_SplitNode (const int cbtID, in const cbt_Node node); 31 | void cbt_MergeNode_Fast(const int cbtID, in const cbt_Node node); 32 | void cbt_MergeNode (const int cbtID, in const cbt_Node node); 33 | 34 | // O(1) queries 35 | uint cbt_HeapRead(const int cbtID, in const cbt_Node node); 36 | int cbt_MaxDepth(const int cbtID); 37 | uint cbt_NodeCount(const int cbtID); 38 | bool cbt_IsLeafNode(const int cbtID, in const cbt_Node node); 39 | bool cbt_IsCeilNode(const int cbtID, in const cbt_Node node); 40 | bool 
cbt_IsRootNode( in const cbt_Node node); 41 | bool cbt_IsNullNode( in const cbt_Node node); 42 | 43 | // O(depth) queries 44 | uint cbt_EncodeNode(const int cbtID, in const cbt_Node node); 45 | cbt_Node cbt_DecodeNode(const int cbtID, uint nodeID); 46 | 47 | // node constructors 48 | cbt_Node cbt_CreateNode (uint id); 49 | cbt_Node cbt_CreateNode (uint id, int depth); 50 | cbt_Node cbt_ParentNode (const cbt_Node node); 51 | cbt_Node cbt_ParentNode_Fast (const cbt_Node node); 52 | cbt_Node cbt_SiblingNode (const cbt_Node node); 53 | cbt_Node cbt_SiblingNode_Fast (const cbt_Node node); 54 | cbt_Node cbt_LeftSiblingNode (const cbt_Node node); 55 | cbt_Node cbt_LeftSiblingNode_Fast (const cbt_Node node); 56 | cbt_Node cbt_RightSiblingNode (const cbt_Node node); 57 | cbt_Node cbt_RightSiblingNode_Fast(const cbt_Node node); 58 | cbt_Node cbt_LeftChildNode (const cbt_Node node); 59 | cbt_Node cbt_LeftChildNode_Fast (const cbt_Node node); 60 | cbt_Node cbt_RightChildNode (const cbt_Node node); 61 | cbt_Node cbt_RightChildNode_Fast (const cbt_Node node); 62 | 63 | // ----------------------------------------------------------------------------- 64 | // ----------------------------------------------------------------------------- 65 | // ----------------------------------------------------------------------------- 66 | 67 | /******************************************************************************* 68 | * GetBitValue -- Returns the value of a bit stored in a 32-bit word 69 | * 70 | */ 71 | uint cbt__GetBitValue(uint bitField, uint bitID) 72 | { 73 | return ((bitField >> bitID) & 1u); 74 | } 75 | 76 | 77 | /******************************************************************************* 78 | * SetBitValue -- Sets the value of a bit stored in a 32-bit word 79 | * 80 | */ 81 | void 82 | cbt__SetBitValue(const int cbtID, uint bufferID, uint bitID, uint bitValue) 83 | { 84 | const uint bitMask = ~(1u << bitID); 85 | 86 | atomicAnd(u_CbtBuffers[cbtID].heap[bufferID], bitMask); 
/*******************************************************************************
 * BitFieldExtract -- Reads bits [bitOffset, bitOffset + bitCount - 1] of a word
 *
 * The selected bits are returned right-aligned, i.e., in the bitCount least
 * significant bits of the result; all higher bits of the result are zero.
 *
 */
uint cbt__BitFieldExtract(uint bitField, uint bitOffset, uint bitCount)
{
    // mask with the bitCount lowest bits set
    const uint lowMask = ~(0xFFFFFFFFu << bitCount);
    // bring the first requested bit down to position 0
    const uint aligned = bitField >> bitOffset;

    return aligned & lowMask;
}
/*******************************************************************************
 * CeilNode_Fast -- Maps a node to its representative at the maximum depth
 *
 * The node id is shifted left by the number of levels separating the node
 * from cbt_MaxDepth(cbtID), and the resulting node is tagged with that
 * maximum depth. No null-node check is performed here; cbt__CeilNode is the
 * checked variant.
 *
 */
cbt_Node cbt__CeilNode_Fast(const int cbtID, in const cbt_Node node)
{
    const int ceilDepth  = cbt_MaxDepth(cbtID);
    const int levelsDown = ceilDepth - node.depth;
    const uint ceilID    = node.id << levelsDown;

    return cbt_CreateNode(ceilID, ceilDepth);
}
/*******************************************************************************
 * RightChildNode_Fast -- Builds the right child of the input node
 *
 * The child id is the parent id shifted left by one with the lowest bit set,
 * and the child lives one level deeper than its parent. The input is not
 * tested against the null node; cbt_RightChildNode is the checked variant.
 *
 */
cbt_Node cbt_RightChildNode_Fast(in const cbt_Node node)
{
    const uint leftChildID = node.id << 1;
    const int  childDepth  = node.depth + 1;

    return cbt_CreateNode(leftChildID | 1u, childDepth);
}
/*******************************************************************************
 * HeapUint32Size -- Number of 32-bit words spanned by the heap
 *
 * This is the byte size reported by cbt__HeapByteSize divided by the four
 * bytes held in each uint.
 *
 */
uint cbt__HeapUint32Size(uint cbtMaxDepth)
{
    const uint byteCount = cbt__HeapByteSize(cbtMaxDepth);

    return byteCount / 4u;
}
/*******************************************************************************
 * NodeBitID -- Index of the first heap bit that stores a node's value
 *
 * With D = cbt_MaxDepth(cbtID) and d = node.depth, the returned index is
 *      2^(d + 1) + node.id x (D + 1 - d)
 * i.e., a per-level base offset plus the node id times the number of bits
 * used by each node of that level.
 *
 */
uint cbt__NodeBitID(const int cbtID, in const cbt_Node node)
{
    const uint levelBase   = 2u << node.depth;
    const uint bitsPerNode = uint(1 + cbt_MaxDepth(cbtID) - node.depth);

    return levelBase + node.id * bitsPerNode;
}
/*******************************************************************************
 * CreateHeapArgs -- Locates the (at most two) heap words covering a bit range
 *
 * The range starts at cbt__NodeBitID(cbtID, node) and is bitCount bits long.
 * The result describes
 *   - the word holding the first bit (heapIndexLSB) and the offset of that
 *     bit inside the word (bitOffsetLSB),
 *   - how many bits of the range fall in that word (bitCountLSB),
 *   - the following word (heapIndexMSB), clamped to the last word index
 *     cbt__HeapUint32Size(maxDepth) - 1, and the number of remaining bits
 *     stored there (bitCountMSB, possibly zero).
 *
 */
cbt__HeapArgs
cbt__CreateHeapArgs(const int cbtID, in const cbt_Node node, int bitCount)
{
    cbt__HeapArgs args;
    const uint firstBit = cbt__NodeBitID(cbtID, node);
    const uint lastWord = cbt__HeapUint32Size(cbt_MaxDepth(cbtID)) - 1u;
    const uint lowWord  = firstBit >> 5u;

    args.heapIndexLSB = lowWord;
    args.heapIndexMSB = min(lowWord + 1, lastWord);
    args.bitOffsetLSB = firstBit & 31u;
    args.bitCountLSB  = min(32u - args.bitOffsetLSB, bitCount);
    args.bitCountMSB  = bitCount - args.bitCountLSB;

    return args;
}
/*******************************************************************************
 * HeapWriteExplicit -- Stores bitCount bits of bitData at the node's location
 *
 * The destination range is resolved with cbt__CreateHeapArgs. The low part of
 * bitData goes into the word that holds the first bit of the range; whatever
 * does not fit there is written at bit 0 of the second word. When the range
 * fits in one word the second insertion has a bit count of zero.
 *
 */
void
cbt__HeapWriteExplicit(
    const int cbtID,
    in const cbt_Node node,
    int bitCount,
    uint bitData
) {
    const cbt__HeapArgs span = cbt__CreateHeapArgs(cbtID, node, bitCount);
    const uint highBits = bitData >> span.bitCountLSB;

    // low part: starts at the node's bit offset inside the first word
    cbt__BitFieldInsert(cbtID,
                        span.heapIndexLSB,
                        span.bitOffsetLSB,
                        span.bitCountLSB,
                        bitData);
    // high part: continues at bit 0 of the second word
    cbt__BitFieldInsert(cbtID,
                        span.heapIndexMSB,
                        0u,
                        span.bitCountMSB,
                        highBits);
}
/*******************************************************************************
 * HeapRead_BitField -- Reads the bitfield bit associated with a node
 *
 * The bit index comes from cbt__NodeBitID_BitField; its upper bits select the
 * 32-bit heap word and its five lowest bits select the bit inside that word.
 *
 */
uint cbt__HeapRead_BitField(const int cbtID, in const cbt_Node node)
{
    const uint bitID   = cbt__NodeBitID_BitField(cbtID, node);
    const uint wordID  = bitID >> 5u;
    const uint bitSlot = bitID & 31u;

    return cbt__GetBitValue(u_CbtBuffers[cbtID].heap[wordID], bitSlot);
}
/*******************************************************************************
 * DecodeNode -- Finds the node associated with an index
 *
 * Starting from the root, the traversal descends while the value stored at
 * the current node is greater than one. At each level the value stored at the
 * left child is compared against the remaining index: a smaller index moves
 * to the left child, otherwise the traversal moves to the right child and the
 * left child's value is subtracted from the index.
 *
 */
cbt_Node cbt_DecodeNode(const int cbtID, uint nodeID)
{
    cbt_Node cursor = cbt_CreateNode(1u, 0);

    while (cbt_HeapRead(cbtID, cursor) > 1u) {
        cbt_Node child       = cbt_LeftChildNode_Fast(cursor);
        const uint leftCount = cbt_HeapRead(cbtID, child);
        // 1u when the index lies past the left subtree, 0u otherwise
        const uint goRight   = uint(nodeID >= leftCount);

        child.id |= goRight;
        nodeID   -= leftCount * goRight;
        cursor    = child;
    }

    return cursor;
}
/*******************************************************************************
 * EncodeNode -- Computes the index associated with a node
 *
 * Walks from the node up to the root. Each time the current node is a right
 * child (lowest id bit set), the value stored at its left sibling is added to
 * the result. This is the inverse of cbt_DecodeNode.
 *
 */
uint cbt_EncodeNode(const int cbtID, in const cbt_Node node)
{
    uint leafIndex = 0u;

    for (cbt_Node cursor = node; cursor.id > 1u; cursor = cbt_ParentNode(cursor)) {
        const uint isRightChild = cursor.id & 1u;
        const uint leftCount    = cbt_HeapRead(cbtID,
                                               cbt_LeftSiblingNode_Fast(cursor));

        leafIndex += isRightChild * leftCount;
    }

    return leafIndex;
}
// Function-pointer type of the callback accepted by cbt_Update(). It takes the
// tree, a node, and a const user-data pointer, and returns nothing.
// NOTE(review): presumably userData is the pointer handed to cbt_Update() and
// the callback is invoked per node; which nodes are visited is decided by
// cbt_Update's implementation, which is not visible here -- confirm.
typedef void (*cbt_UpdateCallback)(cbt_Tree *tree,
                                   const cbt_Node node,
                                   const void *userData);
/*******************************************************************************
 * FindLSB -- Returns the position of the least significant set bit
 *
 * Bit positions are counted from 0 (the lowest bit). When no bit is set
 * (x == 0) the function returns -1, which is the convention of the GLSL
 * findLSB() built-in used by the shader port of this library.
 *
 */
static inline int64_t cbt__FindLSB(uint64_t x)
{
    int64_t lsb = 0;

    /* Without this guard a zero input never meets the loop's exit condition
       and ends up shifting by 64 or more, which is undefined behaviour. */
    if (x == 0u) {
        return -1;
    }

    while ((x & 1u) == 0u) {
        x >>= 1;
        ++lsb;
    }

    return lsb;
}
/*******************************************************************************
 * BitFieldExtract -- Reads bits [bitOffset, bitOffset + bitCount - 1] of a
 * 64-bit word and returns them right-aligned (in the bitCount lowest bits of
 * the result). The requested range must lie inside the word and be shorter
 * than 64 bits, which is what the assertion checks.
 *
 */
static inline uint64_t
cbt__BitFieldExtract(
    const uint64_t bitField,
    int64_t bitOffset,
    int64_t bitCount
) {
    CBT_ASSERT(bitOffset < 64 && bitCount < 64 && bitOffset + bitCount <= 64);
    /* mask with the bitCount lowest bits set */
    const uint64_t lowMask = ~(0xFFFFFFFFFFFFFFFFULL << bitCount);
    /* bring the first requested bit down to position 0 */
    const uint64_t aligned = bitField >> bitOffset;

    return aligned & lowMask;
}
structure 296 | * 297 | */ 298 | CBTDEF cbt_Node cbt_CreateNode(uint64_t id, int64_t depth) 299 | { 300 | cbt_Node node; 301 | 302 | node.id = id; 303 | node.depth = depth; 304 | 305 | return node; 306 | } 307 | 308 | 309 | /******************************************************************************* 310 | * ParentNode -- Computes the parent of the input node 311 | * 312 | */ 313 | CBTDEF cbt_Node cbt_ParentNode_Fast(const cbt_Node node) 314 | { 315 | return cbt_CreateNode(node.id >> 1, node.depth - 1); 316 | } 317 | 318 | CBTDEF cbt_Node cbt_ParentNode(const cbt_Node node) 319 | { 320 | return cbt_IsNullNode(node) ? node : cbt_ParentNode_Fast(node); 321 | } 322 | 323 | 324 | /******************************************************************************* 325 | * CeilNode -- Returns the associated ceil node, i.e., the deepest possible leaf 326 | * 327 | */ 328 | static cbt_Node cbt__CeilNode_Fast(const cbt_Tree *tree, const cbt_Node node) 329 | { 330 | int64_t maxDepth = cbt_MaxDepth(tree); 331 | 332 | return cbt_CreateNode(node.id << (maxDepth - node.depth), maxDepth); 333 | } 334 | 335 | static cbt_Node cbt__CeilNode(const cbt_Tree *tree, const cbt_Node node) 336 | { 337 | return cbt_IsNullNode(node) ? node : cbt__CeilNode_Fast(tree, node); 338 | } 339 | 340 | 341 | /******************************************************************************* 342 | * SiblingNode -- Computes the sibling of the input node 343 | * 344 | */ 345 | CBTDEF cbt_Node cbt_SiblingNode_Fast(const cbt_Node node) 346 | { 347 | return cbt_CreateNode(node.id ^ 1u, node.depth); 348 | } 349 | 350 | CBTDEF cbt_Node cbt_SiblingNode(const cbt_Node node) 351 | { 352 | return cbt_IsNullNode(node) ? 
node : cbt_SiblingNode_Fast(node); 353 | } 354 | 355 | 356 | /******************************************************************************* 357 | * RightSiblingNode -- Computes the right sibling of the input node 358 | * 359 | */ 360 | CBTDEF cbt_Node cbt_RightSiblingNode_Fast(const cbt_Node node) 361 | { 362 | return cbt_CreateNode(node.id | 1u, node.depth); 363 | } 364 | 365 | CBTDEF cbt_Node cbt_RightSiblingNode(const cbt_Node node) 366 | { 367 | return cbt_IsNullNode(node) ? node : cbt_RightSiblingNode_Fast(node); 368 | } 369 | 370 | 371 | /******************************************************************************* 372 | * LeftSiblingNode -- Computes the left sibling of the input node 373 | * 374 | */ 375 | CBTDEF cbt_Node cbt_LeftSiblingNode_Fast(const cbt_Node node) 376 | { 377 | return cbt_CreateNode(node.id & (~1u), node.depth); 378 | } 379 | 380 | CBTDEF cbt_Node cbt_LeftSiblingNode(const cbt_Node node) 381 | { 382 | return cbt_IsNullNode(node) ? node : cbt_LeftSiblingNode_Fast(node); 383 | } 384 | 385 | 386 | /******************************************************************************* 387 | * RightChildNode -- Computes the right child of the input node 388 | * 389 | */ 390 | CBTDEF cbt_Node cbt_RightChildNode_Fast(const cbt_Node node) 391 | { 392 | return cbt_CreateNode(node.id << 1u | 1u, node.depth + 1); 393 | } 394 | 395 | CBTDEF cbt_Node cbt_RightChildNode(const cbt_Node node) 396 | { 397 | return cbt_IsNullNode(node) ? node : cbt_RightChildNode_Fast(node); 398 | } 399 | 400 | 401 | /******************************************************************************* 402 | * LeftChildNode -- Computes the left child of the input node 403 | * 404 | */ 405 | CBTDEF cbt_Node cbt_LeftChildNode_Fast(const cbt_Node node) 406 | { 407 | return cbt_CreateNode(node.id << 1u, node.depth + 1); 408 | } 409 | 410 | CBTDEF cbt_Node cbt_LeftChildNode(const cbt_Node node) 411 | { 412 | return cbt_IsNullNode(node) ? 
/*******************************************************************************
 * HeapByteSize -- Number of bytes occupied by the heap
 *
 * For a tree of maximum depth D the function returns 2^(D-1).
 *
 */
static int64_t cbt__HeapByteSize(uint64_t treeMaxDepth)
{
    const uint64_t log2ByteCount = treeMaxDepth - 1;
    const int64_t byteCount = 1LL << log2ByteCount;

    return byteCount;
}
451 | * 452 | */ 453 | static inline int64_t cbt__NodeBitID(const cbt_Tree *tree, const cbt_Node node) 454 | { 455 | int64_t tmp1 = 2LL << node.depth; 456 | int64_t tmp2 = 1LL + cbt_MaxDepth(tree) - node.depth; 457 | 458 | return tmp1 + node.id * tmp2; 459 | } 460 | 461 | 462 | /******************************************************************************* 463 | * NodeBitID_BitField -- Computes the bitfield bit location associated to a node 464 | * 465 | * Here, the node is converted into a final node and its bit offset is 466 | * returned, which is finalNodeID + 2^{D + 1} 467 | */ 468 | static int64_t 469 | cbt__NodeBitID_BitField(const cbt_Tree *tree, const cbt_Node node) 470 | { 471 | return cbt__NodeBitID(tree, cbt__CeilNode(tree, node)); 472 | } 473 | 474 | 475 | /******************************************************************************* 476 | * NodeBitSize -- Returns the number of bits storing the input node value 477 | * 478 | */ 479 | static inline int64_t 480 | cbt__NodeBitSize(const cbt_Tree *tree, const cbt_Node node) 481 | { 482 | return cbt_MaxDepth(tree) - node.depth + 1; 483 | } 484 | 485 | 486 | /******************************************************************************* 487 | * HeapArgs 488 | * 489 | * The CBT heap data structure uses an array of 64-bit words to store its data. 490 | * Whenever we need to access a certain bit range, we need to query two such 491 | * words (because sometimes the requested bit range overlaps two 64-bit words). 492 | * The HeapArg data structure provides arguments for reading from and/or 493 | * writing to the two 64-bit words that bound the queries range. 
494 | * 495 | */ 496 | typedef struct { 497 | uint64_t *bitFieldLSB, *bitFieldMSB; 498 | int64_t bitOffsetLSB; 499 | int64_t bitCountLSB, bitCountMSB; 500 | } cbt__HeapArgs; 501 | 502 | cbt__HeapArgs 503 | cbt__CreateHeapArgs(const cbt_Tree *tree, const cbt_Node node, int64_t bitCount) 504 | { 505 | int64_t alignedBitOffset = cbt__NodeBitID(tree, node); 506 | int64_t maxBufferIndex = cbt__HeapUint64Size(cbt_MaxDepth(tree)) - 1; 507 | int64_t bufferIndexLSB = (alignedBitOffset >> 6); 508 | int64_t bufferIndexMSB = cbt__MinValue(bufferIndexLSB + 1, maxBufferIndex); 509 | cbt__HeapArgs args; 510 | 511 | args.bitOffsetLSB = alignedBitOffset & 63; 512 | args.bitCountLSB = cbt__MinValue(64 - args.bitOffsetLSB, bitCount); 513 | args.bitCountMSB = bitCount - args.bitCountLSB; 514 | args.bitFieldLSB = &tree->heap[bufferIndexLSB]; 515 | args.bitFieldMSB = &tree->heap[bufferIndexMSB]; 516 | 517 | return args; 518 | } 519 | 520 | 521 | /******************************************************************************* 522 | * HeapWrite -- Sets bitCount bits located at nodeID to bitData 523 | * 524 | * Note that this procedure writes to at most two uint64 elements. 525 | * Two elements are relevant whenever the specified interval overflows 64-bit 526 | * words. 
527 | * 528 | */ 529 | static void 530 | cbt__HeapWriteExplicit( 531 | cbt_Tree *tree, 532 | const cbt_Node node, 533 | int64_t bitCount, 534 | uint64_t bitData 535 | ) { 536 | cbt__HeapArgs args = cbt__CreateHeapArgs(tree, node, bitCount); 537 | 538 | cbt__BitFieldInsert(args.bitFieldLSB, 539 | args.bitOffsetLSB, 540 | args.bitCountLSB, 541 | bitData); 542 | cbt__BitFieldInsert(args.bitFieldMSB, 543 | 0u, 544 | args.bitCountMSB, 545 | bitData >> args.bitCountLSB); 546 | } 547 | 548 | static void 549 | cbt__HeapWrite(cbt_Tree *tree, const cbt_Node node, uint64_t bitData) 550 | { 551 | cbt__HeapWriteExplicit(tree, node, cbt__NodeBitSize(tree, node), bitData); 552 | } 553 | 554 | 555 | /******************************************************************************* 556 | * HeapRead -- Returns bitCount bits located at nodeID 557 | * 558 | * Note that this procedure reads from two uint64 elements. 559 | * This is because the data is not necessarily aligned with 64-bit 560 | * words. 561 | * 562 | */ 563 | static uint64_t 564 | cbt__HeapReadExplicit( 565 | const cbt_Tree *tree, 566 | const cbt_Node node, 567 | int64_t bitCount 568 | ) { 569 | cbt__HeapArgs args = cbt__CreateHeapArgs(tree, node, bitCount); 570 | uint64_t lsb = cbt__BitFieldExtract(*args.bitFieldLSB, 571 | args.bitOffsetLSB, 572 | args.bitCountLSB); 573 | uint64_t msb = cbt__BitFieldExtract(*args.bitFieldMSB, 574 | 0u, 575 | args.bitCountMSB); 576 | 577 | return (lsb | (msb << args.bitCountLSB)); 578 | } 579 | 580 | CBTDEF uint64_t cbt_HeapRead(const cbt_Tree *tree, const cbt_Node node) 581 | { 582 | return cbt__HeapReadExplicit(tree, node, cbt__NodeBitSize(tree, node)); 583 | } 584 | 585 | 586 | /******************************************************************************* 587 | * HeapWrite_BitField -- Sets the bit associated to a leaf node to bitValue 588 | * 589 | * This is a dedicated routine to write directly to the bitfield. 
590 | * 591 | */ 592 | static void 593 | cbt__HeapWrite_BitField( 594 | cbt_Tree *tree, 595 | const cbt_Node node, 596 | const uint64_t bitValue 597 | ) { 598 | int64_t bitID = cbt__NodeBitID_BitField(tree, node); 599 | 600 | cbt__SetBitValue(&tree->heap[bitID >> 6], bitID & 63, bitValue); 601 | } 602 | 603 | 604 | /******************************************************************************* 605 | * ClearBitField -- Clears the bitfield 606 | * 607 | */ 608 | static void cbt__ClearBitfield(cbt_Tree *tree) 609 | { 610 | int64_t maxDepth = cbt_MaxDepth(tree); 611 | int64_t bufferMinID = 1LL << (maxDepth - 5); 612 | int64_t bufferMaxID = cbt__HeapUint64Size(maxDepth); 613 | 614 | CBT_PARALLEL_FOR 615 | for (int bufferID = bufferMinID; bufferID < bufferMaxID; ++bufferID) { 616 | tree->heap[bufferID] = 0; 617 | } 618 | CBT_BARRIER 619 | } 620 | 621 | 622 | /******************************************************************************* 623 | * IsLeafNode -- Checks if a node is a leaf node, i.e., that has no descendants 624 | * 625 | */ 626 | CBTDEF bool cbt_IsLeafNode(const cbt_Tree *tree, const cbt_Node node) 627 | { 628 | return (cbt_HeapRead(tree, node) == 1u); 629 | } 630 | 631 | 632 | /******************************************************************************* 633 | * GetHeapMemory -- Returns a read-only pointer to the heap memory 634 | * 635 | */ 636 | CBTDEF const char *cbt_GetHeap(const cbt_Tree *tree) 637 | { 638 | return (const char *)tree->heap; 639 | } 640 | 641 | 642 | /******************************************************************************* 643 | * SetHeapMemory -- Sets the heap memory from a read-only buffer 644 | * 645 | */ 646 | CBTDEF void cbt_SetHeap(cbt_Tree *tree, const char *buffer) 647 | { 648 | CBT_MEMCPY(tree->heap, buffer, cbt_HeapByteSize(tree)); 649 | } 650 | 651 | 652 | /******************************************************************************* 653 | * HeapByteSize -- Returns the amount of bytes consumed by the CBT heap 654 
| * 655 | */ 656 | CBTDEF int64_t cbt_HeapByteSize(const cbt_Tree *tree) 657 | { 658 | return cbt__HeapByteSize(cbt_MaxDepth(tree)); 659 | } 660 | 661 | 662 | /******************************************************************************* 663 | * ComputeSumReduction -- Sums the 2 elements below the current slot 664 | * 665 | */ 666 | static void cbt__ComputeSumReduction(cbt_Tree *tree) 667 | { 668 | int64_t depth = cbt_MaxDepth(tree); 669 | uint64_t minNodeID = (1ULL << depth); 670 | uint64_t maxNodeID = (2ULL << depth); 671 | 672 | // prepass: processes deepest levels in parallel 673 | CBT_PARALLEL_FOR 674 | for (uint64_t nodeID = minNodeID; nodeID < maxNodeID; nodeID+= 64u) { 675 | cbt_Node heapNode = cbt_CreateNode(nodeID, depth); 676 | int64_t alignedBitOffset = cbt__NodeBitID(tree, heapNode); 677 | uint64_t bitField = tree->heap[alignedBitOffset >> 6]; 678 | uint64_t bitData = 0u; 679 | 680 | // 2-bits 681 | bitField = (bitField & 0x5555555555555555ULL) 682 | + ((bitField >> 1) & 0x5555555555555555ULL); 683 | bitData = bitField; 684 | tree->heap[(alignedBitOffset - minNodeID) >> 6] = bitData; 685 | 686 | // 3-bits 687 | bitField = (bitField & 0x3333333333333333ULL) 688 | + ((bitField >> 2) & 0x3333333333333333ULL); 689 | bitData = ((bitField >> 0) & (7ULL << 0)) 690 | | ((bitField >> 1) & (7ULL << 3)) 691 | | ((bitField >> 2) & (7ULL << 6)) 692 | | ((bitField >> 3) & (7ULL << 9)) 693 | | ((bitField >> 4) & (7ULL << 12)) 694 | | ((bitField >> 5) & (7ULL << 15)) 695 | | ((bitField >> 6) & (7ULL << 18)) 696 | | ((bitField >> 7) & (7ULL << 21)) 697 | | ((bitField >> 8) & (7ULL << 24)) 698 | | ((bitField >> 9) & (7ULL << 27)) 699 | | ((bitField >> 10) & (7ULL << 30)) 700 | | ((bitField >> 11) & (7ULL << 33)) 701 | | ((bitField >> 12) & (7ULL << 36)) 702 | | ((bitField >> 13) & (7ULL << 39)) 703 | | ((bitField >> 14) & (7ULL << 42)) 704 | | ((bitField >> 15) & (7ULL << 45)); 705 | cbt__HeapWriteExplicit(tree, cbt_CreateNode(nodeID >> 2, depth - 2), 48ULL, bitData); 
706 | 707 | // 4-bits 708 | bitField = (bitField & 0x0F0F0F0F0F0F0F0FULL) 709 | + ((bitField >> 4) & 0x0F0F0F0F0F0F0F0FULL); 710 | bitData = ((bitField >> 0) & (15ULL << 0)) 711 | | ((bitField >> 4) & (15ULL << 4)) 712 | | ((bitField >> 8) & (15ULL << 8)) 713 | | ((bitField >> 12) & (15ULL << 12)) 714 | | ((bitField >> 16) & (15ULL << 16)) 715 | | ((bitField >> 20) & (15ULL << 20)) 716 | | ((bitField >> 24) & (15ULL << 24)) 717 | | ((bitField >> 28) & (15ULL << 28)); 718 | cbt__HeapWriteExplicit(tree, cbt_CreateNode(nodeID >> 3, depth - 3), 32ULL, bitData); 719 | 720 | // 5-bits 721 | bitField = (bitField & 0x00FF00FF00FF00FFULL) 722 | + ((bitField >> 8) & 0x00FF00FF00FF00FFULL); 723 | bitData = ((bitField >> 0) & (31ULL << 0)) 724 | | ((bitField >> 11) & (31ULL << 5)) 725 | | ((bitField >> 22) & (31ULL << 10)) 726 | | ((bitField >> 33) & (31ULL << 15)); 727 | cbt__HeapWriteExplicit(tree, cbt_CreateNode(nodeID >> 4, depth - 4), 20ULL, bitData); 728 | 729 | // 6-bits 730 | bitField = (bitField & 0x0000FFFF0000FFFFULL) 731 | + ((bitField >> 16) & 0x0000FFFF0000FFFFULL); 732 | bitData = ((bitField >> 0) & (63ULL << 0)) 733 | | ((bitField >> 26) & (63ULL << 6)); 734 | cbt__HeapWriteExplicit(tree, cbt_CreateNode(nodeID >> 5, depth - 5), 12ULL, bitData); 735 | 736 | // 7-bits 737 | bitField = (bitField & 0x00000000FFFFFFFFULL) 738 | + ((bitField >> 32) & 0x00000000FFFFFFFFULL); 739 | bitData = bitField; 740 | cbt__HeapWriteExplicit(tree, cbt_CreateNode(nodeID >> 6, depth - 6), 7ULL, bitData); 741 | } 742 | CBT_BARRIER 743 | depth-= 6; 744 | 745 | // iterate over elements atomically 746 | while (--depth >= 0) { 747 | uint64_t minNodeID = 1ULL << depth; 748 | uint64_t maxNodeID = 2ULL << depth; 749 | 750 | CBT_PARALLEL_FOR 751 | for (uint64_t j = minNodeID; j < maxNodeID; ++j) { 752 | uint64_t x0 = cbt_HeapRead(tree, cbt_CreateNode(j << 1 , depth + 1)); 753 | uint64_t x1 = cbt_HeapRead(tree, cbt_CreateNode(j << 1 | 1, depth + 1)); 754 | 755 | cbt__HeapWrite(tree, 
cbt_CreateNode(j, depth), x0 + x1); 756 | } 757 | CBT_BARRIER 758 | } 759 | } 760 | 761 | 762 | /******************************************************************************* 763 | * Buffer Ctor 764 | * 765 | */ 766 | CBTDEF cbt_Tree *cbt_CreateAtDepth(int64_t maxDepth, int64_t depth) 767 | { 768 | CBT_ASSERT(maxDepth >= 5 && "maxDepth must be at least 5"); 769 | CBT_ASSERT(maxDepth <= 58 && "maxDepth must be at most 58"); 770 | cbt_Tree *tree = (cbt_Tree *)CBT_MALLOC(sizeof(*tree)); 771 | 772 | tree->heap = (uint64_t *)CBT_MALLOC(cbt__HeapByteSize(maxDepth)); 773 | tree->heap[0] = 1ULL << (maxDepth); // store max Depth 774 | 775 | cbt_ResetToDepth(tree, depth); 776 | 777 | return tree; 778 | } 779 | 780 | CBTDEF cbt_Tree *cbt_Create(int64_t maxDepth) 781 | { 782 | return cbt_CreateAtDepth(maxDepth, 0); 783 | } 784 | 785 | 786 | /******************************************************************************* 787 | * Buffer Dtor 788 | * 789 | */ 790 | CBTDEF void cbt_Release(cbt_Tree *tree) 791 | { 792 | CBT_FREE(tree->heap); 793 | CBT_FREE(tree); 794 | } 795 | 796 | 797 | /******************************************************************************* 798 | * ResetToDepth -- Initializes a CBT to its a specific subdivision level 799 | * 800 | */ 801 | CBTDEF void cbt_ResetToDepth(cbt_Tree *tree, int64_t depth) 802 | { 803 | CBT_ASSERT(depth >= 0 && "depth must be at least equal to 0"); 804 | CBT_ASSERT(depth <= cbt_MaxDepth(tree) && "depth must be at most equal to maxDepth"); 805 | uint64_t minNodeID = 1ULL << depth; 806 | uint64_t maxNodeID = 2ULL << depth; 807 | 808 | cbt__ClearBitfield(tree); 809 | 810 | CBT_PARALLEL_FOR 811 | for (uint64_t nodeID = minNodeID; nodeID < maxNodeID; ++nodeID) { 812 | cbt_Node node = cbt_CreateNode(nodeID, depth); 813 | 814 | cbt__HeapWrite_BitField(tree, node, 1u); 815 | } 816 | CBT_BARRIER 817 | 818 | cbt__ComputeSumReduction(tree); 819 | } 820 | 821 | 822 | 
/******************************************************************************* 823 | * ResetToCeil -- Initializes a CBT to its maximum subdivision level 824 | * 825 | */ 826 | CBTDEF void cbt_ResetToCeil(cbt_Tree *tree) 827 | { 828 | cbt_ResetToDepth(tree, cbt_MaxDepth(tree)); 829 | } 830 | 831 | 832 | /******************************************************************************* 833 | * ResetToRoot -- Initializes a CBT to its minimum subdivision level 834 | * 835 | */ 836 | CBTDEF void cbt_ResetToRoot(cbt_Tree *tree) 837 | { 838 | cbt_ResetToDepth(tree, 0); 839 | } 840 | 841 | 842 | /******************************************************************************* 843 | * Split -- Subdivides a node in two 844 | * 845 | * The _Fast version does not check if the node can actually split, so 846 | * use it wisely, i.e., when you're absolutely sure the node depth is 847 | * less than maxDepth. 848 | * 849 | */ 850 | CBTDEF void cbt_SplitNode_Fast(cbt_Tree *tree, const cbt_Node node) 851 | { 852 | cbt__HeapWrite_BitField(tree, cbt_RightChildNode(node), 1u); 853 | } 854 | 855 | CBTDEF void cbt_SplitNode(cbt_Tree *tree, const cbt_Node node) 856 | { 857 | if (!cbt_IsCeilNode(tree, node)) 858 | cbt_SplitNode_Fast(tree, node); 859 | } 860 | 861 | 862 | /******************************************************************************* 863 | * Merge -- Merges the node with its neighbour 864 | * 865 | * The _Fast version does not check if the node can actually merge, so 866 | * use it wisely, i.e., when you're absolutely sure the node depth is 867 | * greater than 0. 
868 | * 869 | */ 870 | CBTDEF void cbt_MergeNode_Fast(cbt_Tree *tree, const cbt_Node node) 871 | { 872 | cbt__HeapWrite_BitField(tree, cbt_RightSiblingNode(node), 0u); 873 | } 874 | 875 | CBTDEF void cbt_MergeNode(cbt_Tree *tree, const cbt_Node node) 876 | { 877 | if (!cbt_IsRootNode(node)) 878 | cbt_MergeNode_Fast(tree, node); 879 | } 880 | 881 | 882 | /******************************************************************************* 883 | * Update -- Split or merge each node in parallel 884 | * 885 | * The user provides an updater function that is responsible for 886 | * splitting or merging each node. 887 | * 888 | */ 889 | CBTDEF void 890 | cbt_Update(cbt_Tree *tree, cbt_UpdateCallback updater, const void *userData) 891 | { 892 | CBT_PARALLEL_FOR 893 | for (int64_t handle = 0; handle < cbt_NodeCount(tree); ++handle) { 894 | updater(tree, cbt_DecodeNode(tree, handle), userData); 895 | } 896 | CBT_BARRIER 897 | 898 | cbt__ComputeSumReduction(tree); 899 | } 900 | 901 | 902 | /******************************************************************************* 903 | * MaxDepth -- Returns the max CBT depth 904 | * 905 | */ 906 | CBTDEF int64_t cbt_MaxDepth(const cbt_Tree *tree) 907 | { 908 | return cbt__FindLSB(tree->heap[0]); 909 | } 910 | 911 | 912 | /******************************************************************************* 913 | * NodeCount -- Returns the number of triangles in the CBT 914 | * 915 | */ 916 | CBTDEF int64_t cbt_NodeCount(const cbt_Tree *tree) 917 | { 918 | return cbt_HeapRead(tree, cbt_CreateNode(1u, 0)); 919 | } 920 | 921 | 922 | /******************************************************************************* 923 | * DecodeNode -- Returns the leaf node associated to index nodeID 924 | * 925 | * This is procedure is for iterating over the nodes. 
926 | * 927 | */ 928 | CBTDEF cbt_Node cbt_DecodeNode(const cbt_Tree *tree, int64_t handle) 929 | { 930 | CBT_ASSERT(handle < cbt_NodeCount(tree) && "handle > NodeCount"); 931 | CBT_ASSERT(handle >= 0 && "handle < 0"); 932 | 933 | cbt_Node node = cbt_CreateNode(1u, 0); 934 | 935 | while (cbt_HeapRead(tree, node) > 1u) { 936 | cbt_Node heapNode = cbt_CreateNode(node.id<<= 1u, ++node.depth); 937 | uint64_t cmp = cbt_HeapRead(tree, heapNode); 938 | uint64_t b = (uint64_t)handle < cmp ? 0u : 1u; 939 | 940 | node.id|= b; 941 | handle-= cmp * b; 942 | } 943 | 944 | return node; 945 | } 946 | 947 | 948 | /******************************************************************************* 949 | * EncodeNode -- Returns the bit index associated with the Node 950 | * 951 | * This does the inverse of the DecodeNode routine. 952 | * 953 | */ 954 | CBTDEF int64_t cbt_EncodeNode(const cbt_Tree *tree, const cbt_Node node) 955 | { 956 | CBT_ASSERT(cbt_IsLeafNode(tree, node) && "node is not a leaf"); 957 | 958 | int64_t handle = 0u; 959 | cbt_Node nodeIterator = node; 960 | 961 | while (nodeIterator.id > 1u) { 962 | cbt_Node sibling = cbt_LeftSiblingNode_Fast(nodeIterator); 963 | uint64_t nodeCount = cbt_HeapRead(tree, sibling); 964 | 965 | handle+= (nodeIterator.id & 1u) * nodeCount; 966 | nodeIterator = cbt_ParentNode_Fast(nodeIterator); 967 | } 968 | 969 | return handle; 970 | } 971 | 972 | 973 | #undef CBT_ATOMIC 974 | #undef CBT_PARALLEL_FOR 975 | #undef CBT_BARRIER 976 | #endif 977 | 978 | --------------------------------------------------------------------------------