├── AdaptiveGridTree.h ├── FastCollisionDetectionLib.h ├── Generator.h ├── LICENSE ├── README.md ├── test1-no-grid-random-distribution.txt ├── test2-unoptimized-grid.txt ├── test3-optimized-grid-1.txt ├── test4-optimized-grid-multithreaded-collision-test.txt ├── test5-teapot-in-stadium-problem-unoptimized.txt ├── test6-teapot-in-stadium-problem-library-solution.txt ├── test7-teapot-in-stadium-library-allpairs.txt ├── test8-simd-brute-force-20k-particles.txt └── test9-sparse-linear-adaptive-grid-20k-particles.txt /AdaptiveGridTree.h: -------------------------------------------------------------------------------- 1 | /* 2 | * AdaptiveGridTree.h 3 | * 4 | * Created on: Apr 14, 2022 5 | * Author: tugrul 6 | */ 7 | 8 | #ifndef ADAPTIVEGRIDTREE_H_ 9 | #define ADAPTIVEGRIDTREE_H_ 10 | 11 | 12 | 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include"FastCollisionDetectionLib.h" 18 | 19 | // simple dense octree 20 | constexpr int numNodesPerNode = 64; 21 | struct GridTreeNode 22 | { 23 | GridTreeNode() 24 | { 25 | for(int i=0;i childNodes[numNodesPerNode]; 30 | std::vector parOrder; 31 | 32 | }; 33 | 34 | 35 | template 36 | class AdaptiveGridTree 37 | { 38 | public: 39 | AdaptiveGridTree(const float minx, const float miny, const float minz, const float maxx, const float maxy, const float maxz): 40 | minX(minx),minY(miny),minZ(minz),maxX(maxx),maxY(maxy),maxZ(maxz), workCtr(0) 41 | { 42 | 43 | 44 | for(int i=0;i lg(mut); 83 | parId.reset(); 84 | parMinX.reset(); 85 | parMinY.reset(); 86 | parMinZ.reset(); 87 | parMaxX.reset(); 88 | parMaxY.reset(); 89 | parMaxZ.reset(); 90 | } 91 | 92 | template 93 | void addParticles(const int N, Derived * ptr) 94 | { 95 | std::lock_guard lg(mut); 96 | const int ofsId = parId.allocate(N); 97 | const int ofsMinx = parMinX.allocate(N); 98 | const int ofsMiny = parMinY.allocate(N); 99 | const int ofsMinz = parMinZ.allocate(N); 100 | const int ofsMaxx = parMaxX.allocate(N); 101 | const int ofsMaxy = parMaxY.allocate(N); 102 | 
const int ofsMaxz = parMaxZ.allocate(N); 103 | for(int i=0;i*>(ptr+i)->getId()); 106 | parMinX.set(ofsMinx+i,static_cast*>(ptr+i)->getMinX()); 107 | parMinY.set(ofsMiny+i,static_cast*>(ptr+i)->getMinY()); 108 | parMinZ.set(ofsMinz+i,static_cast*>(ptr+i)->getMinZ()); 109 | parMaxX.set(ofsMaxx+i,static_cast*>(ptr+i)->getMaxX()); 110 | parMaxY.set(ofsMaxy+i,static_cast*>(ptr+i)->getMaxY()); 111 | parMaxZ.set(ofsMaxz+i,static_cast*>(ptr+i)->getMaxZ()); 112 | } 113 | } 114 | 115 | 116 | std::vector> computeAllPairs(GridTreeNode * parent = nullptr, float minx=0.0f, float miny=0.0f, float minz=0.0f, float maxx=0.0f, float maxy=0.0f, float maxz=0.0f, 117 | std::vector> * result=nullptr, FastColDetLib::Memory> * mapping = nullptr) 118 | { 119 | 120 | GridTreeNode root; 121 | std::vector> resultRoot; 122 | if(!parent) 123 | { 124 | 125 | workCtr=0; 126 | const int N = parId.size(); 127 | mappingRoot.reset(); 128 | mappingRoot.allocate(N); 129 | minx = minX; 130 | miny = minY; 131 | minz = minZ; 132 | maxx = maxX; 133 | maxy = maxY; 134 | maxz = maxZ; 135 | parent = &root; 136 | 137 | 138 | 139 | for(int i=0;iparOrder.push_back(i); 142 | mappingRoot.getRef(i).reset(); 143 | } 144 | result = &resultRoot; 145 | mapping = &mappingRoot; 146 | 147 | } 148 | 149 | 150 | 151 | // build tree until 4096 or less particles left for each leaf 152 | if(parent->parOrder.size()>10000) 153 | { 154 | const int N = parent->parOrder.size(); 155 | for(int i=0;ichildNodes[i]=std::make_shared(); 159 | 160 | } 161 | 162 | 163 | const float stepx = 4.0f/(maxx - minx); 164 | const float stepy = 4.0f/(maxy - miny); 165 | const float stepz = 4.0f/(maxz - minz); 166 | 167 | for(int i=0;iparOrder[i]; 175 | const int mincornerstartx = std::floor((parMinX.get(parOrdI) - minx) * stepx); 176 | const int maxcornerendx = std::floor((parMaxX.get(parOrdI) - minx) * stepx); 177 | const int mincornerstarty = std::floor((parMinY.get(parOrdI) - miny) * stepy); 178 | const int maxcornerendy = 
std::floor((parMaxY.get(parOrdI) - miny) * stepy); 179 | const int mincornerstartz = std::floor((parMinZ.get(parOrdI) - minz) * stepz); 180 | const int maxcornerendz = std::floor((parMaxZ.get(parOrdI) - minz) * stepz); 181 | for(int ii=mincornerstartz;ii<=maxcornerendz;ii++) 182 | for(int j=mincornerstarty;j<=maxcornerendy;j++) 183 | for(int k=mincornerstartx;k<=maxcornerendx;k++) 184 | { 185 | 186 | if(ii<0 || ii>=4 || j<0 || j>=4 || k<0 || k>=4) 187 | continue; 188 | auto & acc = accumulator[k+j*4+ii*16]; 189 | acc.set(acc.allocate(1), parOrdI); 190 | } 191 | } 192 | 193 | { 194 | 195 | for(int i=0;ichildNodes[i]->parOrder.push_back(accumulator[i].get(j)); 201 | } 202 | } 203 | } 204 | 205 | for(int i=0;ichildNodes[i]->parOrder.size()>1) 208 | { 209 | computeAllPairs(parent->childNodes[i].get(), 210 | minx+(i&3)/stepx, miny+((i/4)&3)/stepy, minz+(i/16)/stepz, 211 | minx+(i&3)/stepx + 1.0f/stepx, miny+((i/4)&3)/stepy + 1.0f/stepy, minz+(i/16)/stepz + 1.0f/stepz, result,mapping); 212 | 213 | } 214 | } 215 | } 216 | else 217 | { 218 | 219 | if(parent->parOrder.size()>1) 220 | { 221 | 222 | // offload to another thread as a sparse-linear-adaptive-grid 223 | workCtr++; 224 | 225 | taskQueue.push2([&,parent,minx,miny,minz,maxx,maxy,maxz,result](FastColDetLib::MemoryPool mem) 226 | { 227 | 228 | FastColDetLib::AdaptiveGridV2 subGrid(mem,minx,miny,minz,maxx,maxy,maxz); 229 | subGrid.clear(); 230 | { 231 | 232 | subGrid.addParticlesWithoutInterface(parent->parOrder.size(), 0, parent->parOrder, 233 | parId, 234 | parMinX, parMinY, parMinZ, 235 | parMaxX, parMaxY, parMaxZ 236 | ); 237 | 238 | subGrid.buildTree(); 239 | } 240 | 241 | 242 | 243 | const std::vector> coll = subGrid.findCollisionsAll(); 244 | 245 | 246 | if(coll.size()>0) 247 | { 248 | std::lock_guard lg(mut); 249 | result->insert(result->end(),coll.begin(),coll.end()); 250 | } 251 | 252 | }); 253 | 254 | } 255 | 256 | } 257 | 258 | 259 | if(parent == &root) 260 | { 261 | constexpr int mutN = 1024; 262 | 
constexpr int mutN1 = mutN-1; 263 | FastColDetLib::MutexWithoutFalseSharing mutArr[mutN]; 264 | 265 | int endQueue = 0; 266 | while(endQueue lg(mut); 275 | resultRootMem.reset(); 276 | 277 | { 278 | 279 | { 280 | 281 | 282 | const int nr = resultRoot.size(); 283 | 284 | 285 | for(int i=0;i=nr) 295 | break; 296 | 297 | const unsigned int rfirst = resultRoot[i+j].first; 298 | const int rsecond = resultRoot[i+j].second; 299 | if(lastId != rfirst) 300 | { 301 | //std::lock_guard lg(mutArr[rfirst&255].mut); 302 | mutArr[lastId&mutN1].mut.unlock(); 303 | mutArr[rfirst&mutN1].mut.lock(); 304 | mapping->getRef(rfirst).insert(rsecond); 305 | lastId = rfirst; 306 | } 307 | else 308 | { 309 | //std::lock_guard lg(mutArr[rfirst&255].mut); 310 | mapping->getRef(rfirst).insert(rsecond); 311 | } 312 | } 313 | 314 | mutArr[lastId&mutN1].mut.unlock(); 315 | 316 | }); 317 | } 318 | 319 | endQueue = 0; 320 | while(endQueuesize(); 332 | 333 | int allocSize = 0; 334 | std::vector> allocOfs; 335 | for(int i=0;i lg(mutArr[i&255].mut); 338 | const int isz = mapping->getRef(i).size(); 339 | if(isz > 0) 340 | { 341 | allocOfs.push_back(std::pair(i,allocSize)); 342 | allocSize += isz; 343 | } 344 | } 345 | 346 | resultRootMem.allocate(allocSize); 347 | 348 | const int N2 = allocOfs.size(); 349 | for(int i0=0;i0=N2) 359 | break; 360 | 361 | 362 | auto & ref = mapping->getRef(allocOfs[i].first); 363 | const int n = ref.size(); 364 | const int ofs0 = allocOfs[i].second; 365 | for(int j=0;j(allocOfs[i].first,ref.get(j))); 368 | 369 | } 370 | 371 | } 372 | 373 | }); 374 | } 375 | 376 | endQueue = 0; 377 | while(endQueue thr; 402 | FastColDetLib::SyncQueue> taskQueue; 403 | FastColDetLib::SyncQueue taskCompleteQueue; 404 | 405 | FastColDetLib::Memory> resultRootMem; 406 | FastColDetLib::Memory> mappingRoot; 407 | 408 | 409 | 410 | const float minX,minY,minZ,maxX,maxY,maxZ; 411 | int workCtr; 412 | FastColDetLib::Memory parId; 413 | 414 | FastColDetLib::Memory parMinX; 415 | FastColDetLib::Memory 
parMinY; 416 | FastColDetLib::Memory parMinZ; 417 | FastColDetLib::Memory parMaxX; 418 | FastColDetLib::Memory parMaxY; 419 | FastColDetLib::Memory parMaxZ; 420 | 421 | 422 | FastColDetLib::Memory accumulator[numNodesPerNode]; 423 | 424 | }; 425 | 426 | 427 | #endif /* ADAPTIVEGRIDTREE_H_ */ 428 | -------------------------------------------------------------------------------- /FastCollisionDetectionLib.h: -------------------------------------------------------------------------------- 1 | /* 2 | * FastCollisionDetectionLib.h 3 | * 4 | * Created on: Mar 11, 2022 5 | * Author: tugrul 6 | */ 7 | 8 | #ifndef FASTCOLLISIONDETECTIONLIB_H_ 9 | #define FASTCOLLISIONDETECTIONLIB_H_ 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | 30 | namespace FastColDetLib 31 | { 32 | 33 | 34 | 35 | template 36 | class SyncQueue 37 | { 38 | public: 39 | SyncQueue(){} 40 | void push(T t) 41 | { 42 | std::unique_lock lc(m); 43 | q.push(t); 44 | c.notify_one(); 45 | } 46 | 47 | void push2(T t) 48 | { 49 | std::unique_lock lc(m); 50 | q.push(t); 51 | c.notify_all(); 52 | } 53 | 54 | T pop() 55 | { 56 | std::unique_lock lc(m); 57 | while(q.empty()) 58 | { 59 | c.wait(lc); 60 | } 61 | T result = q.front(); 62 | q.pop(); 63 | return result; 64 | } 65 | 66 | int size() 67 | { 68 | std::unique_lock lc(m); 69 | return q.size(); 70 | } 71 | private: 72 | std::queue q; 73 | std::mutex m; 74 | std::condition_variable c; 75 | }; 76 | 77 | struct MutexWithoutFalseSharing 78 | { 79 | std::mutex mut; 80 | char padding[(64-sizeof(std::mutex))>0?(64-sizeof(std::mutex)):64]; 81 | }; 82 | 83 | inline 84 | const int intersectDim(const float minx, const float maxx, const float minx2, const float maxx2) noexcept 85 | { 86 | return !((maxx < minx2) || (maxx2 < minx)); 87 | } 88 | 89 | 90 | 91 | 92 | inline 93 | 
void comp4vs4( const int * const __restrict__ partId1, const int * const __restrict__ partId2, 94 | const float * const __restrict__ minx1, const float * const __restrict__ minx2, 95 | const float * const __restrict__ miny1, const float * const __restrict__ miny2, 96 | const float * const __restrict__ minz1, const float * const __restrict__ minz2, 97 | const float * const __restrict__ maxx1, const float * const __restrict__ maxx2, 98 | const float * const __restrict__ maxy1, const float * const __restrict__ maxy2, 99 | const float * const __restrict__ maxz1, const float * const __restrict__ maxz2, 100 | int * const __restrict__ out 101 | ) noexcept 102 | { 103 | 104 | alignas(32) 105 | const int tileId2[16]={ 106 | // 0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3 107 | partId2[0],partId2[0],partId2[0],partId2[0], 108 | partId2[1],partId2[1],partId2[1],partId2[1], 109 | partId2[2],partId2[2],partId2[2],partId2[2], 110 | partId2[3],partId2[3],partId2[3],partId2[3] 111 | }; 112 | 113 | 114 | 115 | alignas(32) 116 | const float tileMinX2[16]={ 117 | // 0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3 118 | minx2[0],minx2[0],minx2[0],minx2[0], 119 | minx2[1],minx2[1],minx2[1],minx2[1], 120 | minx2[2],minx2[2],minx2[2],minx2[2], 121 | minx2[3],minx2[3],minx2[3],minx2[3] 122 | }; 123 | 124 | 125 | alignas(32) 126 | const float tileMinY2[16]={ 127 | // 0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3 128 | miny2[0],miny2[0],miny2[0],miny2[0], 129 | miny2[1],miny2[1],miny2[1],miny2[1], 130 | miny2[2],miny2[2],miny2[2],miny2[2], 131 | miny2[3],miny2[3],miny2[3],miny2[3] 132 | }; 133 | 134 | 135 | 136 | alignas(32) 137 | const float tileMinZ2[16]={ 138 | // 0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3 139 | minz2[0],minz2[0],minz2[0],minz2[0], 140 | minz2[1],minz2[1],minz2[1],minz2[1], 141 | minz2[2],minz2[2],minz2[2],minz2[2], 142 | minz2[3],minz2[3],minz2[3],minz2[3] 143 | }; 144 | 145 | 146 | 147 | 148 | 149 | 150 | alignas(32) 151 | const float tileMaxX2[16]={ 152 | // 0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3 153 | 
maxx2[0],maxx2[0],maxx2[0],maxx2[0], 154 | maxx2[1],maxx2[1],maxx2[1],maxx2[1], 155 | maxx2[2],maxx2[2],maxx2[2],maxx2[2], 156 | maxx2[3],maxx2[3],maxx2[3],maxx2[3] 157 | }; 158 | 159 | 160 | 161 | alignas(32) 162 | const float tileMaxY2[16]={ 163 | // 0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3 164 | maxy2[0],maxy2[0],maxy2[0],maxy2[0], 165 | maxy2[1],maxy2[1],maxy2[1],maxy2[1], 166 | maxy2[2],maxy2[2],maxy2[2],maxy2[2], 167 | maxy2[3],maxy2[3],maxy2[3],maxy2[3] 168 | }; 169 | 170 | 171 | 172 | alignas(32) 173 | const float tileMaxZ2[16]={ 174 | // 0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3 175 | maxz2[0],maxz2[0],maxz2[0],maxz2[0], 176 | maxz2[1],maxz2[1],maxz2[1],maxz2[1], 177 | maxz2[2],maxz2[2],maxz2[2],maxz2[2], 178 | maxz2[3],maxz2[3],maxz2[3],maxz2[3] 179 | }; 180 | 181 | 182 | 183 | for(int i=0;i<16;i++) 184 | { 185 | const int o1 = (partId1[i] < tileId2[i]); 186 | const int o2 = intersectDim(minx1[i], maxx1[i], tileMinX2[i], tileMaxX2[i]); 187 | const int o3 = intersectDim(miny1[i], maxy1[i], tileMinY2[i], tileMaxY2[i]); 188 | const int o4 = intersectDim(minz1[i], maxz1[i], tileMinZ2[i], tileMaxZ2[i]); 189 | out[i] = o1 && o2 && o3 && o4; 190 | 191 | } 192 | 193 | }; 194 | 195 | 196 | 197 | 198 | /* 199 | * interface to build various objects that can collide each other 200 | * 201 | */ 202 | template 203 | class IParticle 204 | { 205 | public: 206 | virtual const CoordType getMaxX()const =0; 207 | virtual const CoordType getMaxY()const =0; 208 | virtual const CoordType getMaxZ()const =0; 209 | 210 | virtual const CoordType getMinX()const =0; 211 | virtual const CoordType getMinY()const =0; 212 | virtual const CoordType getMinZ()const =0; 213 | virtual const int getId()const =0; 214 | 215 | const bool intersectX(IParticle* p) 216 | { 217 | return !((getMaxX() < p->getMinX()) || (p->getMaxX() < getMinX())); 218 | } 219 | 220 | const bool intersectY(IParticle* p) 221 | { 222 | return !((getMaxY() < p->getMinY()) || (p->getMaxY() < getMinY())); 223 | } 224 | 225 | const bool 
intersectZ(IParticle* p) 226 | { 227 | return !((getMaxZ() < p->getMinZ()) || (p->getMaxZ() < getMinZ())); 228 | } 229 | 230 | virtual ~IParticle(){}; 231 | }; 232 | 233 | 234 | class Bench 235 | { 236 | public: 237 | Bench(size_t * targetPtr) 238 | { 239 | target=targetPtr; 240 | t1 = std::chrono::duration_cast< std::chrono::nanoseconds >(std::chrono::high_resolution_clock::now().time_since_epoch()); 241 | } 242 | 243 | ~Bench() 244 | { 245 | t2 = std::chrono::duration_cast< std::chrono::nanoseconds >(std::chrono::high_resolution_clock::now().time_since_epoch()); 246 | *target= t2.count() - t1.count(); 247 | } 248 | private: 249 | size_t * target; 250 | std::chrono::nanoseconds t1,t2; 251 | }; 252 | 253 | // keeps record of unique values inserted 254 | // works for positive integers (-1 reserved for first comparisons) 255 | template 256 | struct FastUnique 257 | { 258 | public: 259 | FastUnique() 260 | { 261 | it=0; 262 | for(int i=0;i 339 | class Memory 340 | { 341 | public: 342 | Memory() 343 | { 344 | memory=std::make_shared>(); 345 | allocPtr=std::make_shared(); 346 | *allocPtr = 0; 347 | allocPtrPtr=allocPtr.get(); 348 | memory->resize(1024); 349 | ptr=memory->data(); 350 | } 351 | 352 | inline 353 | DataType * getPtr(const int index) const noexcept 354 | { 355 | 356 | return ptr+index; 357 | } 358 | 359 | inline 360 | DataType& getRef(const int index) const noexcept 361 | { 362 | 363 | return ((DataType* __restrict__ const)ptr)[index]; 364 | } 365 | 366 | inline 367 | const DataType get(const int index) const noexcept 368 | { 369 | 370 | return ((DataType* __restrict__ const)ptr)[index]; 371 | } 372 | 373 | 374 | inline 375 | DataType get(const int index) noexcept 376 | { 377 | 378 | return ((DataType* __restrict__ const)ptr)[index]; 379 | } 380 | 381 | inline 382 | void set(const int index, const DataType data) const noexcept 383 | { 384 | 385 | ((DataType* __restrict__ const)ptr)[index]=data; 386 | } 387 | 388 | inline 389 | void readFrom(Memory& mem, 
const int index, const int indexThis, const int n) 390 | { 391 | std::copy(mem.ptr+index,mem.ptr+index+n,ptr+indexThis); 392 | } 393 | 394 | inline 395 | void writeTo(std::vector& vec) 396 | { 397 | std::copy(ptr,ptr+*allocPtrPtr,vec.data()); 398 | } 399 | 400 | inline 401 | const int allocate(const int size) 402 | { 403 | const int result = *allocPtrPtr; 404 | 405 | while(size + *allocPtrPtr >= memory->size()) 406 | { 407 | memory->resize(memory->size()*2); 408 | } 409 | *allocPtrPtr += size; 410 | ptr=memory->data(); 411 | return result; 412 | } 413 | 414 | inline 415 | const int capacity() 416 | { 417 | return memory->size(); 418 | } 419 | 420 | inline 421 | const int size() 422 | { 423 | return *allocPtrPtr; 424 | } 425 | 426 | inline 427 | void reset() 428 | { 429 | *allocPtrPtr = 0; 430 | } 431 | 432 | private: 433 | DataType* ptr; 434 | std::shared_ptr allocPtr; 435 | int* allocPtrPtr; 436 | std::shared_ptr> memory; 437 | }; 438 | 439 | 440 | constexpr int testParticleLimit = 128; // maximum particle AABB overlapping allowed on same cell 441 | constexpr int testUniqueLimit = 32; // maximum particle AABB overlapping allowed per particle 442 | struct MemoryPool 443 | { 444 | void clear() 445 | { 446 | nodeCollisionMask.reset(); 447 | 448 | childNodeCount.reset(); 449 | index.reset(); 450 | indexParticle.reset(); 451 | orderParticle.reset(); 452 | minX.reset(); 453 | maxX.reset(); 454 | minY.reset(); 455 | maxY.reset(); 456 | minZ.reset(); 457 | maxZ.reset(); 458 | nodeMinX.reset(); 459 | nodeMinY.reset(); 460 | nodeMinZ.reset(); 461 | nodeInvWidth.reset(); 462 | nodeInvHeight.reset(); 463 | nodeInvDepth.reset(); 464 | leafOffset.reset(); 465 | allPairsColl.reset(); 466 | allPairsCollmapping.reset(); 467 | for(int i=0;i<64;i++) 468 | { 469 | idTmp[i].reset(); 470 | orderTmp[i].reset(); 471 | } 472 | } 473 | 474 | 475 | 476 | // node-particle collision 477 | Memory nodeCollisionMask; 478 | 479 | Memory childNodeCount; 480 | Memory index; 481 | Memory 
indexParticle; 482 | Memory orderParticle; 483 | Memory nodeMinX; 484 | Memory nodeMinY; 485 | Memory nodeMinZ; 486 | Memory nodeInvWidth; 487 | Memory nodeInvHeight; 488 | Memory nodeInvDepth; 489 | Memory minX; 490 | Memory maxX; 491 | Memory minY; 492 | Memory maxY; 493 | Memory minZ; 494 | Memory maxZ; 495 | 496 | Memory idTmp[64]; 497 | Memory orderTmp[64]; 498 | 499 | Memory> allPairsColl; 500 | 501 | Memory> allPairsCollmapping; 502 | Memory leafOffset; 503 | 504 | }; 505 | 506 | struct AdaptiveGridV2Fields 507 | { 508 | AdaptiveGridV2Fields(MemoryPool mPool, const float minx, const float miny, const float minz, 509 | const float maxx, const float maxy, const float maxz):mem(mPool), 510 | minCornerX(minx),minCornerY(miny),minCornerZ(minz),maxCornerX(maxx),maxCornerY(maxy),maxCornerZ(maxz), 511 | cellWidth ((maxx-minx)*0.25f), 512 | cellHeight ((maxy-miny)*0.25f), 513 | cellDepth ((maxz-minz)*0.25f), 514 | cellWidthInv (1.0f/((maxx-minx)*0.25f)), 515 | cellHeightInv(1.0f/((maxy-miny)*0.25f)), 516 | cellDepthInv (1.0f/((maxz-minz)*0.25f)) 517 | 518 | { 519 | 520 | } 521 | 522 | 523 | 524 | MemoryPool mem; 525 | const float minCornerX; 526 | const float minCornerY; 527 | const float minCornerZ; 528 | 529 | const float maxCornerX; 530 | const float maxCornerY; 531 | const float maxCornerZ; 532 | 533 | const float cellWidth; 534 | const float cellHeight; 535 | const float cellDepth; 536 | 537 | const float cellWidthInv; 538 | const float cellHeightInv; 539 | const float cellDepthInv; 540 | 541 | 542 | }; 543 | 544 | 545 | class AdaptiveGridV2 546 | { 547 | private: 548 | 549 | 550 | // stores a bit in a byte at a position 551 | inline void storeBit(uint64_t & data, const uint64_t value, const int pos) noexcept 552 | { 553 | data = (value << pos) | (data & ~(((uint64_t)1) << pos)); 554 | } 555 | 556 | 557 | 558 | 559 | public: 560 | AdaptiveGridV2(MemoryPool mem, const float minx, const float miny, const float minz, 561 | const float maxx, const float maxy, const 
float maxz) 562 | { 563 | fields = std::make_shared(mem,minx,miny,minz,maxx,maxy,maxz); 564 | 565 | } 566 | 567 | void clear() 568 | { 569 | 570 | 571 | fields->mem.clear(); 572 | 573 | // set current (root) node's particle start index to 0 574 | const int indexParticleStart = fields->mem.index.allocate(1); 575 | fields->mem.index.set(indexParticleStart,0); 576 | 577 | // set current (root) node's number of particles to 0 578 | const int indexNumParticles = fields->mem.index.allocate(1); 579 | fields->mem.index.set(indexNumParticles,0); 580 | 581 | // set current (root) node's child node start 582 | const int indexChildNodeStart = fields->mem.index.allocate(1); 583 | fields->mem.index.set(indexChildNodeStart,3); 584 | 585 | 586 | // set AABB of current (root) node 587 | // X 588 | 589 | const int indexBoundMinXFloat = fields->mem.nodeMinX.allocate(1); 590 | fields->mem.nodeMinX.set(indexBoundMinXFloat,fields->minCornerX); 591 | 592 | 593 | // Y 594 | 595 | const int indexBoundMinYFloat = fields->mem.nodeMinY.allocate(1); 596 | fields->mem.nodeMinY.set(indexBoundMinYFloat,fields->minCornerY); 597 | 598 | 599 | // Z 600 | 601 | const int indexBoundMinZFloat = fields->mem.nodeMinZ.allocate(1); 602 | fields->mem.nodeMinZ.set(indexBoundMinZFloat,fields->minCornerZ); 603 | 604 | 605 | // cell inverse width 606 | 607 | const int indexWidthFloat = fields->mem.nodeInvWidth.allocate(1); 608 | fields->mem.nodeInvWidth.set(indexWidthFloat,fields->cellWidthInv); 609 | 610 | 611 | // cell inverse height 612 | 613 | const int indexHeightFloat = fields->mem.nodeInvHeight.allocate(1); 614 | fields->mem.nodeInvHeight.set(indexHeightFloat,fields->cellHeightInv); 615 | 616 | 617 | // cell inverse depth 618 | 619 | const int indexDepthFloat = fields->mem.nodeInvDepth.allocate(1); 620 | fields->mem.nodeInvDepth.set(indexDepthFloat,fields->cellDepthInv); 621 | 622 | 623 | fields->mem.childNodeCount.set(fields->mem.childNodeCount.allocate(1),0); 624 | 
fields->mem.nodeCollisionMask.set(fields->mem.nodeCollisionMask.allocate(1),0); 625 | 626 | 627 | } 628 | 629 | 630 | template 631 | inline void addParticles(const int numParticlesToAdd, Derived * const __restrict__ particles) 632 | { 633 | const int pId = fields->mem.indexParticle.allocate(numParticlesToAdd); 634 | const int oId = fields->mem.orderParticle.allocate(numParticlesToAdd); 635 | 636 | const int maxXId = fields->mem.maxX.allocate(numParticlesToAdd); 637 | const int maxYId = fields->mem.maxY.allocate(numParticlesToAdd); 638 | const int maxZId = fields->mem.maxZ.allocate(numParticlesToAdd); 639 | const int minXId = fields->mem.minX.allocate(numParticlesToAdd); 640 | const int minYId = fields->mem.minY.allocate(numParticlesToAdd); 641 | const int minZId = fields->mem.minZ.allocate(numParticlesToAdd); 642 | fields->mem.index.set(1,fields->mem.index.get(1)+numParticlesToAdd); 643 | 644 | for(int i=0;i * const curPtr = static_cast* const>(particles+i); 648 | 649 | 650 | fields->mem.indexParticle.set(pId+i,curPtr->getId()); 651 | fields->mem.orderParticle.set(oId+i,oId+i); 652 | 653 | fields->mem.maxX.set(maxXId+i,curPtr->getMaxX()); 654 | fields->mem.maxY.set(maxYId+i,curPtr->getMaxY()); 655 | fields->mem.maxZ.set(maxZId+i,curPtr->getMaxZ()); 656 | fields->mem.minX.set(minXId+i,curPtr->getMinX()); 657 | fields->mem.minY.set(minYId+i,curPtr->getMinY()); 658 | fields->mem.minZ.set(minZId+i,curPtr->getMinZ()); 659 | 660 | 661 | 662 | } 663 | } 664 | 665 | 666 | 667 | inline void addParticlesWithoutInterface(const int numParticlesToAdd, const int particleOfs, std::vector orders, 668 | Memory ids, 669 | Memory minx0, Memory miny0, Memory minz0, 670 | Memory maxx0, Memory maxy0, Memory maxz0 671 | ) 672 | { 673 | const int pId = fields->mem.indexParticle.allocate(numParticlesToAdd); 674 | const int oId = fields->mem.orderParticle.allocate(numParticlesToAdd); 675 | 676 | const int maxXId = fields->mem.maxX.allocate(numParticlesToAdd); 677 | const int maxYId = 
fields->mem.maxY.allocate(numParticlesToAdd); 678 | const int maxZId = fields->mem.maxZ.allocate(numParticlesToAdd); 679 | const int minXId = fields->mem.minX.allocate(numParticlesToAdd); 680 | const int minYId = fields->mem.minY.allocate(numParticlesToAdd); 681 | const int minZId = fields->mem.minZ.allocate(numParticlesToAdd); 682 | fields->mem.index.set(1,fields->mem.index.get(1)+numParticlesToAdd); 683 | 684 | for(int i=0;imem.indexParticle.set(pId+i,ids.get(ord)); 688 | fields->mem.orderParticle.set(oId+i,oId+i); 689 | 690 | fields->mem.maxX.set(maxXId+i,maxx0.get(ord)); 691 | fields->mem.maxY.set(maxYId+i,maxy0.get(ord)); 692 | fields->mem.maxZ.set(maxZId+i,maxz0.get(ord)); 693 | fields->mem.minX.set(minXId+i,minx0.get(ord)); 694 | fields->mem.minY.set(minYId+i,miny0.get(ord)); 695 | fields->mem.minZ.set(minZId+i,minz0.get(ord)); 696 | 697 | } 698 | } 699 | 700 | 701 | 702 | struct NodeTask 703 | { 704 | NodeTask(const int n1=0):nodePointer(n1){ } 705 | const int nodePointer; 706 | }; 707 | 708 | struct LeafTask 709 | { 710 | LeafTask(const int n1=0):particlePointer(n1){ } 711 | int particlePointer; 712 | }; 713 | 714 | 715 | 716 | // returns id values of particles 717 | std::vector findCollisions(const float minx, const float miny, const float minz, 718 | const float maxx, const float maxy, const float maxz) 719 | { 720 | FastUnique fastSet; 721 | std::vector result; 722 | std::stack nodesToCompute; 723 | std::vector particlesToCompute; 724 | 725 | 726 | // push root node to work queue 727 | nodesToCompute.push(NodeTask(0)); 728 | 729 | 730 | 731 | // traverse all colliding sparse cells 732 | while(!nodesToCompute.empty() /* stack>=0 */) 733 | { 734 | NodeTask task = nodesToCompute.top(); 735 | nodesToCompute.pop(); 736 | 737 | 738 | 739 | const int pointer = fields->mem.index.get(task.nodePointer+2); 740 | const int npdiv3 = task.nodePointer/3; 741 | const int numChildNodes = fields->mem.childNodeCount.get(npdiv3); 742 | 743 | // if this is not a leaf node, 
traverse all child nodes (they are sparse, so may be less than 8(8bit mask) or 64(64 bit mask)) 744 | if(pointer<0) 745 | { 746 | // get current node's information 747 | const float minCornerX = fields->mem.nodeMinX.get(npdiv3); 748 | const float minCornerY = fields->mem.nodeMinY.get(npdiv3); 749 | const float minCornerZ = fields->mem.nodeMinZ.get(npdiv3); 750 | 751 | const float cellWidthInv = fields->mem.nodeInvWidth.get(npdiv3); 752 | const float cellHeightInv = fields->mem.nodeInvHeight.get(npdiv3); 753 | const float cellDepthInv = fields->mem.nodeInvDepth.get(npdiv3); 754 | 755 | 756 | const int indexStartX = std::floor((minx - minCornerX)*cellWidthInv); 757 | const int indexEndX = std::floor((maxx - minCornerX)*cellWidthInv); 758 | 759 | const int indexStartY = std::floor((miny - minCornerY)*cellHeightInv); 760 | const int indexEndY = std::floor((maxy - minCornerY)*cellHeightInv); 761 | 762 | const int indexStartZ = std::floor((minz - minCornerZ)*cellDepthInv); 763 | const int indexEndZ = std::floor((maxz - minCornerZ)*cellDepthInv); 764 | 765 | 766 | // prepare cell indicator mask (1 bit = has object, 0 bit = empty)) 767 | uint64_t maskCellsFilled=0; 768 | for(int k=indexStartZ; k<=indexEndZ; k++) 769 | { 770 | if(k<0 || k>=4) 771 | continue; 772 | for(int j=indexStartY; j<=indexEndY; j++) 773 | { 774 | if(j<0 || j>=4) 775 | continue; 776 | for(int i=indexStartX; i<=indexEndX; i++) 777 | { 778 | if(i<0 || i>=4) 779 | continue; 780 | 781 | storeBit(maskCellsFilled,1,i+j*4+k*16); 782 | 783 | } 784 | } 785 | } 786 | 787 | 788 | const int nodeOffset = -pointer-1; 789 | for(int i=0;imem.nodeCollisionMask.get((nodeOffset+i*3)/3); 793 | if(maskCellsFilled & cellMask) 794 | { 795 | nodesToCompute.push(NodeTask(nodeOffset+i*3)); 796 | } 797 | } 798 | } 799 | else 800 | { 801 | // this is leaf node 802 | 803 | const int ptr = fields->mem.index.get(task.nodePointer); 804 | const int n = fields->mem.index.get(task.nodePointer+1); 805 | 806 | 807 | 808 | 809 | for(int 
i=0;imem.orderParticle.get(index); 832 | const int partId = fields->mem.indexParticle.get(orderId); 833 | if(fastSet.test(partId)) 834 | { 835 | 836 | 837 | 838 | const float minX = fields->mem.minX.get(orderId); 839 | const float maxX = fields->mem.maxX.get(orderId); 840 | 841 | 842 | if(intersectDim(minx, maxx, minX, maxX)) 843 | { 844 | const float minY = fields->mem.minY.get(orderId); 845 | const float maxY = fields->mem.maxY.get(orderId); 846 | if(intersectDim(miny, maxy, minY, maxY)) 847 | { 848 | const float minZ = fields->mem.minZ.get(orderId); 849 | const float maxZ = fields->mem.maxZ.get(orderId); 850 | if(intersectDim(minz, maxz, minZ, maxZ)) 851 | { 852 | 853 | fastSet.iterateSet(partId); 854 | } 855 | } 856 | } 857 | 858 | } 859 | } 860 | const int fsz = fastSet.size(); 861 | for(int i=0;i> findCollisionsAll() 875 | { 876 | 877 | const int resetN = fields->mem.indexParticle.size(); 878 | 879 | fields->mem.allPairsCollmapping.reset(); 880 | fields->mem.allPairsCollmapping.allocate(resetN); 881 | for(int i=0;imem.allPairsCollmapping.getRef(i).reset(); 884 | } 885 | 886 | 887 | 888 | 889 | fields->mem.allPairsColl.reset(); 890 | std::vector> result; 891 | 892 | 893 | const int numLeaf = fields->mem.leafOffset.size(); 894 | 895 | 896 | 897 | for(int leaf=0;leafmem.leafOffset.get(leaf); 905 | const int ptr = fields->mem.index.get(leafOfs); 906 | const int n = fields->mem.index.get(leafOfs+1); 907 | if(n<2) 908 | continue; 909 | //continue; 910 | 911 | alignas(32) 912 | int index[testParticleLimit]; 913 | 914 | alignas(32) 915 | int orderId[testParticleLimit]; 916 | 917 | alignas(32) 918 | int partId[testParticleLimit]; 919 | 920 | alignas(32) 921 | float minx[testParticleLimit]; 922 | 923 | alignas(32) 924 | float miny[testParticleLimit]; 925 | 926 | alignas(32) 927 | float minz[testParticleLimit]; 928 | 929 | alignas(32) 930 | float maxx[testParticleLimit]; 931 | 932 | alignas(32) 933 | float maxy[testParticleLimit]; 934 | 935 | alignas(32) 936 | float 
maxz[testParticleLimit]; 937 | constexpr int simd = 4; 938 | constexpr int simd1 = simd-1; 939 | const int n8 = n-(n&simd1); 940 | for(int i=0;imem.orderParticle.get(index[i+j]); 946 | for(int j=0;jmem.indexParticle.get(orderId[i+j]); 948 | for(int j=0;jmem.minX.get(orderId[i+j]); 950 | for(int j=0;jmem.minY.get(orderId[i+j]); 952 | for(int j=0;jmem.minZ.get(orderId[i+j]); 954 | for(int j=0;jmem.maxX.get(orderId[i+j]); 956 | for(int j=0;jmem.maxY.get(orderId[i+j]); 958 | for(int j=0;jmem.maxZ.get(orderId[i+j]); 960 | 961 | } 962 | 963 | for(int i=n8;imem.orderParticle.get(index[i]); 967 | partId[i] = fields->mem.indexParticle.get(orderId[i]); 968 | minx[i] = fields->mem.minX.get(orderId[i]); 969 | miny[i] = fields->mem.minY.get(orderId[i]); 970 | minz[i] = fields->mem.minZ.get(orderId[i]); 971 | maxx[i] = fields->mem.maxX.get(orderId[i]); 972 | maxy[i] = fields->mem.maxY.get(orderId[i]); 973 | maxz[i] = fields->mem.maxZ.get(orderId[i]); 974 | } 975 | 976 | for(int i=n;i=n) 998 | break; 999 | 1000 | alignas(32) 1001 | int out[16]; 1002 | 1003 | const bool test[simd]={ orderId[i]>=0, orderId[i+1]>=0, orderId[i+2]>=0, orderId[i+3]>=0 }; 1004 | 1005 | 1006 | FastUnique * map[simd] = { 1007 | test[0]?fields->mem.allPairsCollmapping.getPtr(orderId[i]):nullptr, 1008 | test[1]?fields->mem.allPairsCollmapping.getPtr(orderId[i+1]):nullptr, 1009 | test[2]?fields->mem.allPairsCollmapping.getPtr(orderId[i+2]):nullptr, 1010 | test[3]?fields->mem.allPairsCollmapping.getPtr(orderId[i+3]):nullptr 1011 | }; 1012 | 1013 | 1014 | 1015 | 1016 | alignas(32) 1017 | int tileId1[16]={ 1018 | // 0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3 1019 | partId[i+0],partId[i+1],partId[i+2],partId[i+3] 1020 | }; 1021 | 1022 | for(int k=0;k<4;k++) 1023 | { 1024 | tileId1[k+4]=tileId1[k]; 1025 | tileId1[k+8]=tileId1[k]; 1026 | tileId1[k+12]=tileId1[k]; 1027 | } 1028 | 1029 | 1030 | 1031 | alignas(32) 1032 | float tileMinX1[16]={ 1033 | // 0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3 1034 | 
minx[i+0],minx[i+1],minx[i+2],minx[i+3] 1035 | }; 1036 | for(int k=0;k<4;k++) 1037 | { 1038 | tileMinX1[k+4]=tileMinX1[k]; 1039 | tileMinX1[k+8]=tileMinX1[k]; 1040 | tileMinX1[k+12]=tileMinX1[k]; 1041 | } 1042 | 1043 | 1044 | alignas(32) 1045 | float tileMinY1[16]={ 1046 | // 0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3 1047 | miny[i+0],miny[i+1],miny[i+2],miny[i+3] 1048 | }; 1049 | 1050 | for(int k=0;k<4;k++) 1051 | { 1052 | tileMinY1[k+4]=tileMinY1[k]; 1053 | tileMinY1[k+8]=tileMinY1[k]; 1054 | tileMinY1[k+12]=tileMinY1[k]; 1055 | } 1056 | 1057 | 1058 | 1059 | alignas(32) 1060 | float tileMinZ1[16]={ 1061 | // 0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3 1062 | minz[i+0],minz[i+1],minz[i+2],minz[i+3] 1063 | 1064 | }; 1065 | 1066 | for(int k=0;k<4;k++) 1067 | { 1068 | tileMinZ1[k+4]=tileMinZ1[k]; 1069 | tileMinZ1[k+8]=tileMinZ1[k]; 1070 | tileMinZ1[k+12]=tileMinZ1[k]; 1071 | } 1072 | 1073 | 1074 | 1075 | 1076 | 1077 | alignas(32) 1078 | float tileMaxX1[16]={ 1079 | // 0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3 1080 | maxx[i+0],maxx[i+1],maxx[i+2],maxx[i+3] 1081 | }; 1082 | 1083 | for(int k=0;k<4;k++) 1084 | { 1085 | tileMaxX1[k+4]=tileMaxX1[k]; 1086 | tileMaxX1[k+8]=tileMaxX1[k]; 1087 | tileMaxX1[k+12]=tileMaxX1[k]; 1088 | } 1089 | 1090 | alignas(32) 1091 | float tileMaxY1[16]={ 1092 | // 0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3 1093 | maxy[i+0],maxy[i+1],maxy[i+2],maxy[i+3] 1094 | }; 1095 | 1096 | for(int k=0;k<4;k++) 1097 | { 1098 | tileMaxY1[k+4]=tileMaxY1[k]; 1099 | tileMaxY1[k+8]=tileMaxY1[k]; 1100 | tileMaxY1[k+12]=tileMaxY1[k]; 1101 | } 1102 | 1103 | alignas(32) 1104 | float tileMaxZ1[16]={ 1105 | // 0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3 1106 | maxz[i+0],maxz[i+1],maxz[i+2],maxz[i+3] 1107 | }; 1108 | 1109 | for(int k=0;k<4;k++) 1110 | { 1111 | tileMaxZ1[k+4]=tileMaxZ1[k]; 1112 | tileMaxZ1[k+8]=tileMaxZ1[k]; 1113 | tileMaxZ1[k+12]=tileMaxZ1[k]; 1114 | } 1115 | 1116 | 1117 | 1118 | 1119 | for(int j=i;j=n) 1122 | break; 1123 | // 0v0, 0v1, 0v2, 0v3, 1124 | // 1v0, 1v1, 1v2, 1v3, 1125 | // 2v0, 2v1, 2v2, 2v3, 
1126 | // 3v0, 3v1, 3v2, 3v3, 1127 | 1128 | comp4vs4( tileId1, partId+j, 1129 | tileMinX1, minx+j, 1130 | tileMinY1, miny+j, 1131 | tileMinZ1, minz+j, 1132 | tileMaxX1, maxx+j, 1133 | tileMaxY1, maxy+j, 1134 | tileMaxZ1, maxz+j, 1135 | out 1136 | ); 1137 | 1138 | 1139 | 1140 | 1141 | for(int k=0;k<16;k++) 1142 | { 1143 | const int k3 = k&3; 1144 | const int id2 = j+(k/4); 1145 | if(out[k]) 1146 | { 1147 | if(test[k3]) 1148 | { 1149 | 1150 | map[k3]->insert(orderId[id2]); 1151 | 1152 | } 1153 | 1154 | } 1155 | } 1156 | } 1157 | 1158 | 1159 | } 1160 | } 1161 | 1162 | } 1163 | } 1164 | 1165 | } 1166 | 1167 | 1168 | 1169 | 1170 | { 1171 | 1172 | for(int i=0;i& map = fields->mem.allPairsCollmapping.getRef(i); 1175 | const int ms = map.size(); 1176 | const int allocIdx = fields->mem.allPairsColl.allocate(ms); 1177 | 1178 | for(int j=0;jmem.allPairsColl.set(allocIdx+j,std::pair(fields->mem.indexParticle.get(i),fields->mem.indexParticle.get(map.get(j)))); 1181 | 1182 | } 1183 | } 1184 | 1185 | 1186 | result.resize(fields->mem.allPairsColl.size()); 1187 | fields->mem.allPairsColl.writeTo(result); 1188 | } 1189 | 1190 | return result; 1191 | } 1192 | 1193 | 1194 | 1195 | void buildTree() 1196 | { 1197 | 1198 | 1199 | 1200 | int particleStart = fields->mem.index.get(0); 1201 | int numParticle = fields->mem.index.get(1); 1202 | 1203 | int nodeOffset = 0; 1204 | 1205 | 1206 | 1207 | float minCornerX = fields->mem.nodeMinX.get(0); 1208 | float minCornerY = fields->mem.nodeMinY.get(0); 1209 | float minCornerZ = fields->mem.nodeMinZ.get(0); 1210 | float cellWidthInv = fields->mem.nodeInvWidth.get(0); 1211 | float cellHeightInv = fields->mem.nodeInvHeight.get(0); 1212 | float cellDepthInv = fields->mem.nodeInvDepth.get(0); 1213 | float cellWidth = 1.0f/cellWidthInv; 1214 | float cellHeight = 1.0f/cellHeightInv; 1215 | float cellDepth = 1.0f/cellDepthInv; 1216 | int ctr=0; 1217 | 1218 | int maxNodeOffset = 3; 1219 | 1220 | while(nodeOffset <= maxNodeOffset) 1221 | { 1222 | ctr++; 
1223 | 1224 | int ctrTmp[64]={0,0,0,0,0,0,0,0, 1225 | 0,0,0,0,0,0,0,0, 1226 | 0,0,0,0,0,0,0,0, 1227 | 0,0,0,0,0,0,0,0, 1228 | 0,0,0,0,0,0,0,0, 1229 | 0,0,0,0,0,0,0,0, 1230 | 0,0,0,0,0,0,0,0, 1231 | 0,0,0,0,0,0,0,0}; 1232 | 1233 | 1234 | // if child node pointer not set up 1235 | 1236 | //std::cout<<"debug 1 "<mem.index.get(nodeOffset+2)<<" "<mem.index.size()<mem.index.get(nodeOffset+2)mem.index.get(nodeOffset+2)+2)>=0 && fields->mem.index.get(fields->mem.index.get(nodeOffset+2)+2)>=0) 1238 | { 1239 | fields->mem.index.set(fields->mem.index.get(nodeOffset+2)+2,-(nodeOffset+1)); 1240 | } 1241 | //std::cout<<"debug 2"< testParticleLimit) 1246 | { 1247 | 1248 | 1249 | { 1250 | 1251 | for(int zz = 0; zz<4; zz++) 1252 | for(int yy = 0; yy<4; yy++) 1253 | for(int xx = 0; xx<4; xx++) 1254 | { 1255 | // allocate node 1256 | const int index0 = xx+yy*4+zz*16; 1257 | 1258 | fields->mem.orderTmp[index0].reset(); 1259 | fields->mem.orderTmp[index0].allocate(numParticle); 1260 | } 1261 | } 1262 | 1263 | 1264 | { 1265 | 1266 | 1267 | 1268 | 1269 | for(int ii=0;iimem.orderParticle.get(particleStart+ii); 1273 | const float& minx = fields->mem.minX.getRef(orderParticle); 1274 | const float& miny = fields->mem.minY.getRef(orderParticle); 1275 | const float& minz = fields->mem.minZ.getRef(orderParticle); 1276 | const float& maxx = fields->mem.maxX.getRef(orderParticle); 1277 | const float& maxy = fields->mem.maxY.getRef(orderParticle); 1278 | const float& maxz = fields->mem.maxZ.getRef(orderParticle); 1279 | 1280 | const int indexStartX = std::floor((minx - minCornerX)*cellWidthInv); 1281 | const int indexEndX = std::floor((maxx - minCornerX)*cellWidthInv); 1282 | 1283 | const int indexStartY = std::floor((miny - minCornerY)*cellHeightInv); 1284 | const int indexEndY = std::floor((maxy - minCornerY)*cellHeightInv); 1285 | 1286 | const int indexStartZ = std::floor((minz -minCornerZ)*cellDepthInv); 1287 | const int indexEndZ = std::floor((maxz - minCornerZ)*cellDepthInv); 1288 | 1289 | 
// prepare cell indicator mask (1 bit = has object, 0 bit = empty)) 1290 | 1291 | for(int k=indexStartZ; k<=indexEndZ; k++) 1292 | { 1293 | if(k<0 || k>=4) 1294 | continue; 1295 | for(int j=indexStartY; j<=indexEndY; j++) 1296 | { 1297 | if(j<0 || j>=4) 1298 | continue; 1299 | for(int i=indexStartX; i<=indexEndX; i++) 1300 | { 1301 | if(i<0 || i>=4) 1302 | continue; 1303 | 1304 | 1305 | const int index0 = i+j*4+k*16; 1306 | 1307 | fields->mem.orderTmp[index0].set(ctrTmp[index0],orderParticle); 1308 | ctrTmp[index0]++; 1309 | } 1310 | } 1311 | } 1312 | } 1313 | 1314 | 1315 | } 1316 | 1317 | 1318 | // add all particles in order (from first child node to last child node) 1319 | childNodeCount=0; 1320 | for(int zz = 0; zz<4; zz++) 1321 | for(int yy = 0; yy<4; yy++) 1322 | for(int xx = 0; xx<4; xx++) 1323 | { 1324 | const int index0 = xx+yy*4+zz*16; 1325 | const int sz = ctrTmp[index0]; 1326 | 1327 | 1328 | if(sz>0) 1329 | { 1330 | childNodeCount++; 1331 | 1332 | const int nodeIndexOfs = fields->mem.index.allocate(3); 1333 | const int particleStartCur = nodeIndexOfs; 1334 | const int numParticleCur = nodeIndexOfs+1; 1335 | const int childNodeStartCur = nodeIndexOfs+2; 1336 | 1337 | const int tmpIndex = fields->mem.childNodeCount.allocate(1); 1338 | const int nodeBoundMinXFloat = fields->mem.nodeMinX.allocate(1); 1339 | const int nodeBoundMinYFloat = fields->mem.nodeMinY.allocate(1); 1340 | const int nodeBoundMinZFloat = fields->mem.nodeMinZ.allocate(1); 1341 | 1342 | const int nodeInvWidthFloat = fields->mem.nodeInvWidth.allocate(1); 1343 | const int nodeInvHeightFloat = fields->mem.nodeInvHeight.allocate(1); 1344 | const int nodeInvDepthFloat = fields->mem.nodeInvDepth.allocate(1); 1345 | 1346 | fields->mem.nodeMinX.set(nodeBoundMinXFloat,minCornerX+xx*cellWidth); 1347 | fields->mem.nodeMinY.set(nodeBoundMinYFloat,minCornerY+yy*cellHeight); 1348 | fields->mem.nodeMinZ.set(nodeBoundMinZFloat,minCornerZ+zz*cellDepth); 1349 | 1350 | 
fields->mem.nodeInvWidth.set(nodeInvWidthFloat,cellWidthInv*4.0f); 1351 | fields->mem.nodeInvHeight.set(nodeInvHeightFloat,cellHeightInv*4.0f); 1352 | fields->mem.nodeInvDepth.set(nodeInvDepthFloat,cellDepthInv*4.0f); 1353 | 1354 | 1355 | const int nodeMaskIndex = fields->mem.nodeCollisionMask.allocate(1); 1356 | uint64_t nodeMask = 0; 1357 | storeBit(nodeMask,1,index0); 1358 | fields->mem.nodeCollisionMask.set(nodeMaskIndex,nodeMask); 1359 | 1360 | //const int allocOffset = fields->mem.indexParticle.allocate(sz); 1361 | const int allocOffset = fields->mem.orderParticle.allocate(sz); 1362 | 1363 | 1364 | //fields->mem.indexParticle.readFrom(fields->mem.idTmp[index0],0,allocOffset,sz); 1365 | fields->mem.orderParticle.readFrom(fields->mem.orderTmp[index0],0,allocOffset,sz); 1366 | 1367 | 1368 | fields->mem.index.set(particleStartCur,allocOffset); 1369 | fields->mem.index.set(numParticleCur,sz); 1370 | fields->mem.index.set(childNodeStartCur,nodeOffset); 1371 | 1372 | 1373 | maxNodeOffset=particleStartCur; 1374 | } 1375 | 1376 | 1377 | 1378 | } 1379 | 1380 | fields->mem.childNodeCount.set(nodeOffset/3,childNodeCount); 1381 | 1382 | } 1383 | else 1384 | { 1385 | fields->mem.childNodeCount.set(nodeOffset/3,0); 1386 | const int idx = fields->mem.leafOffset.allocate(1); 1387 | fields->mem.leafOffset.set(idx,nodeOffset); 1388 | } 1389 | 1390 | nodeOffset += 3; 1391 | numParticle=0; 1392 | if(nodeOffset <= maxNodeOffset) 1393 | { 1394 | particleStart = fields->mem.index.get(nodeOffset); 1395 | numParticle = fields->mem.index.get(nodeOffset+1); 1396 | 1397 | 1398 | minCornerX = fields->mem.nodeMinX.get(nodeOffset/3); 1399 | minCornerY = fields->mem.nodeMinY.get(nodeOffset/3); 1400 | minCornerZ = fields->mem.nodeMinZ.get(nodeOffset/3); 1401 | cellWidthInv = fields->mem.nodeInvWidth.get(nodeOffset/3); 1402 | cellHeightInv = fields->mem.nodeInvHeight.get(nodeOffset/3); 1403 | cellDepthInv = fields->mem.nodeInvDepth.get(nodeOffset/3); 1404 | cellWidth = 1.0f/cellWidthInv; 1405 
| cellHeight = 1.0f/cellHeightInv; 1406 | cellDepth = 1.0f/cellDepthInv; 1407 | } 1408 | } 1409 | 1410 | 1411 | 1412 | 1413 | } 1414 | 1415 | private: 1416 | std::shared_ptr fields; 1417 | }; 1418 | 1419 | 1420 | 1421 | 1422 | 1423 | 1424 | template 1425 | class CollisionPair 1426 | { 1427 | public: 1428 | CollisionPair(IParticle* p1Prm=nullptr, IParticle* p2Prm=nullptr) 1429 | { 1430 | p1=p1Prm; 1431 | p2=p2Prm; 1432 | 1433 | } 1434 | 1435 | IParticle* getParticle1() const 1436 | { 1437 | return p1; 1438 | } 1439 | 1440 | IParticle* getParticle2() const 1441 | { 1442 | return p2; 1443 | } 1444 | private: 1445 | IParticle * p1; 1446 | IParticle * p2; 1447 | }; 1448 | 1449 | 1450 | template 1451 | class AdaptiveGrid; 1452 | 1453 | 1454 | using GridDataType = char; 1455 | 1456 | 1457 | // Fixed grid of cells (adaptive if a cell overflows) 1458 | template 1459 | class FixedGridFields 1460 | { 1461 | public: 1462 | FixedGridFields(const int w, const int h, const int d, const int s, 1463 | const CoordType minXp, const CoordType minYp, const CoordType minZp, 1464 | const CoordType maxXp, const CoordType maxYp, const CoordType maxZp): 1465 | width(w),height(h),depth(d),widthDiv1(CoordType(1)/w),heightDiv1(CoordType(1)/h),depthDiv1(CoordType(1)/d),storage(s), 1466 | minX(minXp),minY(minYp),minZ(minZp),maxX(maxXp),maxY(maxYp),maxZ(maxZp) 1467 | { 1468 | 1469 | 1470 | } 1471 | 1472 | ~FixedGridFields() 1473 | { 1474 | 1475 | } 1476 | 1477 | inline 1478 | const int getWidth () const noexcept { return width;}; 1479 | 1480 | inline 1481 | const int getHeight () const noexcept { return height;}; 1482 | 1483 | inline 1484 | const int getDepth () const noexcept { return depth;}; 1485 | 1486 | inline 1487 | const CoordType getWidthDiv1 () const noexcept { return widthDiv1;}; 1488 | 1489 | inline 1490 | const CoordType getHeightDiv1 () const noexcept { return heightDiv1;}; 1491 | 1492 | inline 1493 | const CoordType getDepthDiv1 () const noexcept { return depthDiv1;}; 1494 | 1495 | 
inline 1496 | const int getStorage () const noexcept { return storage;}; 1497 | 1498 | 1499 | std::vector*> particles; 1500 | std::vector particlesCollisionMask; 1501 | 1502 | std::map*,std::map*,bool>> coll; 1503 | 1504 | 1505 | std::map*,std::map*,bool>> mapping; 1506 | std::vector> result; 1507 | const int width; 1508 | const int height; 1509 | const int depth; 1510 | 1511 | const CoordType widthDiv1; 1512 | const CoordType heightDiv1; 1513 | const CoordType depthDiv1; 1514 | 1515 | const int storage; 1516 | 1517 | const CoordType minX; 1518 | const CoordType minY; 1519 | const CoordType minZ; 1520 | const CoordType maxX; 1521 | const CoordType maxY; 1522 | const CoordType maxZ; 1523 | 1524 | }; 1525 | 1526 | 1527 | template 1528 | struct Cmd 1529 | { 1530 | AdaptiveGrid* grid; 1531 | std::mutex* mut; 1532 | std::map*,std::map*,bool>>* mapping; 1533 | bool* completed; 1534 | }; 1535 | 1536 | 1537 | 1538 | 1539 | template 1540 | class ThreadPoolFields 1541 | { 1542 | public: 1543 | ThreadPoolFields() {ctr=0; } 1544 | int ctr; 1545 | std::vector worker; 1546 | MutexWithoutFalseSharing mut[7]; 1547 | std::vector msg; 1548 | std::vector>>> q; 1549 | ~ThreadPoolFields() 1550 | { 1551 | for(unsigned int i=0;i lg(mut[i].mut); 1554 | msg[i]=0; 1555 | Cmd cmd; 1556 | cmd.grid=nullptr; 1557 | q[i]->push2(cmd); 1558 | } 1559 | 1560 | for(unsigned int i=0;i 1569 | class ThreadPool 1570 | { 1571 | public: 1572 | ThreadPool() 1573 | { 1574 | fields=std::make_shared>(); 1575 | for(int i=0;i<7;i++) 1576 | { 1577 | fields->q.push_back(std::make_shared>>()); 1578 | fields->msg.push_back(1); 1579 | } 1580 | auto ptr = fields.get(); 1581 | for(int i=0;i<7;i++) 1582 | { 1583 | 1584 | fields->worker.push_back(std::thread( 1585 | [&,i,ptr]() 1586 | { 1587 | auto fields = ptr; 1588 | bool work = true; 1589 | while(work) 1590 | { 1591 | { 1592 | { 1593 | std::lock_guard lg(fields->mut[i].mut); 1594 | work=(fields->msg[i]>0); 1595 | } 1596 | 1597 | Cmd cmd = fields->q[i]->pop(); 1598 | 
if(cmd.grid==nullptr) 1599 | break; 1600 | auto collisions = cmd.grid->getCollisions(); 1601 | 1602 | { 1603 | std::lock_guard lg(*cmd.mut); 1604 | for(auto& c:collisions) 1605 | { 1606 | (*cmd.mapping)[c.getParticle1()][c.getParticle2()]=true; 1607 | } 1608 | *cmd.completed=true; 1609 | } 1610 | 1611 | } 1612 | } 1613 | } 1614 | )); 1615 | } 1616 | } 1617 | 1618 | 1619 | void compute(Cmd cmd) 1620 | { 1621 | fields->q[fields->ctr++%7]->push(cmd); 1622 | } 1623 | 1624 | 1625 | private: 1626 | 1627 | std::shared_ptr> fields; 1628 | }; 1629 | 1630 | template 1631 | class AdaptiveGrid 1632 | { 1633 | 1634 | protected: 1635 | 1636 | AdaptiveGrid(ThreadPool thr, int depthPrm, 1637 | const CoordType minX, const CoordType minY, const CoordType minZ, 1638 | const CoordType maxX, const CoordType maxY, const CoordType maxZ):thrPool(thr) 1639 | { 1640 | 1641 | isLeaf = std::make_shared(); 1642 | *isLeaf=true; 1643 | depth = std::make_shared(); 1644 | 1645 | *depth=depthPrm; 1646 | 1647 | if(*depth<10) 1648 | fields = std::make_shared>(4,4,4,300,minX,minY,minZ,maxX,maxY,maxZ); 1649 | else 1650 | fields = std::make_shared>(4,4,4,400,minX,minY,minZ,maxX,maxY,maxZ); 1651 | 1652 | subGrid = std::make_shared>>(); 1653 | 1654 | } 1655 | 1656 | 1657 | 1658 | // loads a bit from a 8-byte integer at a position 1659 | inline uint64_t loadBitSizeT(const uint64_t & data, const int pos) noexcept 1660 | { 1661 | return (data>>pos)&1; 1662 | } 1663 | 1664 | // stores a bit in a 8-byte integer at a position 1665 | inline void storeBitSizeT(uint64_t & data, const uint64_t value, const int pos) noexcept 1666 | { 1667 | data = (value << pos) | (data & ~(((uint64_t)1) << pos)); 1668 | } 1669 | 1670 | public: 1671 | AdaptiveGrid(ThreadPool thr, 1672 | const CoordType minX, const CoordType minY, const CoordType minZ, 1673 | const CoordType maxX, const CoordType maxY, const CoordType maxZ):thrPool(thr) 1674 | { 1675 | 1676 | isLeaf=std::make_shared(); 1677 | *isLeaf = true; 1678 | depth = 
std::make_shared(); 1679 | 1680 | *depth=0; 1681 | 1682 | fields = std::make_shared>(4,4,4,100,minX,minY,minZ,maxX,maxY,maxZ); 1683 | subGrid = std::make_shared>>(); 1684 | 1685 | } 1686 | 1687 | 1688 | AdaptiveGrid() 1689 | { 1690 | AdaptiveGrid(ThreadPool(),0,0,0,1,1,1); 1691 | } 1692 | 1693 | 1694 | void clear() 1695 | { 1696 | *isLeaf=true; 1697 | subGrid->clear(); 1698 | fields->particles.clear(); 1699 | fields->particlesCollisionMask.clear(); 1700 | } 1701 | 1702 | 1703 | template 1704 | void add(Derived * particlesPrm, int n) 1705 | { 1706 | for(int i=0;i 1713 | void add(Derived * particlesPrm) 1714 | { 1715 | 1716 | const int w = fields->getWidth(); 1717 | const int h = fields->getHeight(); 1718 | const int d = fields->getDepth(); 1719 | // grid 1720 | const CoordType xDim = fields->maxX - fields->minX; 1721 | const CoordType yDim = fields->maxY - fields->minY; 1722 | const CoordType zDim = fields->maxZ - fields->minZ; 1723 | 1724 | // cell 1725 | const CoordType stepX = xDim/w; 1726 | const CoordType stepY = yDim/h; 1727 | const CoordType stepZ = zDim/d; 1728 | 1729 | const int sto = fields->getStorage(); 1730 | const int nPar = fields->particles.size(); 1731 | 1732 | // if current grid leaf is full, convert it to node with 4x4x4 leaves 1733 | if(*isLeaf && (nPar == sto)) 1734 | { 1735 | *isLeaf = false; 1736 | 1737 | // create leaf nodes (4x4x4=64) 1738 | subGrid->reserve(64); 1739 | for(int zz = 0; zz newGrid(thrPool,*depth+1,fields->minX+stepX*xx,fields->minY+stepY*yy,fields->minZ+stepZ*zz, 1745 | fields->minX+(stepX)*(xx+1),fields->minY+(stepY)*(yy+1),fields->minZ+(stepZ)*(zz+1)); 1746 | 1747 | 1748 | 1749 | subGrid->push_back(newGrid); 1750 | } 1751 | 1752 | 1753 | 1754 | 1755 | for(int ii=0;iiparticles[ii]->getMinX(); 1759 | const CoordType miny = fields->particles[ii]->getMinY(); 1760 | const CoordType minz = fields->particles[ii]->getMinZ(); 1761 | 1762 | const CoordType maxx = fields->particles[ii]->getMaxX(); 1763 | const CoordType maxy = 
fields->particles[ii]->getMaxY(); 1764 | const CoordType maxz = fields->particles[ii]->getMaxZ(); 1765 | 1766 | const int cellIndexX = std::floor((minx - fields->minX)/stepX); 1767 | const int cellIndexY = std::floor((miny - fields->minY)/stepY); 1768 | const int cellIndexZ = std::floor((minz - fields->minZ)/stepZ); 1769 | 1770 | const int cellIndexX2 = std::floor((maxx - fields->minX)/stepX); 1771 | const int cellIndexY2 = std::floor((maxy - fields->minY)/stepY); 1772 | const int cellIndexZ2 = std::floor((maxz - fields->minZ)/stepZ); 1773 | 1774 | 1775 | for(int zz = cellIndexZ; zz<=cellIndexZ2; zz++) 1776 | for(int yy = cellIndexY; yy<=cellIndexY2; yy++) 1777 | for(int xx = cellIndexX; xx<=cellIndexX2; xx++) 1778 | { 1779 | if(xx<0 || yy<0 || zz<0 || xx>=w || yy>=h || zz>=d) 1780 | continue; 1781 | 1782 | // overlaps with subgrid, add to it 1783 | (*subGrid)[xx+yy*4+zz*4*4].add(fields->particles[ii]); 1784 | } 1785 | } 1786 | 1787 | // clear unused particles 1788 | fields->particles.clear(); 1789 | fields->particlesCollisionMask.clear(); 1790 | } 1791 | 1792 | 1793 | 1794 | { 1795 | // AABB box of particle 1796 | const CoordType minx = (particlesPrm)->getMinX(); 1797 | const CoordType miny = (particlesPrm)->getMinY(); 1798 | const CoordType minz = (particlesPrm)->getMinZ(); 1799 | 1800 | const CoordType maxx = (particlesPrm)->getMaxX(); 1801 | const CoordType maxy = (particlesPrm)->getMaxY(); 1802 | const CoordType maxz = (particlesPrm)->getMaxZ(); 1803 | 1804 | const int cellIndexX = std::floor((minx - fields->minX)/stepX); 1805 | const int cellIndexY = std::floor((miny - fields->minY)/stepY); 1806 | const int cellIndexZ = std::floor((minz - fields->minZ)/stepZ); 1807 | 1808 | const int cellIndexX2 = std::floor((maxx - fields->minX)/stepX); 1809 | const int cellIndexY2 = std::floor((maxy - fields->minY)/stepY); 1810 | const int cellIndexZ2 = std::floor((maxz - fields->minZ)/stepZ); 1811 | 1812 | uint64_t maskCellsFilled; 1813 | // "gather" operations on neighbor 
cells should be cache-friendly 1814 | 1815 | for(int zz = cellIndexZ; zz<=cellIndexZ2; zz++) 1816 | for(int yy = cellIndexY; yy<=cellIndexY2; yy++) 1817 | for(int xx = cellIndexX; xx<=cellIndexX2; xx++) 1818 | { 1819 | if(xx<0 || yy<0 || zz<0 || xx>=w || yy>=h || zz>=d) 1820 | continue; 1821 | 1822 | storeBitSizeT(maskCellsFilled,1,xx+yy*4+zz*4*4); 1823 | 1824 | if(!*isLeaf) 1825 | { 1826 | (*subGrid)[xx+yy*4+zz*4*4].add(particlesPrm); 1827 | } 1828 | } 1829 | 1830 | if(maskCellsFilled) 1831 | { 1832 | if(*isLeaf) 1833 | { 1834 | fields->particlesCollisionMask.push_back(maskCellsFilled); 1835 | fields->particles.push_back(particlesPrm); 1836 | } 1837 | } 1838 | } 1839 | } 1840 | 1841 | 1842 | 1843 | 1844 | inline 1845 | const bool intersectDim(const CoordType minx, const CoordType maxx, const CoordType minx2, const CoordType maxx2) const noexcept 1846 | { 1847 | return !((maxx < minx2) || (maxx2 < minx)); 1848 | } 1849 | 1850 | // compute collision between given particle and the already-prepared static object grid (after add(..) 
and getCollisions(..)) 1851 | // also returns self-collisions if same particle was added as static particle before (by add(..)) 1852 | // thread-safe 1853 | std::vector*> getDynamicCollisionListFor(IParticle* particle) 1854 | { 1855 | std::unordered_map*,bool> result; 1856 | const int n2 = fields->particles.size(); 1857 | result.reserve(n2); 1858 | 1859 | // AABB box of particle 1860 | const CoordType minx = particle->getMinX(); 1861 | const CoordType miny = particle->getMinY(); 1862 | const CoordType minz = particle->getMinZ(); 1863 | 1864 | const CoordType maxx = particle->getMaxX(); 1865 | const CoordType maxy = particle->getMaxY(); 1866 | const CoordType maxz = particle->getMaxZ(); 1867 | 1868 | const CoordType xDim = fields->maxX - fields->minX; 1869 | const CoordType yDim = fields->maxY - fields->minY; 1870 | const CoordType zDim = fields->maxZ - fields->minZ; 1871 | 1872 | const int w = fields->getWidth(); 1873 | const int h = fields->getHeight(); 1874 | const int d = fields->getDepth(); 1875 | const CoordType stepX = xDim/w; 1876 | const CoordType stepY = yDim/h; 1877 | const CoordType stepZ = zDim/d; 1878 | 1879 | 1880 | const int cellIndexX = std::floor((minx - fields->minX)/stepX); 1881 | const int cellIndexY = std::floor((miny - fields->minY)/stepY); 1882 | const int cellIndexZ = std::floor((minz - fields->minZ)/stepZ); 1883 | 1884 | const int cellIndexX2 = std::floor((maxx - fields->minX)/stepX); 1885 | const int cellIndexY2 = std::floor((maxy - fields->minY)/stepY); 1886 | const int cellIndexZ2 = std::floor((maxz - fields->minZ)/stepZ); 1887 | uint64_t collisionMask=0; 1888 | for(int zz = cellIndexZ; zz<=cellIndexZ2; zz++) 1889 | for(int yy = cellIndexY; yy<=cellIndexY2; yy++) 1890 | for(int xx = cellIndexX; xx<=cellIndexX2; xx++) 1891 | { 1892 | if(xx<0 || yy<0 || zz<0 || xx>=w || yy>=h || zz>=d) 1893 | continue; 1894 | 1895 | // if selected cell is a cell 1896 | // (if parent is leaf, then it is a cell) 1897 | if(*isLeaf) 1898 | { 1899 | 
storeBitSizeT(collisionMask,1,xx+yy*4+zz*4*4); 1900 | } 1901 | else // if this is a grid 1902 | { 1903 | 1904 | 1905 | 1906 | auto subResult = (*subGrid)[xx+yy*4+zz*4*4].getDynamicCollisionListFor(particle); 1907 | 1908 | for(auto& subr:subResult) 1909 | { 1910 | result.emplace(subr,true); 1911 | } 1912 | } 1913 | } 1914 | 1915 | // if this is a leaf node 1916 | 1917 | for(int j=0;jparticles[j])==result.end()) 1920 | if(fields->particlesCollisionMask[j] & collisionMask) 1921 | { 1922 | 1923 | const CoordType minx2 = fields->particles[j]->getMinX(); 1924 | const CoordType maxx2 = fields->particles[j]->getMaxX(); 1925 | if(intersectDim(minx, maxx, minx2, maxx2)) 1926 | { 1927 | 1928 | const CoordType miny2 = fields->particles[j]->getMinY(); 1929 | const CoordType maxy2 = fields->particles[j]->getMaxY(); 1930 | if(intersectDim(miny, maxy, miny2, maxy2)) 1931 | { 1932 | 1933 | const CoordType minz2 = fields->particles[j]->getMinZ(); 1934 | const CoordType maxz2 = fields->particles[j]->getMaxZ(); 1935 | if(intersectDim(minz, maxz, minz2, maxz2)) 1936 | { 1937 | result.emplace(fields->particles[j],true); 1938 | } 1939 | 1940 | } 1941 | } 1942 | } 1943 | 1944 | } 1945 | std::vector*> resultVec; 1946 | for(auto& res:result) 1947 | resultVec.push_back(res.first); 1948 | return resultVec; 1949 | } 1950 | 1951 | // returns collision pairs between static objects (and prepares internal data for future dynamic object collision checking), ordered 1952 | std::vector> getCollisions() 1953 | { 1954 | fields->mapping.clear(); 1955 | fields->result.clear(); 1956 | 1957 | 1958 | const int w = fields->getWidth(); 1959 | const int h = fields->getHeight(); 1960 | const int d = fields->getDepth(); 1961 | 1962 | 1963 | // check neighbor cells for a collision of another AABB particle 1964 | 1965 | 1966 | std::mutex mut; 1967 | bool completed[64]; 1968 | if((!*isLeaf) && *depth>0) 1969 | { 1970 | for(int i=0;i<64;i++) 1971 | completed[i]=false; 1972 | } 1973 | int completedCtr = 0; 1974 | // 
"gather" operations on neighbor cells should be cache-friendly 1975 | 1976 | 1977 | if(!*isLeaf) 1978 | { 1979 | for(int zz = 0; zzdata()[xx+yy*4+zz*4*4].isLeaf) ) && *depth>0) 1987 | { 1988 | Cmd cmd; 1989 | cmd.completed=&completed[completedCtr++]; 1990 | cmd.mapping=&fields->mapping; 1991 | cmd.mut=&mut; 1992 | cmd.grid=subGrid->data()+(xx+yy*4+zz*4*4); 1993 | thrPool.compute(cmd); 1994 | } 1995 | else 1996 | { 1997 | auto collisions = (subGrid->data()+(xx+yy*4+zz*4*4))->getCollisions(); 1998 | 1999 | { 2000 | 2001 | for(auto& c:collisions) 2002 | { 2003 | fields->mapping[c.getParticle1()][c.getParticle2()]=true; 2004 | } 2005 | 2006 | } 2007 | } 2008 | 2009 | } 2010 | } 2011 | 2012 | 2013 | if(*isLeaf) 2014 | { 2015 | std::map*,std::map*,bool>> localMap; 2016 | 2017 | const int nMask = fields->particles.size(); 2018 | std::vector fastTest; 2019 | 2020 | for(int j=0;jparticles[j]->getMinX(); 2023 | const CoordType maxx = fields->particles[j]->getMaxX(); 2024 | const CoordType miny = fields->particles[j]->getMinY(); 2025 | const CoordType maxy = fields->particles[j]->getMaxY(); 2026 | const CoordType minz = fields->particles[j]->getMinZ(); 2027 | const CoordType maxz = fields->particles[j]->getMaxZ(); 2028 | for(int i=j+1;iparticlesCollisionMask[j] & fields->particlesCollisionMask[i]) 2032 | { 2033 | if(fields->particles[j]->getId()particles[i]->getId()) 2034 | { 2035 | 2036 | const CoordType minx2 = fields->particles[i]->getMinX(); 2037 | const CoordType maxx2 = fields->particles[i]->getMaxX(); 2038 | if(intersectDim(minx, maxx, minx2, maxx2)) 2039 | { 2040 | 2041 | const CoordType miny2 = fields->particles[i]->getMinY(); 2042 | const CoordType maxy2 = fields->particles[i]->getMaxY(); 2043 | if(intersectDim(miny, maxy, miny2, maxy2)) 2044 | { 2045 | 2046 | const CoordType minz2 = fields->particles[i]->getMinZ(); 2047 | const CoordType maxz2 = fields->particles[i]->getMaxZ(); 2048 | if(intersectDim(minz, maxz, minz2, maxz2)) 2049 | { 2050 | 
localMap[fields->particles[j]][fields->particles[i]]=true; 2051 | } 2052 | } 2053 | } 2054 | } 2055 | } 2056 | } 2057 | } 2058 | 2059 | 2060 | { 2061 | std::lock_guard lg(mut); 2062 | for(auto& lm:localMap) 2063 | { 2064 | for(auto& lm2:lm.second) 2065 | fields->mapping[lm.first][lm2.first]=true; 2066 | } 2067 | } 2068 | } 2069 | 2070 | // if at specific layer, wait for threads 2071 | if((!*isLeaf) && *depth>0) 2072 | { 2073 | bool comp = false; 2074 | while(!comp) 2075 | { 2076 | comp=true; 2077 | { 2078 | std::lock_guard lg(mut); 2079 | for(int cmd=0;cmdmapping) 2089 | { 2090 | for(auto& m2:m.second) 2091 | { 2092 | 2093 | fields->result.push_back(CollisionPair(m.first,m2.first)); 2094 | } 2095 | } 2096 | 2097 | 2098 | 2099 | 2100 | return fields->result; 2101 | } 2102 | private: 2103 | std::shared_ptr> fields; 2104 | std::shared_ptr>> subGrid; 2105 | ThreadPool thrPool; 2106 | std::shared_ptr depth; 2107 | std::shared_ptr isLeaf; 2108 | 2109 | }; 2110 | 2111 | 2112 | // axis-aligned bounding-box collision detection 2113 | template 2114 | class BruteForce 2115 | { 2116 | public: 2117 | BruteForce() 2118 | { 2119 | 2120 | } 2121 | 2122 | template 2123 | void add(Derived * particlesPrm, int numParticlesToAdd) 2124 | { 2125 | for(int i=0;i*>(particlesPrm+i)); 2127 | } 2128 | 2129 | std::vector> computeCollisionsSIMD(const int numParticlesToAdd, std::vector orders, 2130 | std::vector ids, 2131 | std::vector minx0, std::vector miny0, std::vector minz0, 2132 | std::vector maxx0, std::vector maxy0, std::vector maxz0 2133 | ) 2134 | { 2135 | std::vector> result; 2136 | std::vector minx,miny,minz,maxx,maxy,maxz; 2137 | std::vector id; 2138 | const int sz = numParticlesToAdd; 2139 | for(int i=0;i(id[i+(k&3)],id[j+k/4])); 2250 | } 2251 | } 2252 | } 2253 | } 2254 | 2255 | for(int i=0;i(id[i],id[j])); 2266 | } 2267 | 2268 | } 2269 | } 2270 | } 2271 | 2272 | return result; 2273 | } 2274 | 2275 | 2276 | 2277 | std::vector> getCollisionsSIMD() 2278 | { 2279 | std::vector> 
result; 2280 | std::vector minx,miny,minz,maxx,maxy,maxz; 2281 | std::vector id; 2282 | const int sz = particles.size(); 2283 | for(int i=0;i * ptr = particles[i]; 2286 | minx.push_back(ptr->getMinX()); 2287 | miny.push_back(ptr->getMinY()); 2288 | minz.push_back(ptr->getMinZ()); 2289 | maxx.push_back(ptr->getMaxX()); 2290 | maxy.push_back(ptr->getMaxY()); 2291 | maxz.push_back(ptr->getMaxZ()); 2292 | id.push_back(ptr->getId()); 2293 | } 2294 | 2295 | const int sz4 = sz - (sz&3); 2296 | for(int i=0;i(i+(k&3),j+k/4)); 2394 | } 2395 | } 2396 | } 2397 | } 2398 | 2399 | for(int i=0;igetId() < particles[j]->getId()) 2404 | { 2405 | if(intersectDim(particles[i]->getMinX(),particles[i]->getMaxX(),particles[j]->getMinX(),particles[j]->getMaxX())) 2406 | if(intersectDim(particles[i]->getMinY(),particles[i]->getMaxY(),particles[j]->getMinY(),particles[j]->getMaxY())) 2407 | if(intersectDim(particles[i]->getMinZ(),particles[i]->getMaxZ(),particles[j]->getMinZ(),particles[j]->getMaxZ())) 2408 | { 2409 | result.push_back(std::pair(i,j)); 2410 | } 2411 | 2412 | } 2413 | } 2414 | } 2415 | 2416 | return result; 2417 | } 2418 | 2419 | std::vector> getCollisions() 2420 | { 2421 | std::vector> result; 2422 | idMap.clear(); 2423 | collisionPairs.clear(); 2424 | const int sz = particles.size(); 2425 | for(int i=0;igetId()]=particles[i]; 2428 | 2429 | for(int j=i+1;jintersectX(particles[j]) && particles[i]->intersectY(particles[j]) && particles[i]->intersectZ(particles[j])) 2433 | { 2434 | 2435 | collisionPairs.push_back(CollisionPair(particles[i],particles[j])); 2436 | } 2437 | } 2438 | } 2439 | std::sort(collisionPairs.begin(),collisionPairs.end(),[](CollisionPair& c1, CollisionPair& c2){ 2440 | return c1.getParticle1()->getId()getId(); 2441 | }); 2442 | result=collisionPairs; 2443 | return result; 2444 | } 2445 | private: 2446 | 2447 | std::vector*> particles; 2448 | std::vector> collisionPairs; 2449 | std::map*> idMap; 2450 | }; 2451 | } 2452 | 2453 | 2454 | 2455 | #endif /* 
FASTCOLLISIONDETECTIONLIB_H_ */ 2456 | -------------------------------------------------------------------------------- /Generator.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Generator.h 3 | * 4 | * Created on: Mar 8, 2022 5 | * Author: tugrul 6 | */ 7 | 8 | #ifndef GENERATOR_H_ 9 | #define GENERATOR_H_ 10 | 11 | #include 12 | 13 | namespace oofrng 14 | { 15 | 16 | 17 | 18 | template 19 | class Generator 20 | { 21 | public: 22 | Generator() 23 | { 24 | alignedSeedBuf = std::make_shared(); 25 | } 26 | 27 | 28 | const uint32_t generate1() 29 | { 30 | return rnd(alignedSeedBuf->seed); 31 | } 32 | 33 | const uint32_t generate1(const uint32_t limit) 34 | { 35 | return rnd(alignedSeedBuf->seed,limit); 36 | } 37 | 38 | const float generate1Float() 39 | { 40 | return ((float)rnd(alignedSeedBuf->seed))*alignedSeedBuf->multiplier; 41 | } 42 | 43 | const float generate1Float(const float limit) 44 | { 45 | return ((float)rnd(alignedSeedBuf->seed))*alignedSeedBuf->multiplier*limit; 46 | } 47 | 48 | // fills array of length n with values between 0 and max(2^32-1) 49 | void generate(uint32_t * const __restrict__ out, const size_t n) 50 | { 51 | 52 | const size_t nL = n-n%LANES; 53 | for(size_t i=0;iptrL,out+i); 56 | } 57 | 58 | for(size_t i=nL;iseed); 61 | } 62 | } 63 | 64 | // fills array of length n with values in range [0,limit) 65 | void generate(uint32_t * const __restrict__ out, const size_t n, const uint32_t limit) 66 | { 67 | 68 | const size_t nL = n-n%LANES; 69 | for(size_t i=0;iptrL,out+i,limit); 72 | } 73 | 74 | for(size_t i=nL;iseed,limit); 77 | } 78 | } 79 | 80 | // generate [0,1) 81 | void generate(float * const __restrict__ out, const size_t n) 82 | { 83 | 84 | const size_t nL = n-n%LANES; 85 | for(size_t i=0;iptrL,out+i); 88 | } 89 | 90 | for(size_t i=nL;iseed); 93 | } 94 | } 95 | 96 | // generate [0,limit) 97 | void generate(float * const __restrict__ out, const size_t n, const float limit) 98 | { 99 | 100 | 
const size_t nL = n-n%LANES; 101 | for(size_t i=0;iptrL,out+i,limit); 104 | } 105 | 106 | for(size_t i=nL;iseed,limit); 109 | } 110 | } 111 | 112 | private: 113 | 114 | static uint32_t* computeAlignment(uint32_t* ptr) 115 | { 116 | uint32_t* ptrLTmp = ptr; 117 | 118 | while(((size_t)ptrLTmp)%4096 != 0) 119 | { 120 | ptrLTmp++; 121 | } 122 | return ptrLTmp; 123 | } 124 | 125 | 126 | class AlignedSeedBuffer 127 | { 128 | public: 129 | AlignedSeedBuffer():ptrL(computeAlignment(seedL)),uint32_tmax(((uint32_t)0)-1),multiplier(1.0/uint32_tmax) 130 | { 131 | for(size_t i=0;i alignedSeedBuf; 147 | 148 | 149 | 150 | // generate random number in range [0,max) 151 | const uint32_t rnd(uint32_t& seed) 152 | { 153 | // Thomas Wang's invention 154 | seed = (seed ^ 61) ^ (seed >> 16); 155 | seed *= 9; 156 | seed = seed ^ (seed >> 4); 157 | seed *= 0x27d4eb2d; 158 | seed = seed ^ (seed >> 15); 159 | return seed; 160 | } 161 | 162 | // generate random number in range [0,limit) 163 | const uint32_t rnd(uint32_t& seed, const uint32_t limit) 164 | { 165 | // Thomas Wang's invention 166 | seed = (seed ^ 61) ^ (seed >> 16); 167 | seed *= 9; 168 | seed = seed ^ (seed >> 4); 169 | seed *= 0x27d4eb2d; 170 | seed = seed ^ (seed >> 15); 171 | return seed%limit; 172 | } 173 | 174 | // generate [0,max) 175 | inline 176 | void rndL(uint32_t * const __restrict__ seed, uint32_t * const __restrict__ out) 177 | { 178 | 179 | 180 | for(int i=0;i> 16); 186 | const uint32_t sd2_ = (sd_ ^ 61) ^ (sd_ >> 16); 187 | 188 | const uint32_t sd3 = sd2*9; 189 | const uint32_t sd3_ = sd2_*9; 190 | 191 | const uint32_t sd4 = sd3 ^ (sd3 >> 4); 192 | const uint32_t sd4_ = sd3_ ^ (sd3_ >> 4); 193 | 194 | const uint32_t sd5 = sd4*0x27d4eb2d; 195 | const uint32_t sd5_ = sd4_*0x27d4eb2d; 196 | 197 | const uint32_t sd6 = sd5 ^ (sd5 >> 15); 198 | const uint32_t sd6_ = sd5_ ^ (sd5_ >> 15); 199 | 200 | 201 | out[i]=sd6; 202 | out[i+1]=sd6_; 203 | 204 | seed[i]=sd6; 205 | seed[i+1]=sd6_; 206 | 207 | } 208 | } 209 | 210 | // 
generate [0,limit) 211 | inline 212 | void rndL(uint32_t * const __restrict__ seed, uint32_t * const __restrict__ out, const uint32_t limit) 213 | { 214 | 215 | 216 | for(int i=0;i> 16); 222 | const uint32_t sd2_ = (sd_ ^ 61) ^ (sd_ >> 16); 223 | 224 | const uint32_t sd3 = sd2*9; 225 | const uint32_t sd3_ = sd2_*9; 226 | 227 | const uint32_t sd4 = sd3 ^ (sd3 >> 4); 228 | const uint32_t sd4_ = sd3_ ^ (sd3_ >> 4); 229 | 230 | const uint32_t sd5 = sd4*0x27d4eb2d; 231 | const uint32_t sd5_ = sd4_*0x27d4eb2d; 232 | 233 | const uint32_t sd6 = sd5 ^ (sd5 >> 15); 234 | const uint32_t sd6_ = sd5_ ^ (sd5_ >> 15); 235 | 236 | 237 | out[i]=sd6%limit; 238 | out[i+1]=sd6_%limit; 239 | 240 | seed[i]=sd6; 241 | seed[i+1]=sd6_; 242 | 243 | } 244 | } 245 | 246 | // generate [0,1) 247 | inline 248 | void rndL(uint32_t * const __restrict__ seed, float * const __restrict__ out) 249 | { 250 | 251 | const float mult = alignedSeedBuf->multiplier; 252 | for(int i=0;i> 16); 260 | const uint32_t sd2_ = (sd_ ^ 61) ^ (sd_ >> 16); 261 | 262 | const uint32_t sd3 = sd2*9; 263 | const uint32_t sd3_ = sd2_*9; 264 | 265 | const uint32_t sd4 = sd3 ^ (sd3 >> 4); 266 | const uint32_t sd4_ = sd3_ ^ (sd3_ >> 4); 267 | 268 | const uint32_t sd5 = sd4*0x27d4eb2d; 269 | const uint32_t sd5_ = sd4_*0x27d4eb2d; 270 | 271 | const uint32_t sd6 = sd5 ^ (sd5 >> 15); 272 | const uint32_t sd6_ = sd5_ ^ (sd5_ >> 15); 273 | 274 | const float sd7 = sd6*mult; 275 | const float sd7_ = sd6_*mult; 276 | 277 | out[i]=sd7; 278 | out[i+1]=sd7_; 279 | 280 | seed[i]=sd6; 281 | seed[i+1]=sd6_; 282 | 283 | } 284 | } 285 | 286 | // generate [0,limit) 287 | inline 288 | void rndL(uint32_t * const __restrict__ seed, float * const __restrict__ out, const float limit) 289 | { 290 | 291 | const float mult = alignedSeedBuf->multiplier*limit; 292 | for(int i=0;i> 16); 300 | const uint32_t sd2_ = (sd_ ^ 61) ^ (sd_ >> 16); 301 | 302 | const uint32_t sd3 = sd2*9; 303 | const uint32_t sd3_ = sd2_*9; 304 | 305 | const uint32_t sd4 = sd3 ^ (sd3 
>> 4); 306 | const uint32_t sd4_ = sd3_ ^ (sd3_ >> 4); 307 | 308 | const uint32_t sd5 = sd4*0x27d4eb2d; 309 | const uint32_t sd5_ = sd4_*0x27d4eb2d; 310 | 311 | const uint32_t sd6 = sd5 ^ (sd5 >> 15); 312 | const uint32_t sd6_ = sd5_ ^ (sd5_ >> 15); 313 | 314 | const float sd7 = sd6*mult; 315 | const float sd7_ = sd6_*mult; 316 | 317 | out[i]=sd7; 318 | out[i+1]=sd7_; 319 | 320 | seed[i]=sd6; 321 | seed[i+1]=sd6_; 322 | 323 | } 324 | } 325 | }; 326 | 327 | } 328 | 329 | #endif /* GENERATOR_H_ */ 330 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) [2023] [Huseyin Tugrul Buyukisik] 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # FastCollisionDetectionLib 2 | C++ fast collision detection for uniformly (and non-uniformly) distributed AABB particles using an adaptive grid with implicit vectorization. 3 | 4 | ## Non-Sparse Adaptive Grid 5 | 6 | - 10 million dynamic-particle AABB collision checks per second against a static grid of 8000 particles 7 | - 1000x speedup against the naive brute-force algorithm for 40k particles (static vs static), with uniform distribution in the range [0 - 1]. 8 | - - teapot-in-stadium problem is partially solved by the "adaptive" grid: 9 | - - 290x speedup when half of the AABBs are 10x farther away than the other half: [0-1] and [10-11] 10 | - - 230x speedup when half of the AABBs are 10x farther away and a single AABB is 100x farther away: [0-1] x N/2, [10-11] x N/2, [100-101] x1 11 | - The produced collision list does not contain duplicate pairs of collisions 12 | - Particle data is not touched; work is done only on pointers internally 13 | - Currently it is adaptive, but memory handling still needs optimization. 
14 | - - On every cell overflow, it stretches the cell to the AABB of all particles and converts it to a grid of 4x4x4 cells, each with a capacity of 4 15 | - Implementation of IParticle is an AABB (axis-aligned bounding box) model 16 | - - In a user-defined particle (a box in the example here), the methods (getMinX/Y/Z and getMaxX/Y/Z) must return the AABB corners of the underlying user particle 17 | 18 | ## Sparse - Linear - Adaptive Grid 19 | 20 | - More than 2000x speedup against naive brute-force, single-threaded, for 10000 particles 21 | - 60 FPS for 20000 particles and ~25000 collision pairs on a 2.1GHz FX8150, single-threaded 22 | - - 100+ FPS on newer CPUs 23 | - Better performance stability compared to the non-sparse version 24 | - Better SIMD support in the all-pairs computation method through tiled computing 25 | - Non-zero-based object-id values are supported (getId() method in IParticle interface) 26 | 27 | ```C++ 28 | // prepare memory pool 29 | FastColDetLib::MemoryPool memPool; 30 | 31 | // map grid to a volume of cube between corners of (0,0,0) and (10005,10005,10005) 32 | FastColDetLib::AdaptiveGridV2 grid(memPool,0,0,0,10005,10005,10005); 33 | 34 | // implement IParticle 35 | struct AABBofPointCloud: public FastColDetLib::IParticle 36 | { 37 | ... 38 | const CoordType getMaxX()const {return xmax;} 39 | const CoordType getMaxY()const {return ymax;} 40 | const CoordType getMaxZ()const {return zmax;} 41 | const CoordType getMinX()const {return xmin;} 42 | const CoordType getMinY()const {return ymin;} 43 | const CoordType getMinZ()const {return zmin;} 44 | const int getId()const {return id;} 45 | ... 
46 | }; 47 | 48 | // initialize AABB vector 49 | std::vector AABBs; 50 | 51 | while(simulation) 52 | { 53 | // clear tree data 54 | grid.clear(); 55 | 56 | // add particles that implement IParticle into grid 57 | grid.addParticles(N,AABBs.data()); 58 | 59 | // build tree 60 | grid.buildTree(); 61 | 62 | // compute all-pairs collision array 63 | // 60FPS on FX8150 2.1GHz single-thread for 20000 particles with less than 29000 collisions 64 | // 100FPS on Xeon Gold 5215 2.9GHz single-thread for 20000 particles with less than 29000 collisions 65 | std::vector> vec = grid.findCollisionsAll(); 66 | 67 | // vec contains the id-values of particles whose AABBs collide, so that you can do further fine-grained collision checks between them 68 | } 69 | 70 | ``` 71 | 72 | ## Multithreaded Tree of Sparse - Linear - Adaptive Grid 73 | 74 | - 60 FPS for 40000 particles' AABB all-pair computations 75 | - Bottlenecked by RAM bandwidth and mutex-array locking throughput 76 | - Only zero-based object-id values supported 77 | - Work load is balanced on particles, not volumes. This distributes particles across threads more evenly, but causes duplicated work because the results from all leaf nodes have to be merged 78 | 79 | ```C++ 80 | // 7 threads with load-balancing by a tree, mapped to (0,0,0)-(10005,10005,10005) region 81 | AdaptiveGridTree<7> test(0,0,0,10005,10005,10005); 82 | 83 | for(int i=0;i<100;i++) 84 | { 85 | size_t nano; 86 | { 87 | FastColDetLib::Bench bench(&nano); 88 | 89 | // clear contents of memory pool of adaptive grid tree 90 | test.clear(); 91 | 92 | // AABBs is a vector of objects that implements IParticle interface 93 | // (only zero-based object-id values supported from getId() method of IParticle) 94 | test.addParticles(N,AABBs.data()); 95 | 96 | // non-duplicate pairs of collisions 97 | const auto coll = test.computeAllPairs(); 98 | std::cout<<"c="< interface for querying AABB within the API 114 | class Box: public FastColDetLib::IParticle 115 | { 116 | 
public: 117 | Box(float xPrm=0, float yPrm=0, float zPrm=0, float x2Prm=0, float y2Prm=0, float z2Prm=0, size_t idPrm=0) 118 | { 119 | x=xPrm; 120 | y=yPrm; 121 | z=zPrm; 122 | 123 | x2=x2Prm; 124 | y2=y2Prm; 125 | z2=z2Prm; 126 | id=idPrm; 127 | } 128 | 129 | const float getMaxX() const { return x>=x2?x:x2;} 130 | const float getMaxY() const { return y>=y2?y:y2;} 131 | const float getMaxZ() const { return z>=z2?z:z2;} 132 | 133 | const float getMinX() const { return x>=x2?x2:x;} 134 | const float getMinY() const { return y>=y2?y2:y;} 135 | const float getMinZ() const { return z>=z2?z2:z;} 136 | const int getId() const {return id;} 137 | 138 | ~Box(){} 139 | 140 | private: 141 | float x,y,z,x2,y2,z2; 142 | size_t id; 143 | }; 144 | 145 | #include 146 | #include 147 | #include"Generator.h" 148 | int main() 149 | { 150 | 151 | oofrng::Generator<64> gen; 152 | 153 | { 154 | // d^3 number of particles 155 | const int d = 20; 156 | const int n=d*d*d; 157 | std::cout<<"n="< box(n); 159 | for(int i=0;i thr; 173 | FastColDetLib::AdaptiveGrid grid(thr,-1,-1,-1,d+1,d+1,d+1); 174 | FastColDetLib::BruteForce bruteForce; 175 | std::vector> coll3D,coll3Dbrute; 176 | 177 | 178 | std::cout<<"add"< rand(1000000*3); 209 | gen.generate(rand.data(),1000000*3); 210 | { 211 | std::mutex mut; 212 | std::vector*> res; 213 | { 214 | FastColDetLib::Bench bench(&nano1); 215 | #pragma omp parallel for 216 | for(int j=0;j<100;j++) 217 | { 218 | std::vector*> resTmp; 219 | for(int i=0;i<1000;i++) 220 | { 221 | auto x = rand[i*3]*d; 222 | auto y = rand[i*3+1]*d; 223 | auto z = rand[i*3+2]*d; 224 | auto item = Box(x,y,z,x+0.25,y+0.25,z+0.25); 225 | auto collisions = grid.getDynamicCollisionListFor(&item); 226 | std::copy(collisions.begin(),collisions.end(),std::back_inserter(resTmp)); 227 | } 228 | std::lock_guard lg(mut); 229 | std::copy(resTmp.begin(),resTmp.end(),std::back_inserter(res)); 230 | } 231 | 232 | } 233 | std::cout<<"grid-compute-dynamic (100k particles AABB): 
"<getId()!=coll3Dbrute[i].getParticle1()->getId()) && 261 | (coll3D[i].getParticle2()->getId()!=coll3Dbrute[i].getParticle2()->getId()) 262 | ) 263 | { 264 | std::cout<<"ERRRROOOR!"<getId()<<"<-->"<getId()<<" "<getId()<<"<-->"<getId()<getId()!=coll3Dbrute[i].getParticle1()->getId()) && 276 | (coll3D[i].getParticle2()->getId()!=coll3Dbrute[i].getParticle2()->getId()) 277 | ) 278 | { 279 | std::cout<<"ERRRROOOR!"<getId()<<"<-->"<getId()<<" "<getId()<<"<-->"<getId()< 4 | 5 | template 6 | struct Vector3D 7 | { 8 | CoordType x,y,z; 9 | Vector3D crossProduct(Vector3D vec) 10 | { 11 | Vector3D res; 12 | res.x = y*vec.z - z*vec.y; 13 | res.y = z*vec.x - x*vec.z; 14 | res.z = x*vec.y - y*vec.x; 15 | return res; 16 | } 17 | 18 | Vector3D operator - (Vector3D vec) 19 | { 20 | Vector3D result; 21 | result.x = x-vec.x; 22 | result.y = y-vec.y; 23 | result.z = z-vec.z; 24 | return result; 25 | } 26 | 27 | Vector3D operator + (Vector3D vec) 28 | { 29 | Vector3D result; 30 | result.x = x+vec.x; 31 | result.y = y+vec.y; 32 | result.z = z+vec.z; 33 | return result; 34 | } 35 | 36 | Vector3D operator * (CoordType v) 37 | { 38 | Vector3D result; 39 | result.x = x*v; 40 | result.y = y*v; 41 | result.z = z*v; 42 | return result; 43 | } 44 | 45 | CoordType abs() 46 | { 47 | return std::sqrt(x*x+y*y+z*z); 48 | } 49 | 50 | }; 51 | 52 | 53 | template 54 | struct PointCloud 55 | { 56 | CoordType xmin,ymin,zmin; 57 | CoordType xmax,ymax,zmax; 58 | Vector3D point[125]; 59 | PointCloud(CoordType x, CoordType y, CoordType z) 60 | { 61 | xmin=x-2.5f; 62 | ymin=y-2.5f; 63 | zmin=z-2.5f; 64 | xmax=x-2.5f; 65 | ymax=y-2.5f; 66 | zmax=z-2.5f; 67 | for(int i=0;i<125;i++) 68 | { 69 | point[i].x=x+i%5-2.5f; 70 | point[i].y=y+(i/5)%5-2.5f; 71 | point[i].z=z+i/25-2.5f; 72 | if(xmin>point[i].x) 73 | xmin=point[i].x; 74 | if(ymin>point[i].y) 75 | ymin=point[i].y; 76 | if(zmin>point[i].z) 77 | zmin=point[i].z; 78 | if(xmax 89 | bool pointCloudIntersection(PointCloud& cl1, PointCloud& cl2) 90 | { 91 | 
for(Vector3D& p:cl1.point) 92 | { 93 | for(Vector3D& p2:cl2.point) 94 | { 95 | if((p-p2).abs()<1.0f) 96 | { 97 | return true; 98 | } 99 | } 100 | } 101 | return false; 102 | } 103 | 104 | template 105 | bool intersectDim(const CoordType minx, const CoordType maxx, const CoordType minx2, const CoordType maxx2) 106 | { 107 | return !((maxx < minx2) || (maxx2 < minx)); 108 | } 109 | 110 | int main() 111 | { 112 | 113 | using cotype = float; 114 | PointCloud ico1(0,0,0); 115 | // heating the CPU for benchmarking 116 | for(int i=0;i<10000;i++) 117 | { 118 | PointCloud ico2(0,0.1f,i*0.1f); 119 | pointCloudIntersection(ico1,ico2); 120 | } 121 | 122 | const int N = 10000; 123 | std::vector> objects; 124 | oofrng::Generator<64> gen; 125 | for(int i=0;i(gen.generate1Float()*45,gen.generate1Float()*45,gen.generate1Float()*45)); 128 | } 129 | 130 | // benchmark begin 131 | size_t nano; 132 | std::map> collisionMatrix; 133 | { 134 | FastColDetLib::Bench bench(&nano); 135 | for(int i=0;i 4 | 5 | template 6 | struct Vector3D 7 | { 8 | CoordType x,y,z; 9 | Vector3D crossProduct(Vector3D vec) 10 | { 11 | Vector3D res; 12 | res.x = y*vec.z - z*vec.y; 13 | res.y = z*vec.x - x*vec.z; 14 | res.z = x*vec.y - y*vec.x; 15 | return res; 16 | } 17 | 18 | Vector3D operator - (Vector3D vec) 19 | { 20 | Vector3D result; 21 | result.x = x-vec.x; 22 | result.y = y-vec.y; 23 | result.z = z-vec.z; 24 | return result; 25 | } 26 | 27 | Vector3D operator + (Vector3D vec) 28 | { 29 | Vector3D result; 30 | result.x = x+vec.x; 31 | result.y = y+vec.y; 32 | result.z = z+vec.z; 33 | return result; 34 | } 35 | 36 | Vector3D operator * (CoordType v) 37 | { 38 | Vector3D result; 39 | result.x = x*v; 40 | result.y = y*v; 41 | result.z = z*v; 42 | return result; 43 | } 44 | 45 | CoordType abs() 46 | { 47 | return std::sqrt(x*x+y*y+z*z); 48 | } 49 | 50 | }; 51 | 52 | 53 | template 54 | struct PointCloud 55 | { 56 | Vector3D point[125]; 57 | PointCloud(CoordType x, CoordType y, CoordType z) 58 | { 59 | for(int 
i=0;i<125;i++) 60 | { 61 | point[i].x=x+i%5-2.5f; 62 | point[i].y=y+(i/5)%5-2.5f; 63 | point[i].z=z+i/25-2.5f; 64 | } 65 | } 66 | }; 67 | 68 | template 69 | bool pointCloudIntersection(PointCloud& cl1, PointCloud& cl2) 70 | { 71 | for(Vector3D& p:cl1.point) 72 | { 73 | for(Vector3D& p2:cl2.point) 74 | { 75 | if((p-p2).abs()<1.0f) 76 | { 77 | return true; 78 | } 79 | } 80 | } 81 | return false; 82 | } 83 | 84 | template 85 | bool intersectDim(const CoordType minx, const CoordType maxx, const CoordType minx2, const CoordType maxx2) 86 | { 87 | return !((maxx < minx2) || (maxx2 < minx)); 88 | } 89 | #include"Generator.h" 90 | 91 | template 92 | struct AABBofPointCloud 93 | { 94 | AABBofPointCloud(int idPrm, PointCloud * pCloudPrm) 95 | { 96 | id=idPrm; 97 | pCloud = pCloudPrm; 98 | xmin=pCloud->point[0].x; 99 | ymin=pCloud->point[0].y; 100 | zmin=pCloud->point[0].z; 101 | xmax=pCloud->point[0].x; 102 | ymax=pCloud->point[0].y; 103 | zmax=pCloud->point[0].z; 104 | for(int i=0;i<125;i++) 105 | { 106 | if(xmin>pCloud->point[i].x) 107 | xmin=pCloud->point[i].x; 108 | if(ymin>pCloud->point[i].y) 109 | ymin=pCloud->point[i].y; 110 | if(zmin>pCloud->point[i].z) 111 | zmin=pCloud->point[i].z; 112 | if(xmaxpoint[i].x) 113 | xmax=pCloud->point[i].x; 114 | if(ymaxpoint[i].y) 115 | ymax=pCloud->point[i].y; 116 | if(zmaxpoint[i].z) 117 | zmax=pCloud->point[i].z; 118 | } 119 | } 120 | int id; 121 | PointCloud* pCloud; 122 | CoordType xmin; 123 | CoordType ymin; 124 | CoordType zmin; 125 | CoordType xmax; 126 | CoordType ymax; 127 | CoordType zmax; 128 | }; 129 | 130 | 131 | template 132 | class Grid 133 | { 134 | public: 135 | Grid(CoordType minCor, CoordType maxCor) 136 | { 137 | id=0; 138 | mincorner=minCor; 139 | maxcorner=maxCor; 140 | cellData.resize(Size*Size*Size*(ObjectsPerCell+1)); 141 | for(int i=0;i 146 | void forEachCellColliding(AABBofPointCloud* aabb, const Func& func) 147 | { 148 | // calculate cell size (equal for all dimensions for now) 149 | const CoordType step = 
(maxcorner - mincorner)/Size; 150 | 151 | // calculate overlapping region's cell indices 152 | const int mincornerstartx = std::floor((aabb->xmin - mincorner) / step); 153 | const int maxcornerendx = std::floor((aabb->xmax - mincorner) / step); 154 | const int mincornerstarty = std::floor((aabb->ymin - mincorner) / step); 155 | const int maxcornerendy = std::floor((aabb->ymax - mincorner) / step); 156 | const int mincornerstartz = std::floor((aabb->zmin - mincorner) / step); 157 | const int maxcornerendz = std::floor((aabb->zmax - mincorner) / step); 158 | for(int i=mincornerstartz;i<=maxcornerendz;i++) 159 | for(int j=mincornerstarty;j<=maxcornerendy;j++) 160 | for(int k=mincornerstartx;k<=maxcornerendx;k++) 161 | { 162 | if(i<0 || i>=Size || j<0 || j>=Size || k<0 || k>=Size) 163 | continue; 164 | func(k,j,i,aabb); 165 | } 166 | } 167 | 168 | void addObject(AABBofPointCloud* aabb) 169 | { 170 | forEachCellColliding(aabb, [&](int k, int j, int i, AABBofPointCloud* aabb){ 171 | const int collidingCellIndex = (k+j*Size+i*Size*Size)*(ObjectsPerCell+1); 172 | const int lastUsedIndex = cellData[collidingCellIndex]++; 173 | cellData[collidingCellIndex+lastUsedIndex+1]=id; 174 | idMapping[id++]=aabb; 175 | }); 176 | } 177 | 178 | std::vector*> checkCollisionsWithSingleAABB(AABBofPointCloud* aabb) 179 | { 180 | std::vector*> result; 181 | forEachCellColliding(aabb, [&](int k, int j, int i, AABBofPointCloud* aabb){ 182 | const int collidingCellIndex = (k+j*Size+i*Size*Size)*(ObjectsPerCell+1); 183 | const int numObjectsInCell = cellData[collidingCellIndex]; 184 | for(int p=0;p* aabbPtr = idMapping[idObj]; 188 | // evade self-collision and duplicated collisions 189 | if( aabb->id < aabbPtr->id) 190 | if(intersectDim(aabb->xmin, aabb->xmax, aabbPtr->xmin, aabbPtr->xmax)) 191 | if(intersectDim(aabb->ymin, aabb->ymax, aabbPtr->ymin, aabbPtr->ymax)) 192 | if(intersectDim(aabb->zmin, aabb->zmax, aabbPtr->zmin, aabbPtr->zmax)) 193 | 194 | { 195 | result.push_back(aabbPtr); 196 | } 
197 | } 198 | }); 199 | return result; 200 | } 201 | 202 | 203 | private: 204 | int id; 205 | CoordType mincorner,maxcorner; 206 | std::map*> idMapping; 207 | std::vector cellData; 208 | }; 209 | int main() 210 | { 211 | 212 | using cotype = float; 213 | PointCloud ico1(0,0,0); 214 | // heating the CPU for benchmarking 215 | 216 | for(int i=0;i<10000;i++) 217 | { 218 | PointCloud ico2(0,0.1f,i*0.1f); 219 | pointCloudIntersection(ico1,ico2); 220 | } 221 | 222 | const int N = 10000; 223 | std::vector> objects; 224 | oofrng::Generator<64> gen; 225 | for(int i=0;i(gen.generate1Float()*450,gen.generate1Float()*450,gen.generate1Float()*450)); 228 | } 229 | 230 | std::vector> AABBs; 231 | for(int i=0;i(i,&objects[i])); 234 | } 235 | 236 | 237 | // benchmark begin 238 | size_t nano; 239 | std::map> collisionMatrix; 240 | { 241 | FastColDetLib::Bench bench(&nano); 242 | 243 | // uniform grid for 32x32x32 cells each with 30 objects max 244 | // mapped to (0,0,0) - (450,450,450) cube 245 | Grid grid(0,450); 246 | 247 | // add AABBs to grid 248 | for(int i=0;i*> collisions = grid.checkCollisionsWithSingleAABB(&AABBs[i]); 257 | 258 | for(AABBofPointCloud* aabb:collisions) 259 | { 260 | if(pointCloudIntersection(*aabb->pCloud, *AABBs[i].pCloud)) 261 | { 262 | collisionMatrix[AABBs[i].id][aabb->id]=true; 263 | collisionMatrix[aabb->id][AABBs[i].id]=true; 264 | } 265 | } 266 | } 267 | 268 | } 269 | std::cout< 4 | 5 | template 6 | struct Vector3D 7 | { 8 | CoordType x,y,z; 9 | Vector3D crossProduct(Vector3D vec) 10 | { 11 | Vector3D res; 12 | res.x = y*vec.z - z*vec.y; 13 | res.y = z*vec.x - x*vec.z; 14 | res.z = x*vec.y - y*vec.x; 15 | return res; 16 | } 17 | 18 | Vector3D operator - (Vector3D vec) 19 | { 20 | Vector3D result; 21 | result.x = x-vec.x; 22 | result.y = y-vec.y; 23 | result.z = z-vec.z; 24 | return result; 25 | } 26 | 27 | Vector3D operator + (Vector3D vec) 28 | { 29 | Vector3D result; 30 | result.x = x+vec.x; 31 | result.y = y+vec.y; 32 | result.z = z+vec.z; 33 | 
return result; 34 | } 35 | 36 | Vector3D operator * (CoordType v) 37 | { 38 | Vector3D result; 39 | result.x = x*v; 40 | result.y = y*v; 41 | result.z = z*v; 42 | return result; 43 | } 44 | 45 | CoordType abs() 46 | { 47 | return std::sqrt(x*x+y*y+z*z); 48 | } 49 | 50 | }; 51 | 52 | 53 | template