├── CMakeLists.txt ├── LICENSE ├── README.md └── src ├── CMakeLists.txt ├── executable ├── CMakeLists.txt └── msufsort │ ├── CMakeLists.txt │ └── main.cpp ├── include ├── endian.h └── endian │ ├── byte_swap.h │ ├── endian.h │ ├── endian_swap.h │ └── endian_type.h └── library ├── CMakeLists.txt ├── msufsort.h └── msufsort ├── CMakeLists.txt ├── msufsort.cpp └── msufsort.h /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.16.3) 2 | 3 | project(msufsort) 4 | 5 | if(NOT CMAKE_BUILD_TYPE) 6 | set(CMAKE_BUILD_TYPE Release) 7 | message("*** Build type not set. defaulting to Release") 8 | endif() 9 | 10 | if (CMAKE_BUILD_TYPE STREQUAL "Debug") 11 | add_compile_options( 12 | -g 13 | -O0 14 | -march=native 15 | ) 16 | else() 17 | add_compile_options( 18 | -O3 19 | -march=native 20 | ) 21 | endif() 22 | 23 | option(MSUFSORT_BUILD_DEMO "Build the CLI demo" OFF) 24 | # The output directory variables only initialise properties of targets created afterwards, so set them before add_subdirectory(). 25 | set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) 26 | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) 27 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) 28 | 29 | add_subdirectory(src) 30 | 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Michael Maniscalco 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or 
substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # msufsort 2 | msufsort suffix array construction algorithm 3 | 4 | high performance, multi-threaded, suffix array, bwt/unbwt, lcp construction algorithm 5 | 6 | **** this is a pre-release demo **** 7 | **** this version is incomplete and lacks induction sorting which can result in sub optimal performance on some pathological inputs **** 8 | 9 | 10 | ====================================================================== 11 | 12 | To compile: 13 | 14 | ``` 15 | mkdir build 16 | cd build 17 | cmake .. 18 | make 19 | ``` 20 | 21 | To build demo: 22 | 23 | ``` 24 | mkdir build 25 | cd build 26 | cmake -DMSUFSORT_BUILD_DEMO=ON .. 
27 | make 28 | ``` 29 | -------------------------------------------------------------------------------- /src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(_msufsort_include_dir ${CMAKE_CURRENT_SOURCE_DIR}) 2 | 3 | add_subdirectory(library) 4 | add_subdirectory(executable) 5 | -------------------------------------------------------------------------------- /src/executable/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | if (MSUFSORT_BUILD_DEMO) 2 | add_subdirectory(msufsort) 3 | endif() -------------------------------------------------------------------------------- /src/executable/msufsort/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | find_library(LIBCXX_LIB c++) 3 | find_package(Threads) 4 | find_library(LIBCXXABI_LIB c++abi) 5 | 6 | link_libraries( 7 | ${LIBCXX_LIB} 8 | ${LIBCXXABI_LIB} 9 | ) 10 | 11 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) 12 | 13 | add_executable(msufsort_demo main.cpp) 14 | 15 | target_link_libraries(msufsort_demo ${CMAKE_THREAD_LIBS_INIT} msufsort) -------------------------------------------------------------------------------- /src/executable/msufsort/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | 12 | namespace 13 | { 14 | 15 | //============================================================================== 16 | inline int32_t match_length 17 | ( 18 | int8_t const * beginInput, 19 | int8_t const * endInput, 20 | int32_t indexA, 21 | int32_t indexB, 22 | int32_t matchLength 23 | ) 24 | { 25 | if (indexA > indexB) 26 | std::swap(indexA, indexB); 27 | 28 | int8_t const * inputA = beginInput + indexA + matchLength; 29 | int8_t const * inputB = beginInput + indexB + matchLength; 30 | 
endInput -= sizeof(int64_t); 31 | while ((inputB < endInput) && (*(int64_t const *)inputA == *(int64_t const *)inputB)) 32 | inputA += sizeof(int64_t), inputB += sizeof(int64_t); 33 | endInput += sizeof(int64_t); 34 | while ((inputB < endInput) && (*inputA == *inputB)) 35 | ++inputA, ++inputB; 36 | return (inputA - (beginInput + indexA)); 37 | } 38 | 39 | 40 | //============================================================================== 41 | void lcp 42 | ( 43 | int8_t const * beginInput, 44 | int8_t const * endInput, 45 | int32_t * begin, 46 | int32_t size, 47 | std::size_t currentMatchLength 48 | ) 49 | { 50 | if (size <= 4) 51 | { 52 | for (auto i = 0; i < size; ++i) 53 | begin[i] = match_length(beginInput, endInput, begin[i], begin[i + 1], currentMatchLength); 54 | } 55 | else 56 | { 57 | auto mid = (size / 2); 58 | auto nextMatchLength = match_length(beginInput, endInput, begin[0], begin[mid], currentMatchLength); 59 | lcp(beginInput, endInput, begin, mid, nextMatchLength); 60 | lcp(beginInput, endInput, begin + mid, size - mid, currentMatchLength); 61 | } 62 | } 63 | // lcp_multithreaded: splits begin[0, size) into numThreads chunks; each entry is replaced by its match length with the following entry. 64 | // The value at each chunk boundary is computed up front (temp) and stored after the workers join; the final entry has no successor and is set to 0. 65 | //============================================================================== 66 | void lcp_multithreaded 67 | ( 68 | int8_t const * beginInput, 69 | int8_t const * endInput, 70 | int32_t * begin, 71 | int32_t size, 72 | int32_t numThreads 73 | ) 74 | { 75 | if (numThreads < 1) numThreads = 1; // guard the division below against a non-positive thread count 76 | auto perThread = ((size + numThreads - 1) / numThreads); 77 | std::vector<std::thread> threads(numThreads); 78 | std::vector<int32_t> temp(numThreads); // std::vector instead of a non-standard variable length array 79 | auto n = 0; 80 | for (auto i = 0; i < numThreads; ++i) 81 | { 82 | auto s = perThread; 83 | if ((n + s) > size) 84 | s = (size - n); 85 | temp[i] = ((n + s) < size) ? match_length(beginInput, endInput, begin[n + s - 1], begin[n + s], 0) : 0; // never read begin[size]: the last suffix has no successor 86 | threads[i] = std::thread(lcp, beginInput, endInput, begin + n, s - 1, 0); 87 | n += s; 88 | } 89 | 90 | for (auto & e : threads) 91 | e.join(); 92 | n = 0; 93 | for (auto i = 0; i < numThreads; ++i) 94 | { 95 | auto s = perThread; 96 | if ((n + s) > size) 97 | s = (size - 
n); 98 | if (s > 0) begin[n + s - 1] = temp[i]; // empty chunks (more threads than entries, or size == 0) own no slot 99 | n += s; 100 | } 101 | } 102 | 103 | // validate_lcp: recomputes every match length from sa and counts mismatches against lcp, printing progress and the result to std::cout. 104 | // The last suffix has no successor, so its expected value is 0 and sa[size] is never read. 105 | //============================================================================== 106 | void validate_lcp 107 | ( 108 | int8_t const * beginInput, 109 | int8_t const * endInput, 110 | int32_t const * sa, 111 | int32_t size, 112 | int32_t const * lcp 113 | ) 114 | { 115 | auto numSuffixes = size; 116 | auto errorCount = 0; 117 | auto updateInterval = ((numSuffixes + 99) / 100); 118 | auto nextUpdate = 0; 119 | auto counter = 0; 120 | 121 | for (auto i = 0; i < size; ++i) 122 | { 123 | if (counter++ >= nextUpdate) 124 | { 125 | nextUpdate += updateInterval; 126 | if (errorCount) 127 | std::cout << "**** ERRORS DETECTED (" << errorCount << ") **** "; 128 | std::cout << (counter / updateInterval) << "% verified" << (char)13 << std::flush; 129 | } 130 | 131 | auto m = ((i + 1) < size) ? match_length(beginInput, endInput, sa[i], sa[i + 1], 0) : 0; // stay inside sa[0, size) 132 | if (m != lcp[i]) 133 | errorCount++; 134 | } 135 | if (errorCount > 0) 136 | std::cout << "lcp array error count = " << errorCount << std::endl; 137 | else 138 | std::cout << "lcp array validated" << std::endl; 139 | } 140 | 141 | 142 | //========================================================================== 143 | void make_lcp_array 144 | ( 145 | std::vector const & suffixArray, 146 | std::vector const & input, 147 | int32_t numThreads 148 | ) 149 | { 150 | // lcp can be computed using the existing suffix array space but we make a copy instead 151 | // so that we can validate the lcp using the suffix array. 
152 | std::vector output(suffixArray.begin() + 1, suffixArray.end()); 153 | auto start = std::chrono::system_clock::now(); 154 | lcp_multithreaded(input.data(), input.data() + input.size(), output.data(), output.size(), numThreads); 155 | auto finish = std::chrono::system_clock::now(); 156 | auto elapsed = std::chrono::duration_cast(finish - start); 157 | std::cout << "lcp array completed - total elapsed time: " << elapsed.count() << " ms" << std::endl; 158 | validate_lcp(input.data(), input.data() + input.size(), suffixArray.data() + 1, suffixArray.size() - 1, output.data()); 159 | } 160 | 161 | 162 | //============================================================================== 163 | std::vector load_file 164 | ( 165 | std::string const & inputPath 166 | ) 167 | { 168 | std::vector input; 169 | std::ifstream inputStream(inputPath, std::ios_base::in | std::ios_base::binary); 170 | if (inputStream) 171 | { 172 | inputStream.seekg(0, std::ios_base::end); 173 | int64_t size = inputStream.tellg(); 174 | input.reserve(size); 175 | input.resize(size); 176 | inputStream.seekg(0, std::ios_base::beg); 177 | inputStream.read((char *)input.data(), input.size()); 178 | inputStream.close(); 179 | } 180 | else 181 | { 182 | std::cout << "failed to load file: " << inputPath << std::endl; 183 | throw std::exception(); 184 | } 185 | return input; 186 | } 187 | 188 | 189 | //============================================================================== 190 | template 191 | bool write_file 192 | ( 193 | std::string const & outputPath, 194 | input_iter begin, 195 | input_iter end 196 | ) 197 | { 198 | std::ofstream outputStream(outputPath, std::ios_base::out | std::ios_base::binary); 199 | if (outputStream) 200 | { 201 | outputStream.write((char const *)&*begin, std::distance(begin, end)); 202 | outputStream.close(); 203 | return true; 204 | } 205 | return false; 206 | } 207 | 208 | 209 | //============================================================================== 210 | 
template 211 | int compare 212 | ( 213 | InputIter a, 214 | InputIter b, 215 | InputIter end 216 | ) 217 | { 218 | uint8_t const * pA = (uint8_t const *)&*a; 219 | uint8_t const * pB = (uint8_t const *)&*b; 220 | uint8_t const * pEnd = (uint8_t const *)&*end; 221 | 222 | while ((pA < pEnd) && (pB < pEnd) && (*pA == *pB)) 223 | { 224 | ++pA; 225 | ++pB; 226 | } 227 | if (pA == pEnd) 228 | return -1; 229 | if (pB == pEnd) 230 | return 1; 231 | return (*pA < *pB) ? -1 : 1; 232 | } 233 | 234 | 235 | //============================================================================== 236 | int32_t validate_suffix_array 237 | ( 238 | std::vector const & input, 239 | std::vector const & suffixArray 240 | ) 241 | { 242 | if (suffixArray[0] != (int32_t)input.size()) 243 | return 1; // first entry in SA should be sentinel 244 | 245 | auto numSuffixes = input.size(); 246 | auto errorCount = 0; 247 | auto updateInterval = ((numSuffixes + 99) / 100); 248 | auto nextUpdate = 0; 249 | auto counter = 0; 250 | 251 | for (auto i = 2; i < (int32_t)suffixArray.size(); ++i) 252 | { 253 | if (counter++ >= nextUpdate) 254 | { 255 | nextUpdate += updateInterval; 256 | if (errorCount) 257 | std::cout << "**** ERRORS DETECTED (" << errorCount << ") **** "; 258 | std::cout << (counter / updateInterval) << "% verified" << (char)13 << std::flush; 259 | } 260 | 261 | auto suffixA = input.data() + suffixArray[i - 1]; 262 | auto suffixB = input.data() + suffixArray[i]; 263 | int32_t c = compare(suffixA, suffixB, input.data() + input.size()); 264 | if (c != -1) 265 | { 266 | ++errorCount; 267 | } 268 | } 269 | return errorCount; 270 | } 271 | 272 | 273 | //============================================================================== 274 | std::vector make_input 275 | ( 276 | int32_t numUniqueSymbols, 277 | int32_t size 278 | ) 279 | { 280 | std::vector input; 281 | input.reserve(size); 282 | input.resize(size); 283 | for (auto & e : input) 284 | e = rand() % numUniqueSymbols; 285 | return input; 286 
| } 287 | 288 | 289 | //============================================================================== 290 | void print_usage 291 | ( 292 | ) 293 | { 294 | std::cout << "================================================================" << std::endl; 295 | std::cout << "msufsort - version 4a-demo" << std::endl; 296 | std::cout << "author: Michael A Maniscalco" << std::endl; 297 | std::cout << "**** this is a pre-release demo ****" << std::endl; 298 | std::cout << "**** this version is incomplete and lacks induction sorting ****" << std::endl; 299 | std::cout << "================================================================" << std::endl << std::endl; 300 | 301 | std::cout << "usage: msufsort [b|s|l] input [num threads]" << std::endl; 302 | std::cout << "\tb = bwt" << std::endl; 303 | std::cout << "\ts = suffix array" << std::endl; 304 | std::cout << "\tl = lcp array" << std::endl; 305 | } 306 | 307 | } 308 | 309 | 310 | //============================================================================== 311 | int32_t main 312 | ( 313 | int32_t argumentCount, 314 | char const ** inputArguments 315 | ) 316 | { 317 | try 318 | { 319 | if (argumentCount < 3) 320 | { 321 | print_usage(); 322 | return 0; 323 | } 324 | 325 | enum task_type 326 | { 327 | burrows_wheeler_transform, 328 | suffix_array, 329 | lcp_array, 330 | test_mode, 331 | invalid 332 | }; 333 | 334 | task_type taskType = invalid; 335 | 336 | std::string task(inputArguments[1]); 337 | if ((task == "b") || (task == "B")) 338 | taskType = burrows_wheeler_transform; 339 | if ((task == "s") || (task == "S")) 340 | taskType = suffix_array; 341 | if ((task == "l") || (task == "L")) 342 | taskType = lcp_array; 343 | if ((task == "t") || (task == "T")) 344 | taskType = test_mode; 345 | if (taskType == invalid) 346 | { 347 | print_usage(); 348 | return 0; 349 | } 350 | 351 | std::string inputPath = inputArguments[2]; 352 | std::vector input; 353 | if (taskType != test_mode) 354 | { 355 | input = load_file(inputPath); 
356 | 357 | int32_t inputSize = input.size(); 358 | std::cout << "================================================================" << std::endl; 359 | std::cout << "msufsort - version 4a-demo" << std::endl; 360 | std::cout << "author: Michael A Maniscalco" << std::endl; 361 | std::cout << "**** this is a pre-release demo ****" << std::endl; 362 | std::cout << "**** this version is incomplete and lacks induction sorting ****" << std::endl; 363 | std::cout << "================================================================" << std::endl << std::endl; 364 | 365 | std::cout << "loaded " << inputSize << " bytes" << std::endl; 366 | } 367 | else 368 | { 369 | std::cout << "test mode ... " << std::endl; 370 | } 371 | 372 | auto numWorkerThreads = 1; 373 | if (argumentCount >= 4) 374 | { 375 | try 376 | { 377 | numWorkerThreads = std::stoi(inputArguments[3]); if (numWorkerThreads < 1) throw std::exception(); // zero/negative counts are reported by the handler below 378 | } 379 | catch (...) 380 | { 381 | std::cout << "INVALID THREAD COUNT: " << inputArguments[3] << std::endl; 382 | throw std::exception(); 383 | } 384 | } 385 | 386 | auto start = std::chrono::system_clock::now(); 387 | switch (taskType) 388 | { 389 | case test_mode: 390 | { 391 | auto errorCount = 0; 392 | 393 | for (auto numUniqueSymbols = 1; ((!errorCount) && (numUniqueSymbols < 0x100)); ++numUniqueSymbols) 394 | { 395 | for (auto inputSize = 1; ((!errorCount) && (inputSize < (1 << 10))); ++inputSize) 396 | { 397 | for (int32_t numWorkerThreads = 1; ((!errorCount) && (numWorkerThreads <= (int32_t)std::thread::hardware_concurrency())); ++numWorkerThreads) 398 | { 399 | srand(numUniqueSymbols * inputSize * numWorkerThreads); 400 | std::cout << "sa test: num unique symbols = " << numUniqueSymbols << ", input size = " << inputSize << ", num threads = " << numWorkerThreads << std::endl; 401 | 402 | auto input = make_input(numUniqueSymbols, inputSize); 403 | auto suffixArray = ::maniscalco::make_suffix_array(input.begin(), input.end(), numWorkerThreads); 404 | // validate (the loop stops at the first failure so a later passing run cannot overwrite errorCount) 405 | errorCount = validate_suffix_array(input, suffixArray); 406 
| if (errorCount) 407 | std::cout << "**** ERRORS DETECTED (" << errorCount << ") **** " << std::endl; 408 | } 409 | } 410 | } 411 | 412 | for (auto numUniqueSymbols = 1; ((!errorCount) && (numUniqueSymbols < 0x100)); ++numUniqueSymbols) 413 | { 414 | for (auto inputSize = 1; ((!errorCount) && (inputSize < (1 << 10))); ++inputSize) 415 | { 416 | for (int32_t numWorkerThreads = 1; ((!errorCount) && (numWorkerThreads <= (int32_t)std::thread::hardware_concurrency())); ++numWorkerThreads) 417 | { 418 | srand(numUniqueSymbols * inputSize * numWorkerThreads); 419 | std::cout << "bwt test: num unique symbols = " << numUniqueSymbols << ", input size = " << inputSize << ", num threads = " << numWorkerThreads << std::endl; 420 | 421 | auto input = make_input(numUniqueSymbols, inputSize); 422 | auto copyOfInput = input; 423 | auto sentinelIndex = ::maniscalco::forward_burrows_wheeler_transform(input.begin(), input.end(), numWorkerThreads); 424 | // validate 425 | ::maniscalco::reverse_burrows_wheeler_transform(input.begin(), input.end(), sentinelIndex, numWorkerThreads); 426 | if (input != copyOfInput) 427 | { 428 | std::cout << "**** BWT ERROR DETECTED" << std::endl; 429 | errorCount++; 430 | } 431 | } 432 | } 433 | } 434 | break; 435 | } 436 | 437 | case suffix_array: 438 | { 439 | std::cout << "computing suffix array" << std::endl; 440 | auto suffixArray = ::maniscalco::make_suffix_array(input.begin(), input.end(), numWorkerThreads); 441 | auto finish = std::chrono::system_clock::now(); 442 | auto elapsed = std::chrono::duration_cast(finish - start); 443 | std::cout << "suffix array completed - total elapsed time: " << elapsed.count() << " ms" << std::endl; 444 | 445 | // validate 446 | std::cout << "validating suffix array" << std::endl; 447 | auto errorCount = validate_suffix_array(input, suffixArray); 448 | if (errorCount) 449 | std::cout << "**** ERRORS DETECTED (" << errorCount << ") **** " << std::endl; 450 | else 451 | std::cout << "test completed and results 
validated successfully" << std::endl; 452 | break; 453 | } 454 | 455 | case lcp_array: 456 | { 457 | std::cout << "computing lcp array" << std::endl; 458 | auto suffixArray = ::maniscalco::make_suffix_array(input.begin(), input.end(), numWorkerThreads); 459 | auto finish = std::chrono::system_clock::now(); 460 | auto elapsed = std::chrono::duration_cast(finish - start); 461 | std::cout << "suffix array completed - total elapsed time: " << elapsed.count() << " ms" << std::endl; 462 | make_lcp_array(suffixArray, input, numWorkerThreads); 463 | break; 464 | } 465 | 466 | case burrows_wheeler_transform: 467 | { 468 | auto copyOfInput = input; 469 | std::cout << "computing burrows wheeler transform" << std::endl; 470 | auto sentinelIndex = ::maniscalco::forward_burrows_wheeler_transform(input.begin(), input.end(), numWorkerThreads); 471 | auto finish = std::chrono::system_clock::now(); 472 | auto elapsed = std::chrono::duration_cast(finish - start); 473 | std::cout << "burrows wheeler transform completed - total elapsed time: " << elapsed.count() << " ms" << std::endl; 474 | 475 | // validate 476 | start = std::chrono::system_clock::now(); 477 | ::maniscalco::reverse_burrows_wheeler_transform(input.begin(), input.end(), sentinelIndex, numWorkerThreads); 478 | finish = std::chrono::system_clock::now(); 479 | elapsed = std::chrono::duration_cast(finish - start); 480 | std::cout << "inverse burrows wheeler transform completed - total elapsed time: " << elapsed.count() << " ms" << std::endl; 481 | 482 | if (input != copyOfInput) 483 | std::cout << "**** BWT ERROR DETECTED" << std::endl; 484 | else 485 | std::cout << "test completed and results validated successfully" << std::endl; 486 | break; 487 | } 488 | 489 | default: 490 | { 491 | print_usage(); 492 | break; 493 | } 494 | } 495 | } 496 | catch (...) 
497 | { 498 | std::cout << "caught exception" << std::endl; 499 | } 500 | 501 | return 0; 502 | } 503 | 504 | -------------------------------------------------------------------------------- /src/include/endian.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | 4 | #include "./endian/endian.h" 5 | 6 | -------------------------------------------------------------------------------- /src/include/endian/byte_swap.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #ifdef __APPLE__ 7 | #include 8 | #else 9 | #include 10 | #endif 11 | 12 | 13 | namespace maniscalco 14 | { 15 | 16 | //============================================================================== 17 | template 18 | auto byte_swap 19 | ( 20 | T value 21 | ) -> typename std::enable_if::type 22 | { 23 | return value; 24 | } 25 | 26 | 27 | //============================================================================== 28 | template 29 | auto byte_swap 30 | ( 31 | T value 32 | ) -> typename std::enable_if::type 33 | { 34 | auto v = static_cast(value); 35 | return static_cast((v >> 8) | (v << 8)); 36 | } 37 | 38 | 39 | //============================================================================== 40 | template 41 | auto byte_swap 42 | ( 43 | T value 44 | ) -> typename std::enable_if::type 45 | { 46 | #ifdef __APPLE__ 47 | return static_cast(OSSwapInt32(static_cast(value))); 48 | #else 49 | return static_cast(__builtin_bswap32(static_cast(value))); 50 | #endif 51 | } 52 | 53 | 54 | //============================================================================== 55 | template 56 | auto byte_swap 57 | ( 58 | T value 59 | ) -> typename std::enable_if::type 60 | { 61 | #ifdef __APPLE__ 62 | return static_cast(OSSwapInt64(static_cast(value))); 63 | #else 64 | return static_cast(__builtin_bswap64(static_cast(value))); 65 | #endif 66 | } 67 | 68 | } // namespace maniscalco 
69 | -------------------------------------------------------------------------------- /src/include/endian/endian.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | 4 | 5 | namespace maniscalco 6 | { 7 | 8 | template class endian; 9 | 10 | } // namespace maniscalco 11 | 12 | 13 | #include "./byte_swap.h" 14 | #include "./endian_type.h" 15 | #include "./endian_swap.h" 16 | 17 | #include 18 | 19 | 20 | namespace maniscalco 21 | { 22 | 23 | //============================================================================== 24 | template 25 | class endian 26 | { 27 | public: 28 | 29 | using underlying_type = data_type; 30 | using type = endian_type; 31 | 32 | template 33 | friend class endian; 34 | 35 | endian(); 36 | 37 | endian 38 | ( 39 | endian const & 40 | ); 41 | 42 | endian 43 | ( 44 | endian && 45 | ); 46 | 47 | endian 48 | ( 49 | underlying_type 50 | ); 51 | 52 | endian & operator = 53 | ( 54 | endian const & 55 | ); 56 | 57 | endian & operator = 58 | ( 59 | endian && 60 | ); 61 | 62 | endian & operator = 63 | ( 64 | underlying_type 65 | ); 66 | 67 | operator underlying_type() const; 68 | 69 | underlying_type get() const; 70 | 71 | protected: 72 | 73 | private: 74 | 75 | underlying_type value_; 76 | 77 | }; 78 | 79 | template using big_endian = endian; 80 | template using little_endian = endian; 81 | template using network_order = endian; 82 | template using host_order = endian; 83 | 84 | // global operator overloads involving endian types 85 | template inline static bool operator < (input_type a, endian b){return (a < (data_type)b);} 86 | template inline static bool operator < (endian a, data_type b){return ((data_type)a < b);} 87 | template inline static bool operator <= (data_type a, endian b){return (a <= (data_type)b);} 88 | template inline static bool operator <= (endian a, data_type b){return ((data_type)a <= b);} 89 | template inline static bool operator == (data_type a, endian b){return (a == 
(data_type)b);} 90 | template inline static bool operator == (endian a, data_type b){return ((data_type)a == b);} 91 | template inline static bool operator >= (data_type a, endian b){return (a >= (data_type)b);} 92 | template inline static bool operator >= (endian a, data_type b){return ((data_type)a >= b);} 93 | template inline static bool operator > (data_type a, endian b){return (a > (data_type)b);} 94 | template inline static bool operator > (endian a, data_type b){return ((data_type)a > b);} 95 | template inline static bool operator != (data_type a, endian b){return (a != (data_type)b);} 96 | template inline static bool operator != (endian a, data_type b){return ((data_type)a != b);} 97 | 98 | // static make functions 99 | template big_endian make_big_endian(endian); 100 | template big_endian make_big_endian(data_type); 101 | template little_endian make_little_endian(endian); 102 | template little_endian make_little_endian(data_type); 103 | template host_order make_host_order(endian); 104 | template host_order make_host_order(data_type); 105 | template network_order make_network_order(endian); 106 | template network_order make_network_order(data_type); 107 | 108 | } 109 | 110 | 111 | //============================================================================== 112 | template 113 | maniscalco::endian::endian 114 | ( 115 | ): 116 | value_() 117 | { 118 | } 119 | 120 | 121 | //============================================================================== 122 | template 123 | maniscalco::endian::endian 124 | ( 125 | endian && input 126 | ): 127 | value_(input.value_) 128 | { 129 | } 130 | 131 | 132 | //============================================================================== 133 | template 134 | maniscalco::endian::endian 135 | ( 136 | endian const & input 137 | ): 138 | value_(input.value_) 139 | { 140 | } 141 | 142 | 143 | //============================================================================== 144 | template 145 | maniscalco::endian::endian 146 
| ( 147 | data_type input 148 | ): 149 | value_(endian_swap(input)) 150 | { 151 | } 152 | 153 | 154 | //============================================================================== 155 | template 156 | auto maniscalco::endian::operator = 157 | ( 158 | endian const & input 159 | ) -> endian & 160 | { 161 | value_ = input.value_; 162 | return *this; 163 | } 164 | 165 | 166 | //============================================================================== 167 | template 168 | auto maniscalco::endian::operator = 169 | ( 170 | endian && input 171 | ) -> endian & 172 | { 173 | value_ = input.value_; 174 | return *this; 175 | } 176 | 177 | 178 | //============================================================================== 179 | template 180 | auto maniscalco::endian::operator = 181 | ( 182 | data_type input 183 | ) -> endian & 184 | { 185 | value_ = endian_swap(input); 186 | return *this; 187 | } 188 | 189 | 190 | //============================================================================== 191 | template 192 | maniscalco::endian::operator underlying_type 193 | ( 194 | ) const 195 | { 196 | return endian_swap(value_); 197 | } 198 | 199 | 200 | //============================================================================== 201 | template 202 | auto maniscalco::endian::get 203 | ( 204 | ) const -> underlying_type 205 | { 206 | return endian_swap(value_); 207 | } 208 | 209 | 210 | //============================================================================== 211 | template 212 | auto maniscalco::make_big_endian 213 | ( 214 | maniscalco::endian value 215 | ) -> big_endian 216 | { 217 | return big_endian((T)value); 218 | } 219 | 220 | 221 | //============================================================================== 222 | template 223 | auto maniscalco::make_big_endian 224 | ( 225 | T value 226 | ) -> big_endian 227 | { 228 | return big_endian((T)value); 229 | } 230 | 231 | 232 | //============================================================================== 
233 | template 234 | auto maniscalco::make_little_endian 235 | ( 236 | endian value 237 | ) -> little_endian 238 | { 239 | return little_endian((T)value); 240 | } 241 | 242 | 243 | //============================================================================== 244 | template 245 | auto maniscalco::make_little_endian 246 | ( 247 | T value 248 | ) -> little_endian 249 | { 250 | return little_endian((T)value); 251 | } 252 | 253 | 254 | //============================================================================== 255 | template 256 | auto maniscalco::make_host_order 257 | ( 258 | endian value 259 | ) -> host_order 260 | { 261 | return host_order((T)value); 262 | } 263 | 264 | 265 | //============================================================================== 266 | template 267 | auto maniscalco::make_host_order 268 | ( 269 | T value 270 | ) -> host_order 271 | { 272 | return host_order((T)value); 273 | } 274 | 275 | 276 | //============================================================================== 277 | template 278 | auto maniscalco::make_network_order 279 | ( 280 | endian value 281 | ) -> network_order 282 | { 283 | return network_order((T)value); 284 | } 285 | 286 | 287 | //============================================================================== 288 | template 289 | auto maniscalco::make_network_order 290 | ( 291 | T value 292 | ) -> network_order 293 | { 294 | return network_order((T)value); 295 | } 296 | -------------------------------------------------------------------------------- /src/include/endian/endian_swap.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "./endian_type.h" 4 | #include "./byte_swap.h" 5 | #include 6 | 7 | 8 | namespace maniscalco 9 | { 10 | 11 | namespace impl 12 | { 13 | 14 | template 15 | < 16 | typename, 17 | typename, 18 | typename = void 19 | > 20 | struct endian_swap; 21 | 22 | 23 | 
//======================================================================
// specialization for from == to (no byte swap)
// Chosen when std::is_same<from_endian, to_endian>::value is true; the call
// operator returns its argument unchanged.
template
<
    typename from_endian,
    typename to_endian
>
struct endian_swap
<
    from_endian,
    to_endian,
    typename std::enable_if
    <
        std::is_same
        <
            from_endian,
            to_endian
        >::value
    >::type
>
{
    // NOTE(review): the template parameter list (presumably
    // `<typename data_type>`) is missing from this copy.
    template
    inline data_type operator()
    (
        data_type input
    ) const
    {
        return input;
    }
};


//======================================================================
// specialization for from != to (do byte swap)
// Chosen when the two endian tags differ; the call operator returns
// byte_swap(input) (byte_swap comes from ./byte_swap.h, not shown here).
template
<
    typename from_endian,
    typename to_endian
>
struct endian_swap
<
    from_endian,
    to_endian,
    typename std::enable_if
    <
        !std::is_same
        <
            from_endian,
            to_endian
        >::value
    >::type
>
{
    template
    inline data_type operator()
    (
        data_type input
    ) const
    {
        return byte_swap(input);
    }
};

}


//==========================================================================
// static
// do a byte swap from one endian to another as specified
// Forwards to a default-constructed impl::endian_swap functor.
// NOTE(review): the functor's template arguments (presumably
// `<from_endian, to_endian>`) are missing from this copy.
template
<
    typename from_endian,
    typename to_endian,
    typename data_type
>
static inline data_type endian_swap
(
    data_type input
)
{
    return maniscalco::impl::endian_swap()(input);
}

}
--------------------------------------------------------------------------------
/src/include/endian/endian_type.h:
--------------------------------------------------------------------------------
#pragma once


namespace maniscalco
{

    // Tag types: declared but never defined here, used only as template
    // arguments to select a byte order.
    struct big_endian_type;
    struct little_endian_type;

    using network_order_type = big_endian_type;
    // NOTE(review): host order is hard-coded to little endian; the alternative
    // is the commented-out line below.  On a big-endian host this alias would
    // have to be switched by hand -- confirm whether that is intended.
    using host_order_type = little_endian_type;
    // using host_order_type = big_endian_type;
}
--------------------------------------------------------------------------------
/src/library/CMakeLists.txt:
--------------------------------------------------------------------------------
add_subdirectory(msufsort)
--------------------------------------------------------------------------------
/src/library/msufsort.h:
--------------------------------------------------------------------------------
#pragma once

#include "./msufsort/msufsort.h"


--------------------------------------------------------------------------------
/src/library/msufsort/CMakeLists.txt:
--------------------------------------------------------------------------------
find_library(LIBCXX_LIB c++)
find_package(Threads)
find_library(LIBCXXABI_LIB c++abi)

link_libraries(
    ${LIBCXX_LIB}
    ${LIBCXXABI_LIB}
)

add_library(msufsort
    msufsort.cpp
)

target_link_libraries(msufsort ${CMAKE_THREAD_LIBS_INIT})

target_include_directories(msufsort
    PUBLIC
        $
        $
)

target_compile_features(msufsort PUBLIC cxx_std_17)

--------------------------------------------------------------------------------
/src/library/msufsort/msufsort.cpp:
--------------------------------------------------------------------------------
/*
The MIT License (MIT)

Copyright (c) 2016 Michael A Maniscalco

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial
portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | */ 24 | 25 | 26 | //#define VERBOSE 27 | 28 | #include "./msufsort.h" 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | 37 | 38 | //============================================================================== 39 | maniscalco::msufsort::msufsort 40 | ( 41 | int32_t numThreads 42 | ): 43 | inputBegin_(nullptr), 44 | inputEnd_(nullptr), 45 | inputSize_(), 46 | getValueEnd_(nullptr), 47 | getValueMaxIndex_(), 48 | copyEnd_(), 49 | suffixArrayBegin_(nullptr), 50 | suffixArrayEnd_(nullptr), 51 | inverseSuffixArrayBegin_(nullptr), 52 | inverseSuffixArrayEnd_(nullptr), 53 | frontBucketOffset_(), 54 | backBucketOffset_(new std::int32_t *[0x10000]{}), 55 | aCount_(), 56 | bCount_(), 57 | workerThreads_(new worker_thread[numThreads - 1]), 58 | numWorkerThreads_(numThreads - 1) 59 | { 60 | } 61 | 62 | 63 | //============================================================================== 64 | maniscalco::msufsort::~msufsort 65 | ( 66 | ) 67 | { 68 | } 69 | 70 | 71 | //============================================================================== 72 | template 73 | void maniscalco::msufsort::post_task_to_thread 74 | ( 75 | // private: 76 | // post task to the specified worker thread. 77 | // if threadId == the number of worker threads then use this thread instead 78 | int32_t threadId, 79 | F && function, 80 | argument_types && ... 
arguments 81 | ) 82 | { 83 | if (threadId == numWorkerThreads_) 84 | std::bind(std::forward(function), std::forward(arguments) ...)(); 85 | else 86 | workerThreads_[threadId].post_task(function, std::forward(arguments) ...); 87 | } 88 | 89 | 90 | //============================================================================== 91 | void maniscalco::msufsort::wait_for_all_tasks_completed 92 | ( 93 | // private: 94 | // wait for all currently posted tasks to be completed. 95 | ) const 96 | { 97 | for (auto threadId = 0; threadId < numWorkerThreads_; ++threadId) 98 | workerThreads_[threadId].wait(); 99 | } 100 | 101 | 102 | //============================================================================== 103 | inline auto maniscalco::msufsort::get_suffix_type 104 | ( 105 | uint8_t const * suffix 106 | ) -> suffix_type 107 | { 108 | if ((suffix + 1) >= inputEnd_) 109 | return a; 110 | if (suffix[0] >= suffix[1]) 111 | { 112 | auto p = suffix + 1; 113 | while ((p < inputEnd_) && (*p == suffix[0])) 114 | ++p; 115 | if ((p == inputEnd_) || (suffix[0] > p[0])) 116 | return a; 117 | return b; 118 | } 119 | auto p = suffix + 2; 120 | while ((p < inputEnd_) && (*p == suffix[1])) 121 | ++p; 122 | if ((p == inputEnd_) || (suffix[1] > p[0])) 123 | return bStar; 124 | return b; 125 | } 126 | 127 | 128 | //============================================================================== 129 | inline auto maniscalco::msufsort::get_value 130 | ( 131 | uint8_t const * inputCurrent, 132 | std::int32_t index 133 | ) const -> suffix_value 134 | { 135 | inputCurrent += (index & sa_index_mask); 136 | if (inputCurrent >= getValueEnd_) 137 | { 138 | if (inputCurrent >= inputEnd_) 139 | return 0; 140 | inputCurrent = (copyEnd_ + (sizeof(suffix_value) - std::distance(inputCurrent, inputEnd_))); 141 | } 142 | return endian_swap(*(suffix_value const *)(inputCurrent)); 143 | } 144 | 145 | 146 | //============================================================================== 147 | inline bool 
maniscalco::msufsort::compare_suffixes 148 | ( 149 | // optimized compare_suffixes for when two suffixes have long common match lengths 150 | std::uint8_t const * inputBegin, 151 | std::int32_t indexA, 152 | std::int32_t indexB 153 | ) const 154 | { 155 | indexA &= sa_index_mask; 156 | indexB &= sa_index_mask; 157 | 158 | if (indexA > indexB) 159 | return !compare_suffixes(inputBegin, indexB, indexA); 160 | 161 | auto inputCurrentA = inputBegin + indexA; 162 | auto inputCurrentB = inputBegin + indexB; 163 | while ((inputCurrentB <= getValueEnd_) && (*(suffix_value const *)inputCurrentB == *(suffix_value const *)inputCurrentA)) 164 | { 165 | inputCurrentB += sizeof(suffix_value); 166 | inputCurrentA += sizeof(suffix_value); 167 | } 168 | if (inputCurrentB >= getValueEnd_) 169 | { 170 | if (inputCurrentB >= inputEnd_) 171 | return true; 172 | inputCurrentB = (copyEnd_ + (sizeof(suffix_value) - std::distance(inputCurrentB, inputEnd_))); 173 | } 174 | auto valueB = endian_swap(*(suffix_value const *)(inputCurrentB)); 175 | 176 | if (inputCurrentA >= getValueEnd_) 177 | inputCurrentA = (copyEnd_ + (sizeof(suffix_value) - std::distance(inputCurrentA, inputEnd_))); 178 | auto valueA = endian_swap(*(suffix_value const *)(inputCurrentA)); 179 | return (valueA >= valueB); 180 | } 181 | 182 | 183 | //============================================================================== 184 | inline int maniscalco::msufsort::compare_suffixes 185 | ( 186 | // optimized compare_suffixes for when two suffixes have long common match lengths 187 | std::uint8_t const * inputBegin, 188 | std::int32_t indexA, 189 | std::int32_t indexB, 190 | std::size_t maxLength 191 | ) const 192 | { 193 | indexA &= sa_index_mask; 194 | indexB &= sa_index_mask; 195 | 196 | if (indexA > indexB) 197 | return -compare_suffixes(inputBegin, indexB, indexA); 198 | 199 | auto inputCurrentA = inputBegin + indexA; 200 | auto inputCurrentB = inputBegin + indexB; 201 | while ((maxLength >= 4) && (inputCurrentB <= 
getValueEnd_) && (*(suffix_value const *)inputCurrentB == *(suffix_value const *)inputCurrentA)) 202 | { 203 | inputCurrentB += sizeof(suffix_value); 204 | inputCurrentA += sizeof(suffix_value); 205 | maxLength -= 4; 206 | } 207 | if (inputCurrentB >= getValueEnd_) 208 | { 209 | if (inputCurrentB >= inputEnd_) 210 | return -1; 211 | inputCurrentB = (copyEnd_ + (sizeof(suffix_value) - std::distance(inputCurrentB, inputEnd_))); 212 | } 213 | auto valueB = endian_swap(*(suffix_value const *)(inputCurrentB)); 214 | 215 | if (inputCurrentA >= getValueEnd_) 216 | inputCurrentA = (copyEnd_ + (sizeof(suffix_value) - std::distance(inputCurrentA, inputEnd_))); 217 | auto valueA = endian_swap(*(suffix_value const *)(inputCurrentA)); 218 | return (valueB - valueA); 219 | } 220 | 221 | 222 | //============================================================================== 223 | void maniscalco::msufsort::multikey_insertion_sort 224 | ( 225 | // private: 226 | // sorts the suffixes by insertion sort 227 | std::int32_t * partitionBegin, 228 | std::int32_t * partitionEnd, 229 | std::int32_t currentMatchLength, 230 | suffix_value startingPattern, 231 | std::array endingPattern, 232 | std::vector & tandemRepeatStack 233 | ) 234 | { 235 | std::int32_t partitionSize = (std::int32_t)std::distance(partitionBegin, partitionEnd); 236 | if (partitionSize < 2) 237 | return; 238 | struct partition_info 239 | { 240 | std::int32_t currentMatchLength_; 241 | std::int32_t size_; 242 | suffix_value startingPattern_; 243 | suffix_value endingPattern_; 244 | bool hasPotentialTandemRepeats_; 245 | }; 246 | partition_info stack[insertion_sort_threshold]; 247 | stack[0] = {currentMatchLength, partitionSize, startingPattern, endingPattern[0], false}; 248 | auto stackTop = stack + 1; 249 | 250 | while (stackTop-- != stack) 251 | { 252 | auto currentMatchLength = stackTop->currentMatchLength_; 253 | auto size = stackTop->size_; 254 | endingPattern[0] = stackTop->endingPattern_; 255 | auto 
hasPotentialTandemRepeats = stackTop->hasPotentialTandemRepeats_; 256 | startingPattern = stackTop->startingPattern_; 257 | 258 | if (size <= 2) 259 | { 260 | if ((size == 2) && (compare_suffixes(inputBegin_ + currentMatchLength, partitionBegin[0], partitionBegin[1]))) 261 | std::swap(partitionBegin[0], partitionBegin[1]); 262 | partitionBegin += size; 263 | } 264 | else 265 | { 266 | if (currentMatchLength >= min_match_length_for_tandem_repeats) 267 | { 268 | if (hasPotentialTandemRepeats) 269 | { 270 | auto tandemRepeatCount = partition_tandem_repeats(partitionBegin, partitionBegin + size, currentMatchLength, tandemRepeatStack); 271 | size -= tandemRepeatCount; 272 | partitionBegin += tandemRepeatCount; 273 | if (size == 0) 274 | continue; 275 | } 276 | } 277 | 278 | suffix_value value[insertion_sort_threshold]; 279 | value[0] = get_value(inputBegin_ + currentMatchLength, partitionBegin[0]); 280 | for (std::int32_t i = 1; i < size; ++i) 281 | { 282 | auto currentIndex = partitionBegin[i]; 283 | suffix_value currentValue = get_value(inputBegin_ + currentMatchLength, partitionBegin[i]); 284 | auto j = i; 285 | while ((j > 0) && (value[j - 1] > currentValue)) 286 | { 287 | value[j] = value[j - 1]; 288 | partitionBegin[j] = partitionBegin[j - 1]; 289 | --j; 290 | } 291 | value[j] = currentValue; 292 | partitionBegin[j] = currentIndex; 293 | } 294 | 295 | auto i = (std::int32_t)size - 1; 296 | auto nextMatchLength = currentMatchLength + (std::int32_t)sizeof(suffix_value); 297 | while (i >= 0) 298 | { 299 | std::int32_t start = i--; 300 | auto startValue = value[start]; 301 | while ((i >= 0) && (value[i] == startValue)) 302 | --i; 303 | auto partitionSize = (start - i); 304 | auto potentialTandemRepeats = has_potential_tandem_repeats(startingPattern, {endingPattern[0], startValue}); 305 | if (nextMatchLength == (2 + sizeof(suffix_value))) 306 | startingPattern = get_value(inputBegin_, *partitionBegin); 307 | *stackTop++ = partition_info{nextMatchLength, partitionSize, 
startingPattern, startValue, potentialTandemRepeats}; 308 | } 309 | 310 | } 311 | } 312 | } 313 | 314 | 315 | //============================================================================== 316 | inline bool maniscalco::msufsort::has_potential_tandem_repeats 317 | ( 318 | suffix_value startingPattern, 319 | std::array endingPattern 320 | ) const 321 | { 322 | if (!tandemRepeatSortEnabled_) 323 | return false; 324 | std::int8_t const * end = (std::int8_t const *)endingPattern.data(); 325 | std::int8_t const * begin = end + sizeof(suffix_value); 326 | while (begin > end) 327 | if (*(suffix_value const *)--begin == *(suffix_value *)&startingPattern) 328 | return true; 329 | return false; 330 | } 331 | 332 | 333 | //============================================================================== 334 | std::size_t maniscalco::msufsort::partition_tandem_repeats 335 | ( 336 | // private: 337 | // the tandem repeat sort. determines if the suffixes provided are tandem repeats 338 | // of other suffixes from within the same group. If so, sorts the non tandem 339 | // repeat suffixes and then induces the sorted order of the suffixes which are 340 | // tandem repeats. 341 | std::int32_t * partitionBegin, 342 | std::int32_t * partitionEnd, 343 | std::int32_t currentMatchLength, 344 | std::vector & tandemRepeatStack 345 | ) 346 | { 347 | auto parititionSize = std::distance(partitionBegin, partitionEnd); 348 | std::sort(partitionBegin, partitionEnd, [](std::int32_t a, std::int32_t b) -> bool{return ((a & sa_index_mask) < (b & sa_index_mask));}); 349 | std::int32_t tandemRepeatLength = 0; 350 | auto const halfCurrentMatchLength = (currentMatchLength >> 1); 351 | 352 | // determine if there are tandem repeats and, if so, what the tandem repeat length is. 
353 | auto previousSuffixIndex = (partitionBegin[0] & sa_index_mask); 354 | for (auto cur = partitionBegin + 1; ((tandemRepeatLength == 0) && (cur < partitionEnd)); ++cur) 355 | { 356 | auto currentSuffixIndex = (*cur & sa_index_mask); 357 | if ((previousSuffixIndex + halfCurrentMatchLength) >= currentSuffixIndex) 358 | tandemRepeatLength = (currentSuffixIndex - previousSuffixIndex); 359 | previousSuffixIndex = currentSuffixIndex; 360 | } 361 | if (tandemRepeatLength == 0) 362 | return 0; // no tandem repeats were found 363 | // tandem repeats detected. 364 | std::int32_t * terminatorsEnd = partitionEnd - 1; 365 | previousSuffixIndex = (partitionEnd[-1] & sa_index_mask); 366 | for (auto cur = partitionEnd - 2; cur >= partitionBegin; --cur) 367 | { 368 | auto currentSuffixIndex = (*cur & sa_index_mask); 369 | if ((previousSuffixIndex - currentSuffixIndex) == tandemRepeatLength) 370 | std::swap(*terminatorsEnd--, *cur);// suffix is a tandem repeat 371 | previousSuffixIndex = currentSuffixIndex; 372 | } 373 | auto numTerminators = (std::distance(partitionBegin, terminatorsEnd) + 1); 374 | std::reverse(partitionBegin, partitionEnd); 375 | tandemRepeatStack.push_back(tandem_repeat_info(partitionBegin, partitionEnd, (std::int32_t)numTerminators, tandemRepeatLength)); 376 | return (parititionSize - numTerminators); 377 | } 378 | 379 | 380 | //====================================================================================================================== 381 | void maniscalco::msufsort::complete_tandem_repeats 382 | ( 383 | std::vector & tandemRepeatStack 384 | ) 385 | { 386 | while (!tandemRepeatStack.empty()) 387 | { 388 | tandem_repeat_info tandemRepeat = tandemRepeatStack.back(); 389 | tandemRepeatStack.pop_back(); 390 | complete_tandem_repeat(tandemRepeat.partitionBegin_, tandemRepeat.partitionEnd_, tandemRepeat.numTerminators_, tandemRepeat.tandemRepeatLength_); 391 | } 392 | } 393 | 394 | 395 | 
//======================================================================================================================
// NOTE(review): every single-line `<...>` list is missing from this copy of the
// file (`std::array`, `std::vector`, `std::unique_ptr` below appear without
// their template arguments).  The code is reproduced exactly as found.
//
// Places the tandem repeat suffixes of one partition, given that its last
// `numTerminators` entries (the terminators) are already in sorted order.
//   1. every non-terminator is tagged in the inverse suffix array (slot
//      index >> 1) with tandemRepeatLength | is_tandem_repeat_length
//   2. a binary search over the terminators counts how many are "type A"
//      (compare_suffixes(terminator, terminator + tandemRepeatLength) is true);
//      the rest are "type B"
//   3. the type A terminators are copied to the front of the partition; then,
//      front to back for type A and back to front for type B, each placed
//      suffix at index i induces the tagged suffix at i - tandemRepeatLength
//      next to it, wave after wave, until no new suffix is produced
inline void maniscalco::msufsort::complete_tandem_repeat
(
    std::int32_t * partitionBegin,
    std::int32_t * partitionEnd,
    std::int32_t numTerminators,
    std::int32_t tandemRepeatLength
)
{
    std::int32_t * terminatorsBegin = partitionEnd - numTerminators;
    for (auto cur = terminatorsBegin - 1; cur >= partitionBegin; --cur)
    {
        auto currentSuffixIndex = (*cur & sa_index_mask);
        inverseSuffixArrayBegin_[currentSuffixIndex >> 1] = (tandemRepeatLength | is_tandem_repeat_length);
    }
    // now use sorted order of terminators to determine sorted order of repeats.
    // figure out how many terminators sort before the repeat and how
    // many sort after the repeat. put them on left and right extremes of the array.
    std::int32_t m = 0;
    std::int32_t a = 0;
    std::int32_t b = numTerminators - 1;
    std::int32_t numTypeA = 0;
    while (a <= b)
    {
        m = (a + b) >> 1;
        if (!compare_suffixes(inputBegin_, terminatorsBegin[m], terminatorsBegin[m] + tandemRepeatLength))
        {
            numTypeA = m;
            b = m - 1;
        }
        else
        {
            numTypeA = m + 1;
            a = m + 1;
        }
    }
    if (numTypeA > numTerminators)
        numTypeA = numTerminators;
    std::int32_t numTypeB = (numTerminators - numTypeA);

    for (std::int32_t i = 0; i < numTypeA; ++i)
        partitionBegin[i] = terminatorsBegin[i];

    // type A repeats
    // `next` is the write cursor; each outer iteration processes the entries
    // produced by the previous one
    auto current = partitionBegin;
    auto currentEnd = current + numTypeA;
    auto next = currentEnd;
    while (current != currentEnd)
    {
        while (current != currentEnd)
        {
            auto index = (*current++ & sa_index_mask);
            if (index >= tandemRepeatLength)
            {
                auto potentialTandemRepeatIndex = index - tandemRepeatLength;
                auto isaValue = inverseSuffixArrayBegin_[potentialTandemRepeatIndex >> 1];
                if ((isaValue & is_tandem_repeat_length) && ((isaValue & isa_index_mask) == tandemRepeatLength))
                {
                    // flag is set unless the preceding byte exists and is <= this suffix's first byte
                    auto flag = ((potentialTandemRepeatIndex > 0) && (inputBegin_[potentialTandemRepeatIndex - 1] <= inputBegin_[potentialTandemRepeatIndex])) ? 0 : preceding_suffix_is_type_a_flag;
                    *(next) = (potentialTandemRepeatIndex | flag);
                    ++next;
                }
            }
        }
        currentEnd = next;
    }
    // type B repeats
    // mirror image of the loop above, walking and writing right to left
    current = partitionEnd - 1;
    currentEnd = current - numTypeB;
    next = currentEnd;
    while (current != currentEnd)
    {
        while (current != currentEnd)
        {
            auto index = (*current-- & sa_index_mask);
            if (index >= tandemRepeatLength)
            {
                auto potentialTandemRepeatIndex = index - tandemRepeatLength;
                auto isaValue = inverseSuffixArrayBegin_[potentialTandemRepeatIndex >> 1];
                if ((isaValue & is_tandem_repeat_length) && ((isaValue & isa_index_mask) == tandemRepeatLength))
                {
                    auto flag = ((potentialTandemRepeatIndex > 0) && (inputBegin_[potentialTandemRepeatIndex - 1] <= inputBegin_[potentialTandemRepeatIndex])) ? 0 : preceding_suffix_is_type_a_flag;
                    *(next) = (potentialTandemRepeatIndex | flag);
                    --next;
                }
            }
        }
        currentEnd = next;
    }
}


//==============================================================================
// Iterative multikey quicksort driven by an explicit stack of frames.  Each
// frame is a sub-range of the suffix array whose suffixes share
// currentMatchLength bytes.  Per frame:
//   * ranges of fewer than 2 entries are dropped
//   * from min_match_length_for_tandem_repeats on, tandem repeats are split off
//     the front when has_potential_tandem_repeats says so
//   * ranges below insertion_sort_threshold go to multikey_insertion_sort
//   * otherwise five candidates taken at 1/6 steps are ordered with a fixed
//     compare/swap sequence, the 1st, 3rd and 5th become pivots, the range is
//     split seven ways (<p1, =p1, between, =p2, between, =p3, >p3) and the
//     non-empty parts are pushed.  The three "equal" parts advance the match
//     length by sizeof(suffix_value) and shift the pivot into endingPattern.
// NOTE(review): the value returned is the end of whichever frame was processed
// last, because suffixArrayEnd is overwritten per frame -- confirm callers
// expect that.
auto maniscalco::msufsort::multikey_quicksort
(
    // private:
    // multi key quicksort on the input data provided
    std::int32_t * suffixArrayBegin,
    std::int32_t * suffixArrayEnd,
    std::size_t currentMatchLength,
    suffix_value startingPattern,
    std::array endingPattern,
    std::vector & tandemRepeatStack
) -> std::int32_t *
{
    std::vector stack;
    stack.reserve((1 << 10) * 32);
    stack.push_back({suffixArrayBegin, suffixArrayEnd, currentMatchLength, startingPattern, endingPattern, tandemRepeatStack});

    while (!stack.empty())
    {
        // copy the frame into the locals before popping it
        auto & s = stack.back();
        suffixArrayBegin = s.suffixArrayBegin;
        suffixArrayEnd = s.suffixArrayEnd;
        currentMatchLength = s.currentMatchLength;
        startingPattern = s.startingPattern;
        endingPattern = s.endingPattern;
        // NOTE(review): the frame type is not visible here.  If its
        // tandemRepeatStack member is a reference this is a self-assignment; if
        // it is a copy, this overwrites the caller's vector with a stale
        // snapshot -- confirm.
        tandemRepeatStack = s.tandemRepeatStack;
        stack.pop_back();

        std::uint64_t partitionSize = std::distance(suffixArrayBegin, suffixArrayEnd);
        if (partitionSize < 2)
            continue;

        if (currentMatchLength >= min_match_length_for_tandem_repeats)
        {
            if (currentMatchLength == min_match_length_for_tandem_repeats)
                startingPattern = get_value(inputBegin_, *suffixArrayBegin);
            if ((partitionSize > 1) && (has_potential_tandem_repeats(startingPattern, endingPattern)))
                suffixArrayBegin += partition_tandem_repeats(suffixArrayBegin, suffixArrayEnd, currentMatchLength, tandemRepeatStack);
            partitionSize = std::distance(suffixArrayBegin, suffixArrayEnd);
        }

        if (partitionSize < insertion_sort_threshold)
        {
            multikey_insertion_sort(suffixArrayBegin, suffixArrayEnd, currentMatchLength, startingPattern, endingPattern, tandemRepeatStack);
            continue;
        }

        // select three pivots
        auto offsetInputBegin = inputBegin_ + currentMatchLength;
        auto oneSixthOfPartitionSize = (partitionSize / 6);
        auto pivotCandidate1 = suffixArrayBegin + oneSixthOfPartitionSize;
        auto pivotCandidate2 = pivotCandidate1 + oneSixthOfPartitionSize;
        auto pivotCandidate3 = pivotCandidate2 + oneSixthOfPartitionSize;
        auto pivotCandidate4 = pivotCandidate3 + oneSixthOfPartitionSize;
        auto pivotCandidate5 = pivotCandidate4 + oneSixthOfPartitionSize;
        auto pivotCandidateValue1 = get_value(offsetInputBegin, *pivotCandidate1);
        auto pivotCandidateValue2 = get_value(offsetInputBegin, *pivotCandidate2);
        auto pivotCandidateValue3 = get_value(offsetInputBegin, *pivotCandidate3);
        auto pivotCandidateValue4 = get_value(offsetInputBegin, *pivotCandidate4);
        auto pivotCandidateValue5 = get_value(offsetInputBegin, *pivotCandidate5);
        // fixed compare/swap sequence over the five candidates; array entries
        // and cached keys are swapped together
        if (pivotCandidateValue1 > pivotCandidateValue2)
            std::swap(*pivotCandidate1, *pivotCandidate2), std::swap(pivotCandidateValue1, pivotCandidateValue2);
        if (pivotCandidateValue4 > pivotCandidateValue5)
            std::swap(*pivotCandidate4, *pivotCandidate5), std::swap(pivotCandidateValue4, pivotCandidateValue5);
        if (pivotCandidateValue1 > pivotCandidateValue3)
            std::swap(*pivotCandidate1, *pivotCandidate3), std::swap(pivotCandidateValue1, pivotCandidateValue3);
        if (pivotCandidateValue2 > pivotCandidateValue3)
            std::swap(*pivotCandidate2, *pivotCandidate3), std::swap(pivotCandidateValue2, pivotCandidateValue3);
        if (pivotCandidateValue1 > pivotCandidateValue4)
            std::swap(*pivotCandidate1, *pivotCandidate4), std::swap(pivotCandidateValue1, pivotCandidateValue4);
        if (pivotCandidateValue3 > pivotCandidateValue4)
            std::swap(*pivotCandidate3, *pivotCandidate4), std::swap(pivotCandidateValue3, pivotCandidateValue4);
        if (pivotCandidateValue2 > pivotCandidateValue5)
            std::swap(*pivotCandidate2, *pivotCandidate5), std::swap(pivotCandidateValue2, pivotCandidateValue5);
        if (pivotCandidateValue2 > pivotCandidateValue3)
            std::swap(*pivotCandidate2, *pivotCandidate3), std::swap(pivotCandidateValue2, pivotCandidateValue3);
        if (pivotCandidateValue4 > pivotCandidateValue5)
            std::swap(*pivotCandidate4, *pivotCandidate5), std::swap(pivotCandidateValue4, pivotCandidateValue5);
        auto pivot1 = pivotCandidateValue1;
        auto pivot2 = pivotCandidateValue3;
        auto pivot3 = pivotCandidateValue5;

        // partition seven ways
        auto curSuffix = suffixArrayBegin;
        auto beginPivot1 = suffixArrayBegin;
        auto endPivot1 = suffixArrayBegin;
        auto beginPivot2 = suffixArrayBegin;
        auto endPivot2 = suffixArrayEnd - 1;
        auto beginPivot3 = endPivot2;
        auto endPivot3 = endPivot2;

        // park the pivot suffixes at the edges before scanning
        std::swap(*curSuffix++, *pivotCandidate1);
        beginPivot2 += (pivot1 != pivot2);
        endPivot1 += (pivot1 != pivot2);
        std::swap(*curSuffix++, *pivotCandidate3);
        if (pivot2 != pivot3)
        {
            std::swap(*endPivot2--, *pivotCandidate5);
            --beginPivot3;
        }
        // keys are fetched one or two positions ahead of where they are used
        // NOTE(review): the look-ahead reads curSuffix[1] / curSuffix[2] and
        // endPivot2[-1] without a range check against this partition -- confirm
        // these always stay inside the suffix array.
        auto currentValue = get_value(offsetInputBegin, *curSuffix);
        auto nextValue = get_value(offsetInputBegin, curSuffix[1]);
        auto nextDValue = get_value(offsetInputBegin, *endPivot2);

        while (curSuffix <= endPivot2)
        {
            if (currentValue <= pivot2)
            {
                auto temp = nextValue;
                nextValue = get_value(offsetInputBegin, curSuffix[2]);
                if (currentValue < pivot2)
                {
                    std::swap(*beginPivot2, *curSuffix);
                    if (currentValue <= pivot1)
                    {
                        if (currentValue < pivot1)
                            std::swap(*beginPivot1++, *beginPivot2);
                        std::swap(*endPivot1++, *beginPivot2);
                    }
                    ++beginPivot2;
                }
                ++curSuffix;
                currentValue = temp;
            }
            else
            {
                // this nextValue shadows the outer one; it is the look-ahead for
                // the right-hand side
                auto nextValue = get_value(offsetInputBegin, endPivot2[-1]);
                std::swap(*endPivot2, *curSuffix);
                if (currentValue >= pivot3)
                {
                    if (currentValue > pivot3)
                        std::swap(*endPivot2, *endPivot3--);
                    std::swap(*endPivot2, *beginPivot3--);
                }
                --endPivot2;
                currentValue = nextDValue;
                nextDValue = nextValue;
            }
        }
        // push right-most part first so the left-most part is processed first
        if (++endPivot3 != suffixArrayEnd)
            stack.push_back({endPivot3, suffixArrayEnd, currentMatchLength, startingPattern, endingPattern, tandemRepeatStack});
        if (++beginPivot3 != endPivot3)
            stack.push_back({beginPivot3, endPivot3, (currentMatchLength + sizeof(suffix_value)), startingPattern, {endingPattern[1], pivot3}, tandemRepeatStack});
        if (++endPivot2 != beginPivot3)
            stack.push_back({endPivot2, beginPivot3, currentMatchLength, startingPattern, endingPattern, tandemRepeatStack});
        if (beginPivot2 != endPivot2)
            stack.push_back({beginPivot2, endPivot2, (currentMatchLength + sizeof(suffix_value)), startingPattern, {endingPattern[1], pivot2}, tandemRepeatStack});
        if (endPivot1 != beginPivot2)
            stack.push_back({endPivot1, beginPivot2, currentMatchLength, startingPattern, endingPattern, tandemRepeatStack});
        if (beginPivot1 != endPivot1)
            stack.push_back({beginPivot1, endPivot1, (currentMatchLength + sizeof(suffix_value)), startingPattern, {endingPattern[1], pivot1}, tandemRepeatStack});
        if (suffixArrayBegin != beginPivot1)
            stack.push_back({suffixArrayBegin, beginPivot1, currentMatchLength, startingPattern, endingPattern, tandemRepeatStack});
    }
    return suffixArrayEnd;
}


//==============================================================================
// Multi-threaded form of the right-to-left pass.  The suffix array is walked
// from index inputSize_ downwards, one first-symbol bucket at a time (0xff
// down to 0).  Within a bucket the work is done in rounds of three phases, each
// followed by wait_for_all_tasks_completed():
//   1. every thread scans its own slice, and for each entry without
//      preceding_suffix_is_type_a_flag records {preceding symbol, preceding
//      index | flag} in its private cache and counts entries per symbol
//   2. the 256 symbols are divided among the threads; for its symbols each task
//      hands every thread a private write cursor (dest) carved out of
//      backBucketOffset and resets the counts
//   3. every thread writes its cached entries through its own cursors
// NOTE(review): cache, numSuffixes, sCount and dest are arrays sized by the
// run-time value numThreads, which is a compiler extension rather than
// standard C++.
void maniscalco::msufsort::second_stage_its_right_to_left_pass_multi_threaded
(
    // private:
    // induce sorted position of B suffixes from sorted B* suffixes
    // This is the first half of the second stage of the ITS ... the 'right to left' pass
)
{
    auto numThreads = (int32_t)(numWorkerThreads_ + 1); // +1 for main thread
    auto max_cache_size = (1 << 12);
    struct entry_type
    {
        uint8_t precedingSuffix_;
        int32_t precedingSuffixIndex_;
    };
    std::unique_ptr cache[numThreads];
    for (auto i = 0; i < numThreads; ++i)
        cache[i].reset(new entry_type[max_cache_size]);
    int32_t numSuffixes[numThreads];
    for (auto & e : numSuffixes)
        e = 0;
    int32_t sCount[numThreads][0x100];
    for (auto & e1 : sCount)
        for (auto & e2 : e1)
            e2 = 0;
    std::int32_t * dest[numThreads][0x100];
    for (auto & e1 : dest)
        for (auto & e2 : e1)
            e2 = 0;
    auto currentSuffix = suffixArrayBegin_ + inputSize_;
    for (auto symbol = 0xff; symbol >= 0; --symbol)
    {
        auto backBucketOffset = &backBucketOffset_[symbol << 8];
        auto endSuffix = currentSuffix - bCount_[symbol];

        while (currentSuffix > endSuffix)
        {
            // determine how many B/B* suffixes are safe to process during this pass
            // (bounded by total cache capacity, the array start, the bucket end
            // and the first slot still marked suffix_is_unsorted_b_type)
            auto maxEnd = currentSuffix - (max_cache_size * numThreads);
            if (maxEnd < suffixArrayBegin_)
                maxEnd = suffixArrayBegin_;
            if (maxEnd < endSuffix)
                maxEnd = endSuffix;
            auto temp = currentSuffix;
            while ((temp > maxEnd) && (*temp != suffix_is_unsorted_b_type))
                --temp;
            auto totalSuffixesPerThread = ((std::distance(temp, currentSuffix) + numThreads - 1) / numThreads);

            // process suffixes
            for (auto threadId = 0; threadId < numThreads; ++threadId)
            {
                numSuffixes[threadId] = 0;
                auto endForThisThread = currentSuffix - totalSuffixesPerThread;
                if (endForThisThread < temp)
                    endForThisThread = temp;
                post_task_to_thread
                (
                    threadId,
                    [](
                        uint8_t const * inputBegin,
                        std::int32_t * begin,
                        std::int32_t * end,
                        entry_type * cache,
                        int32_t & numSuffixes,
                        int32_t * suffixCount
                    )
                    {
                        auto curCache = cache;
                        ++begin;
                        uint8_t currentPrecedingSymbol = 0;
                        int32_t currentPrecedingSymbolCount = 0;
                        // scans (end, begin] from high to low
                        while (--begin > end)
                        {
                            if ((*begin & preceding_suffix_is_type_a_flag) == 0)
                            {
                                int32_t precedingSuffixIndex = ((*begin & sa_index_mask) - 1);
                                auto precedingSuffix = (inputBegin + precedingSuffixIndex);
                                auto precedingSymbol = precedingSuffix[0];
                                int32_t flag = ((precedingSuffixIndex > 0) && (precedingSuffix[-1] <= precedingSymbol)) ? 0 : preceding_suffix_is_type_a_flag;
                                *curCache++ = {precedingSymbol, precedingSuffixIndex | flag};
                                // counts are accumulated per run of equal symbols
                                if (precedingSymbol != currentPrecedingSymbol)
                                {
                                    suffixCount[currentPrecedingSymbol] += currentPrecedingSymbolCount;
                                    currentPrecedingSymbol = precedingSymbol;
                                    currentPrecedingSymbolCount = 0;
                                }
                                ++currentPrecedingSymbolCount;
                            }
                        }
                        suffixCount[currentPrecedingSymbol] += currentPrecedingSymbolCount;
                        numSuffixes = std::distance(cache, curCache);
                    }, inputBegin_, currentSuffix, endForThisThread, cache[threadId].get(), std::ref(numSuffixes[threadId]), sCount[threadId]
                );
                currentSuffix = endForThisThread;
            }
            wait_for_all_tasks_completed();

            // phase 2: assign each thread its write cursor per symbol and move
            // the shared bucket offset down by that thread's count
            for (auto threadId = 0, begin = 0, numSymbolsPerThread = ((0x100 + numThreads - 1) / numThreads); threadId < numThreads; ++threadId)
            {
                auto end = begin + numSymbolsPerThread;
                if (end > 0x100)
                    end = 0x100;
                post_task_to_thread
                (
                    threadId,
                    [&dest, &backBucketOffset, &sCount, numThreads]
                    (
                        int32_t begin,
                        int32_t end
                    )
                    {
                        for (auto threadId = 0; threadId < numThreads; ++threadId)
                            for (auto symbol = begin; symbol < end; ++symbol)
                            {
                                dest[threadId][symbol] = backBucketOffset[symbol];
                                backBucketOffset[symbol] -= sCount[threadId][symbol];
                                sCount[threadId][symbol] = 0;
                            }
                    }, begin, end
                );
                begin = end;
            }
            wait_for_all_tasks_completed();

            // phase 3: each thread flushes its cache through its own cursors
            for (auto threadId = 0; threadId < numThreads; ++threadId)
                post_task_to_thread
                (
                    threadId,
                    [&](
                        std::int32_t * dest[0x100],
                        entry_type const * begin,
                        entry_type const * end
                    )
                    {
                        --begin;
                        while (++begin < end)
                            *(--dest[begin->precedingSuffix_]) = begin->precedingSuffixIndex_;
                    },
                    dest[threadId], cache[threadId].get(), cache[threadId].get() + numSuffixes[threadId]
                );
            wait_for_all_tasks_completed();
        }
        currentSuffix -= aCount_[symbol];
    }
}


//==============================================================================
// Single-threaded form of the right-to-left pass.  Walks the suffix array from
// index inputSize_ downwards, bucket by bucket (first symbol 0xff down to 0):
// bCount_[i] entries are processed, then aCount_[i] entries are skipped.  For
// each processed entry without preceding_suffix_is_type_a_flag, the index of
// the preceding suffix (with a freshly computed flag) is written at the
// pre-decremented back-bucket cursor of the preceding symbol.  prevWrite caches
// the cursor of the most recently seen symbol.
void maniscalco::msufsort::second_stage_its_right_to_left_pass_single_threaded
(
    // private:
    // induce sorted position of B suffixes from sorted B* suffixes
    // This is the first half of the second stage of the ITS ... the 'right to left' pass
)
{
    auto currentSuffix = suffixArrayBegin_ + inputSize_;
    for (auto i = 0xff; i >= 0; --i)
    {
        auto backBucketOffset = &backBucketOffset_[i << 8];
        auto prevWrite = backBucketOffset;
        int32_t previousPrecedingSymbol = 0;
        auto endSuffix = currentSuffix - bCount_[i];
        while (currentSuffix > endSuffix)
        {
            if ((*currentSuffix & preceding_suffix_is_type_a_flag) == 0)
            {
                int32_t precedingSuffixIndex = ((*currentSuffix & sa_index_mask) - 1);
                auto precedingSuffix = (inputBegin_ + precedingSuffixIndex);
                auto precedingSymbol = precedingSuffix[0];
                // flag is set unless a byte before the preceding suffix exists and is <= its first byte
                int32_t flag = ((precedingSuffixIndex > 0) && (precedingSuffix[-1] <= precedingSymbol)) ? 0 : preceding_suffix_is_type_a_flag;
                if (precedingSymbol != previousPrecedingSymbol)
                {
                    previousPrecedingSymbol = precedingSymbol;
                    prevWrite = backBucketOffset + previousPrecedingSymbol;
                }
                *(--*prevWrite) = (precedingSuffixIndex | flag);
            }
            --currentSuffix;
        }
        currentSuffix -= aCount_[i];
    }
}


//==============================================================================
void maniscalco::msufsort::second_stage_its_left_to_right_pass_single_threaded
(
    // private:
    // induce sorted position of A suffixes from sorted B suffixes
    // This is the second half of the second stage of the ITS ... the 'left to right' pass
)
{
    auto currentSuffix = suffixArrayBegin_ - 1;
    uint8_t previousPrecedingSymbol = 0;
    auto previousFrontBucketOffset = frontBucketOffset_;
    while (++currentSuffix < suffixArrayEnd_)
    {
        auto currentSuffixIndex = *currentSuffix;
        if (currentSuffixIndex & preceding_suffix_is_type_a_flag)
        {
            if ((currentSuffixIndex & sa_index_mask) != 0)
            {
                int32_t precedingSuffixIndex = ((currentSuffixIndex & sa_index_mask) - 1);
                auto precedingSuffix = (inputBegin_ + precedingSuffixIndex);
                auto precedingSymbol = precedingSuffix[0];
                int32_t flag = ((precedingSuffixIndex > 0) && (precedingSuffix[-1] >= precedingSymbol)) ?
preceding_suffix_is_type_a_flag : 0; 853 | if (precedingSymbol != previousPrecedingSymbol) 854 | { 855 | previousPrecedingSymbol = precedingSymbol; 856 | previousFrontBucketOffset = frontBucketOffset_ + previousPrecedingSymbol; 857 | } 858 | *((*previousFrontBucketOffset)++) = (precedingSuffixIndex | flag); 859 | } 860 | *(currentSuffix) &= sa_index_mask; 861 | } 862 | } 863 | } 864 | 865 | 866 | //============================================================================== 867 | void maniscalco::msufsort::second_stage_its_left_to_right_pass_multi_threaded 868 | ( 869 | // private: 870 | // induce sorted position of A suffixes from sorted B suffixes 871 | // This is the second half of the second stage of the ITS ... the 'left to right' pass 872 | ) 873 | { 874 | auto numThreads = (int32_t)(numWorkerThreads_ + 1); // +1 for main thread 875 | auto currentSuffix = suffixArrayBegin_; 876 | auto max_cache_size = (1 << 12); 877 | struct entry_type 878 | { 879 | uint8_t precedingSuffix_; 880 | int32_t precedingSuffixIndex_; 881 | }; 882 | std::unique_ptr cache[numThreads]; 883 | for (auto i = 0; i < numThreads; ++i) 884 | cache[i].reset(new entry_type[max_cache_size]); 885 | int32_t numSuffixes[numThreads]; 886 | for (auto & e1 : numSuffixes) 887 | e1 = 0; 888 | int32_t sCount[numThreads][0x100]; 889 | for (auto & e1 : sCount) 890 | for (auto & e2 : e1) 891 | e2 = 0; 892 | std::int32_t * dest[numThreads][0x100]; 893 | for (auto & e1 : dest) 894 | for (auto & e2 : e1) 895 | e2 = 0; 896 | 897 | while (currentSuffix < suffixArrayEnd_) 898 | { 899 | // calculate current 'safe' suffixes to process 900 | while ((currentSuffix < suffixArrayEnd_) && (!(*currentSuffix & preceding_suffix_is_type_a_flag))) 901 | ++currentSuffix; 902 | if (currentSuffix >= suffixArrayEnd_) 903 | break; 904 | 905 | auto begin = currentSuffix; 906 | auto maxEnd = begin + (max_cache_size * numThreads); 907 | if (maxEnd > suffixArrayEnd_) 908 | maxEnd = suffixArrayEnd_; 909 | currentSuffix += 
(currentSuffix != maxEnd); 910 | while ((currentSuffix != maxEnd) && (*currentSuffix != (int32_t)0x80000000)) 911 | ++currentSuffix; 912 | auto end = currentSuffix; 913 | auto totalSuffixes = std::distance(begin, end); 914 | auto totalSuffixesPerThread = ((totalSuffixes + numThreads - 1) / numThreads); 915 | 916 | 917 | for (auto threadId = 0; threadId < numThreads; ++threadId) 918 | { 919 | numSuffixes[threadId] = 0; 920 | auto endForThisThread = begin + totalSuffixesPerThread; 921 | if (endForThisThread > end) 922 | endForThisThread = end; 923 | post_task_to_thread 924 | ( 925 | threadId, 926 | []( 927 | uint8_t const * inputBegin, 928 | std::int32_t * begin, 929 | std::int32_t * end, 930 | entry_type * cache, 931 | int32_t & numSuffixes, 932 | int32_t * suffixCount 933 | ) 934 | { 935 | auto current = begin; 936 | auto curCache = cache; 937 | --current; 938 | uint8_t currentPrecedingSymbol = 0; 939 | int32_t currentPrecedingSymbolCount = 0; 940 | while (++current != end) 941 | { 942 | auto currentSuffixIndex = *current; 943 | if (currentSuffixIndex & preceding_suffix_is_type_a_flag) 944 | { 945 | currentSuffixIndex &= sa_index_mask; 946 | if (currentSuffixIndex != 0) 947 | { 948 | int32_t precedingSuffixIndex = (currentSuffixIndex - 1); 949 | auto precedingSuffix = (inputBegin + precedingSuffixIndex); 950 | auto precedingSymbol = precedingSuffix[0]; 951 | int32_t flag = ((precedingSuffixIndex > 0) && (precedingSuffix[-1] >= precedingSymbol)) ? 
preceding_suffix_is_type_a_flag : 0; 952 | *curCache++ = {precedingSymbol, precedingSuffixIndex | flag}; 953 | if (precedingSymbol != currentPrecedingSymbol) 954 | { 955 | suffixCount[currentPrecedingSymbol] += currentPrecedingSymbolCount; 956 | currentPrecedingSymbol = precedingSymbol; 957 | currentPrecedingSymbolCount = 0; 958 | } 959 | ++currentPrecedingSymbolCount; 960 | } 961 | *current = currentSuffixIndex; 962 | } 963 | } 964 | suffixCount[currentPrecedingSymbol] += currentPrecedingSymbolCount; 965 | numSuffixes = std::distance(cache, curCache); 966 | }, inputBegin_, begin, endForThisThread, cache[threadId].get(), std::ref(numSuffixes[threadId]), sCount[threadId] 967 | ); 968 | begin = endForThisThread; 969 | } 970 | wait_for_all_tasks_completed(); 971 | 972 | for (auto threadId = 0, begin = 0, numSymbolsPerThread = ((0x100 + numThreads - 1) / numThreads); threadId < numThreads; ++threadId) 973 | { 974 | auto end = begin + numSymbolsPerThread; 975 | if (end > 0x100) 976 | end = 0x100; 977 | post_task_to_thread 978 | ( 979 | threadId, 980 | [&dest, this, &sCount, numThreads] 981 | ( 982 | int32_t begin, 983 | int32_t end 984 | ) 985 | { 986 | for (auto threadId = 0; threadId < numThreads; ++threadId) 987 | for (auto symbol = begin; symbol < end; ++symbol) 988 | { 989 | dest[threadId][symbol] = frontBucketOffset_[symbol]; 990 | frontBucketOffset_[symbol] += sCount[threadId][symbol]; 991 | sCount[threadId][symbol] = 0; 992 | } 993 | }, begin, end 994 | ); 995 | begin = end; 996 | } 997 | wait_for_all_tasks_completed(); 998 | 999 | for (auto threadId = 0; threadId < numThreads; ++threadId) 1000 | post_task_to_thread 1001 | ( 1002 | threadId, 1003 | []( 1004 | std::int32_t * dest[0x100], 1005 | entry_type const * begin, 1006 | entry_type const * end 1007 | ) 1008 | { 1009 | --begin; 1010 | while (++begin != end) 1011 | *(dest[begin->precedingSuffix_]++) = begin->precedingSuffixIndex_; 1012 | }, 1013 | dest[threadId], cache[threadId].get(), cache[threadId].get() + 
numSuffixes[threadId] 1014 | ); 1015 | wait_for_all_tasks_completed(); 1016 | } 1017 | } 1018 | 1019 | 1020 | //============================================================================== 1021 | void maniscalco::msufsort::second_stage_its 1022 | ( 1023 | // private: 1024 | // performs the the second stage of the improved two stage sort. 1025 | ) 1026 | { 1027 | if (numWorkerThreads_ == 0) 1028 | { 1029 | auto start = std::chrono::system_clock::now(); 1030 | second_stage_its_right_to_left_pass_single_threaded(); 1031 | auto finish = std::chrono::system_clock::now(); 1032 | #ifdef VERBOSE 1033 | std::cout << "second stage right to left pass time: " << std::chrono::duration_cast(finish - start).count() << " ms " << std::endl; 1034 | #endif 1035 | start = std::chrono::system_clock::now(); 1036 | second_stage_its_left_to_right_pass_single_threaded(); 1037 | finish = std::chrono::system_clock::now(); 1038 | #ifdef VERBOSE 1039 | std::cout << "second stage left to right pass time: " << std::chrono::duration_cast(finish - start).count() << " ms " << std::endl; 1040 | #endif 1041 | } 1042 | else 1043 | { 1044 | auto start = std::chrono::system_clock::now(); 1045 | second_stage_its_right_to_left_pass_multi_threaded(); 1046 | auto finish = std::chrono::system_clock::now(); 1047 | #ifdef VERBOSE 1048 | std::cout << "second stage right to left pass time: " << std::chrono::duration_cast(finish - start).count() << " ms " << std::endl; 1049 | #endif 1050 | start = std::chrono::system_clock::now(); 1051 | second_stage_its_left_to_right_pass_multi_threaded(); 1052 | finish = std::chrono::system_clock::now(); 1053 | #ifdef VERBOSE 1054 | std::cout << "second stage left to right pass time: " << std::chrono::duration_cast(finish - start).count() << " ms " << std::endl; 1055 | #endif 1056 | } 1057 | } 1058 | 1059 | 1060 | //============================================================================== 1061 | void 
maniscalco::msufsort::second_stage_its_as_burrows_wheeler_transform_right_to_left_pass_single_threaded 1062 | ( 1063 | // private: 1064 | // induce sorted position of B suffixes from sorted B* suffixes 1065 | // This is the first half of the second stage of the ITS ... the 'right to left' pass 1066 | ) 1067 | { 1068 | auto currentSuffix = suffixArrayBegin_ + inputSize_; 1069 | for (auto i = 0xff; i >= 0; --i) 1070 | { 1071 | auto backBucketOffset = &backBucketOffset_[i << 8]; 1072 | auto prevWrite = backBucketOffset; 1073 | int32_t previousPrecedingSymbol = 0; 1074 | auto endSuffix = currentSuffix - bCount_[i]; 1075 | while (currentSuffix > endSuffix) 1076 | { 1077 | int32_t precedingSuffixIndex = ((*currentSuffix & sa_index_mask) - 1); 1078 | auto precedingSuffix = (inputBegin_ + precedingSuffixIndex); 1079 | auto precedingSymbol = precedingSuffix[0]; 1080 | if ((*currentSuffix & preceding_suffix_is_type_a_flag) == 0) 1081 | { 1082 | int32_t flag = ((precedingSuffixIndex > 0) && (precedingSuffix[-1] <= precedingSymbol)) ? 0 : preceding_suffix_is_type_a_flag; 1083 | if (precedingSymbol != previousPrecedingSymbol) 1084 | { 1085 | previousPrecedingSymbol = precedingSymbol; 1086 | prevWrite = backBucketOffset + previousPrecedingSymbol; 1087 | } 1088 | *(--*prevWrite) = (precedingSuffixIndex | flag); 1089 | if (precedingSuffix >= inputBegin_) 1090 | *currentSuffix = precedingSymbol; 1091 | } 1092 | --currentSuffix; 1093 | } 1094 | currentSuffix -= aCount_[i]; 1095 | } 1096 | } 1097 | 1098 | 1099 | //============================================================================== 1100 | void maniscalco::msufsort::second_stage_its_as_burrows_wheeler_transform_right_to_left_pass_multi_threaded 1101 | ( 1102 | // private: 1103 | // induce sorted position of B suffixes from sorted B* suffixes 1104 | // This is the first half of the second stage of the ITS ... 
the 'right to left' pass 1105 | ) 1106 | { 1107 | auto numThreads = (int32_t)(numWorkerThreads_ + 1); // +1 for main thread 1108 | auto max_cache_size = (1 << 12); 1109 | struct entry_type 1110 | { 1111 | uint8_t precedingSuffix_; 1112 | int32_t precedingSuffixIndex_; 1113 | }; 1114 | std::unique_ptr cache[numThreads]; 1115 | for (auto i = 0; i < numThreads; ++i) 1116 | cache[i].reset(new entry_type[max_cache_size]); 1117 | int32_t numSuffixes[numThreads]; 1118 | for (auto & e : numSuffixes) 1119 | e = 0; 1120 | int32_t sCount[numThreads][0x100]; 1121 | for (auto & e1 : sCount) 1122 | for (auto & e2 : e1) 1123 | e2 = 0; 1124 | std::int32_t * dest[numThreads][0x100]; 1125 | for (auto & e1 : dest) 1126 | for (auto & e2 : e1) 1127 | e2 = 0; 1128 | 1129 | auto currentSuffix = suffixArrayBegin_ + inputSize_; 1130 | for (auto symbol = 0xff; symbol >= 0; --symbol) 1131 | { 1132 | auto backBucketOffset = &backBucketOffset_[symbol << 8]; 1133 | auto endSuffix = currentSuffix - bCount_[symbol]; 1134 | 1135 | while (currentSuffix > endSuffix) 1136 | { 1137 | // determine how many B/B* suffixes are safe to process during this pass 1138 | auto maxEnd = currentSuffix - (max_cache_size * numThreads); 1139 | if (maxEnd < suffixArrayBegin_) 1140 | maxEnd = suffixArrayBegin_; 1141 | if (maxEnd < endSuffix) 1142 | maxEnd = endSuffix; 1143 | auto temp = currentSuffix; 1144 | while ((temp > maxEnd) && (*temp != suffix_is_unsorted_b_type)) 1145 | --temp; 1146 | auto totalSuffixesPerThread = ((std::distance(temp, currentSuffix) + numThreads - 1) / numThreads); 1147 | 1148 | // process suffixes 1149 | for (auto threadId = 0; threadId < numThreads; ++threadId) 1150 | { 1151 | numSuffixes[threadId] = 0; 1152 | auto endForThisThread = currentSuffix - totalSuffixesPerThread; 1153 | if (endForThisThread < temp) 1154 | endForThisThread = temp; 1155 | post_task_to_thread 1156 | ( 1157 | threadId, 1158 | []( 1159 | uint8_t const * inputBegin, 1160 | std::int32_t * begin, 1161 | std::int32_t * 
end, 1162 | entry_type * cache, 1163 | int32_t & numSuffixes, 1164 | int32_t * suffixCount 1165 | ) 1166 | { 1167 | ++begin; 1168 | auto curCache = cache; 1169 | uint8_t currentPrecedingSymbol = 0; 1170 | int32_t currentPrecedingSymbolCount = 0; 1171 | while (--begin > end) 1172 | { 1173 | auto currentSuffixIndex = *begin; 1174 | if ((currentSuffixIndex & preceding_suffix_is_type_a_flag) == 0) 1175 | { 1176 | int32_t precedingSuffixIndex = ((currentSuffixIndex & sa_index_mask) - 1); 1177 | auto precedingSuffix = (inputBegin + precedingSuffixIndex); 1178 | auto precedingSymbol = precedingSuffix[0]; 1179 | int32_t flag = ((precedingSuffixIndex > 0) && (precedingSuffix[-1] <= precedingSymbol)) ? 0 : preceding_suffix_is_type_a_flag; 1180 | *curCache++ = {precedingSymbol, precedingSuffixIndex | flag}; 1181 | if (precedingSymbol != currentPrecedingSymbol) 1182 | { 1183 | suffixCount[currentPrecedingSymbol] += currentPrecedingSymbolCount; 1184 | currentPrecedingSymbol = precedingSymbol; 1185 | currentPrecedingSymbolCount = 0; 1186 | } 1187 | ++currentPrecedingSymbolCount; 1188 | if (precedingSuffixIndex >= 0) 1189 | *begin = precedingSymbol; 1190 | } 1191 | } 1192 | suffixCount[currentPrecedingSymbol] += currentPrecedingSymbolCount; 1193 | numSuffixes = std::distance(cache, curCache); 1194 | }, inputBegin_, currentSuffix, endForThisThread, cache[threadId].get(), std::ref(numSuffixes[threadId]), sCount[threadId] 1195 | ); 1196 | currentSuffix = endForThisThread; 1197 | } 1198 | wait_for_all_tasks_completed(); 1199 | 1200 | for (auto threadId = 0, begin = 0, numSymbolsPerThread = ((0x100 + numThreads - 1) / numThreads); threadId < numThreads; ++threadId) 1201 | { 1202 | auto end = begin + numSymbolsPerThread; 1203 | if (end > 0x100) 1204 | end = 0x100; 1205 | post_task_to_thread 1206 | ( 1207 | threadId, 1208 | [&dest, &backBucketOffset, &sCount, numThreads] 1209 | ( 1210 | int32_t begin, 1211 | int32_t end 1212 | ) 1213 | { 1214 | for (auto threadId = 0; threadId < 
numThreads; ++threadId) 1215 | for (auto symbol = begin; symbol < end; ++symbol) 1216 | { 1217 | dest[threadId][symbol] = backBucketOffset[symbol]; 1218 | backBucketOffset[symbol] -= sCount[threadId][symbol]; 1219 | sCount[threadId][symbol] = 0; 1220 | } 1221 | }, begin, end 1222 | ); 1223 | begin = end; 1224 | } 1225 | wait_for_all_tasks_completed(); 1226 | 1227 | for (auto threadId = 0; threadId < numThreads; ++threadId) 1228 | post_task_to_thread 1229 | ( 1230 | threadId, 1231 | [&]( 1232 | std::int32_t * dest[0x100], 1233 | entry_type const * begin, 1234 | entry_type const * end 1235 | ) 1236 | { 1237 | --begin; 1238 | while (++begin < end) 1239 | *(--dest[begin->precedingSuffix_]) = begin->precedingSuffixIndex_; 1240 | }, 1241 | dest[threadId], cache[threadId].get(), cache[threadId].get() + numSuffixes[threadId] 1242 | ); 1243 | wait_for_all_tasks_completed(); 1244 | } 1245 | currentSuffix -= aCount_[symbol]; 1246 | } 1247 | } 1248 | 1249 | 1250 | //============================================================================== 1251 | int32_t maniscalco::msufsort::second_stage_its_as_burrows_wheeler_transform_left_to_right_pass_single_threaded 1252 | ( 1253 | // private: 1254 | // induce sorted position of A suffixes from sorted B suffixes 1255 | // This is the second half of the second stage of the ITS ... 
the 'left to right' pass 1256 | ) 1257 | { 1258 | auto sentinel = suffixArrayBegin_; 1259 | auto currentSuffix = suffixArrayBegin_ - 1; 1260 | uint8_t previousPrecedingSymbol = 0; 1261 | auto previousFrontBucketOffset = frontBucketOffset_; 1262 | while (++currentSuffix < suffixArrayEnd_) 1263 | { 1264 | auto currentSuffixIndex = *currentSuffix; 1265 | if (currentSuffixIndex & preceding_suffix_is_type_a_flag) 1266 | { 1267 | int32_t precedingSuffixIndex = ((currentSuffixIndex & sa_index_mask) - 1); 1268 | auto precedingSuffix = (inputBegin_ + precedingSuffixIndex); 1269 | if ((currentSuffixIndex & sa_index_mask) != 0) 1270 | { 1271 | auto precedingSymbol = precedingSuffix[0]; 1272 | int32_t flag = ((precedingSuffixIndex > 0) && (precedingSuffix[-1] >= precedingSymbol)) ? preceding_suffix_is_type_a_flag : 0; 1273 | if (precedingSymbol != previousPrecedingSymbol) 1274 | { 1275 | previousPrecedingSymbol = precedingSymbol; 1276 | previousFrontBucketOffset = frontBucketOffset_ + previousPrecedingSymbol; 1277 | } 1278 | if (flag) 1279 | *((*previousFrontBucketOffset)++) = (precedingSuffixIndex | flag); 1280 | else 1281 | *((*previousFrontBucketOffset)++) = ((precedingSuffixIndex > 0) ? precedingSuffix[-1] : preceding_suffix_is_type_a_flag); 1282 | } 1283 | if (precedingSuffixIndex >= 0) 1284 | *currentSuffix = *precedingSuffix; 1285 | else 1286 | sentinel = currentSuffix; 1287 | } 1288 | } 1289 | int32_t sentinelIndex = (int32_t)std::distance(suffixArrayBegin_, sentinel); 1290 | return sentinelIndex; 1291 | } 1292 | 1293 | 1294 | //============================================================================== 1295 | int32_t maniscalco::msufsort::second_stage_its_as_burrows_wheeler_transform_left_to_right_pass_multi_threaded 1296 | ( 1297 | // private: 1298 | // induce sorted position of A suffixes from sorted B suffixes 1299 | // This is the second half of the second stage of the ITS ... 
the 'left to right' pass 1300 | ) 1301 | { 1302 | auto sentinel = suffixArrayBegin_; 1303 | auto numThreads = (int32_t)(numWorkerThreads_ + 1); // +1 for main thread 1304 | auto currentSuffix = suffixArrayBegin_; 1305 | auto max_cache_size = (1 << 12); 1306 | struct entry_type 1307 | { 1308 | uint8_t precedingSuffix_; 1309 | int32_t precedingSuffixIndex_; 1310 | }; 1311 | std::unique_ptr cache[numThreads]; 1312 | for (auto i = 0; i < numThreads; ++i) 1313 | cache[i].reset(new entry_type[max_cache_size]); 1314 | int32_t numSuffixes[numThreads]; 1315 | for (auto & e : numSuffixes) 1316 | e = 0; 1317 | int32_t sCount[numThreads][0x100]; 1318 | for (auto & e1 : sCount) 1319 | for (auto & e2 : e1) 1320 | e2 = 0; 1321 | std::int32_t * dest[numThreads][0x100]; 1322 | for (auto & e1 : dest) 1323 | for (auto & e2 : e1) 1324 | e2 = nullptr; 1325 | 1326 | while (currentSuffix < suffixArrayEnd_) 1327 | { 1328 | // calculate current 'safe' suffixes to process 1329 | auto begin = currentSuffix; 1330 | auto maxEnd = begin + (max_cache_size * numThreads); 1331 | if (maxEnd > suffixArrayEnd_) 1332 | maxEnd = suffixArrayEnd_; 1333 | currentSuffix += (currentSuffix != maxEnd); 1334 | while ((currentSuffix != maxEnd) && (*currentSuffix != (int32_t)0x80000000)) 1335 | ++currentSuffix; 1336 | auto end = currentSuffix; 1337 | auto totalSuffixes = std::distance(begin, end); 1338 | auto totalSuffixesPerThread = ((totalSuffixes + numThreads - 1) / numThreads); 1339 | 1340 | // process suffixes 1341 | for (auto threadId = 0; threadId < numThreads; ++threadId) 1342 | { 1343 | numSuffixes[threadId] = 0; 1344 | auto endForThisThread = begin + totalSuffixesPerThread; 1345 | if (endForThisThread > end) 1346 | endForThisThread = end; 1347 | post_task_to_thread 1348 | ( 1349 | threadId, 1350 | [&sentinel]( 1351 | uint8_t const * inputBegin, 1352 | std::int32_t * begin, 1353 | std::int32_t * end, 1354 | entry_type * cache, 1355 | int32_t & numSuffixes, 1356 | int32_t * suffixCount 1357 | ) 1358 | { 
1359 | auto current = begin; 1360 | auto curCache = cache; 1361 | --current; 1362 | uint8_t currentPrecedingSymbol = 0; 1363 | int32_t currentPrecedingSymbolCount = 0; 1364 | while (++current != end) 1365 | { 1366 | auto currentSuffixIndex = *current; 1367 | if (currentSuffixIndex & preceding_suffix_is_type_a_flag) 1368 | { 1369 | int32_t precedingSuffixIndex = ((currentSuffixIndex & sa_index_mask) - 1); 1370 | auto precedingSuffix = (inputBegin + precedingSuffixIndex); 1371 | if ((currentSuffixIndex & sa_index_mask) != 0) 1372 | { 1373 | auto precedingSymbol = precedingSuffix[0]; 1374 | bool precedingSuffixIsTypeA = ((precedingSuffixIndex == 0) || (precedingSuffix[-1] >= precedingSymbol)); 1375 | int32_t flag = (precedingSuffixIsTypeA) ? preceding_suffix_is_type_a_flag : 0; 1376 | if (flag) 1377 | *curCache++ = {precedingSymbol, precedingSuffixIndex | flag}; 1378 | else 1379 | *curCache++ = {precedingSymbol, (precedingSuffixIndex > 0) ? precedingSuffix[-1] : 0}; 1380 | if (precedingSymbol != currentPrecedingSymbol) 1381 | { 1382 | suffixCount[currentPrecedingSymbol] += currentPrecedingSymbolCount; 1383 | currentPrecedingSymbol = precedingSymbol; 1384 | currentPrecedingSymbolCount = 0; 1385 | } 1386 | ++currentPrecedingSymbolCount; 1387 | } 1388 | if (precedingSuffixIndex >= 0) 1389 | *current = precedingSuffix[0]; 1390 | else 1391 | sentinel = current; 1392 | } 1393 | } 1394 | suffixCount[currentPrecedingSymbol] += currentPrecedingSymbolCount; 1395 | numSuffixes = std::distance(cache, curCache); 1396 | }, inputBegin_, begin, endForThisThread, cache[threadId].get(), std::ref(numSuffixes[threadId]), sCount[threadId] 1397 | ); 1398 | begin = endForThisThread; 1399 | } 1400 | wait_for_all_tasks_completed(); 1401 | 1402 | for (auto threadId = 0, begin = 0, numSymbolsPerThread = ((0x100 + numThreads - 1) / numThreads); threadId < numThreads; ++threadId) 1403 | { 1404 | auto end = begin + numSymbolsPerThread; 1405 | if (end > 0x100) 1406 | end = 0x100; 1407 | 
post_task_to_thread 1408 | ( 1409 | threadId, 1410 | [&dest, this, &sCount, numThreads] 1411 | ( 1412 | int32_t begin, 1413 | int32_t end 1414 | ) 1415 | { 1416 | for (auto threadId = 0; threadId < numThreads; ++threadId) 1417 | for (auto symbol = begin; symbol < end; ++symbol) 1418 | { 1419 | dest[threadId][symbol] = frontBucketOffset_[symbol]; 1420 | frontBucketOffset_[symbol] += sCount[threadId][symbol]; 1421 | sCount[threadId][symbol] = 0; 1422 | } 1423 | }, begin, end 1424 | ); 1425 | begin = end; 1426 | } 1427 | wait_for_all_tasks_completed(); 1428 | 1429 | for (auto threadId = 0; threadId < numThreads; ++threadId) 1430 | post_task_to_thread 1431 | ( 1432 | threadId, 1433 | []( 1434 | std::int32_t * dest[0x100], 1435 | entry_type const * begin, 1436 | entry_type const * end 1437 | ) 1438 | { 1439 | --begin; 1440 | while (++begin != end) 1441 | *(dest[begin->precedingSuffix_]++) = begin->precedingSuffixIndex_; 1442 | }, 1443 | dest[threadId], cache[threadId].get(), cache[threadId].get() + numSuffixes[threadId] 1444 | ); 1445 | wait_for_all_tasks_completed(); 1446 | } 1447 | int32_t sentinelIndex = (int32_t)std::distance(suffixArrayBegin_, sentinel); 1448 | return sentinelIndex; 1449 | } 1450 | 1451 | 1452 | //============================================================================== 1453 | int32_t maniscalco::msufsort::second_stage_its_as_burrows_wheeler_transform 1454 | ( 1455 | // private: 1456 | // creates the burrows wheeler transform while completing the second stage 1457 | // of the improved two stage sort. 
1458 | ) 1459 | { 1460 | if (numWorkerThreads_ == 0) 1461 | { 1462 | auto start = std::chrono::system_clock::now(); 1463 | second_stage_its_as_burrows_wheeler_transform_right_to_left_pass_single_threaded(); 1464 | auto finish = std::chrono::system_clock::now(); 1465 | #ifdef VERBOSE 1466 | std::cout << "second stage right to left pass time: " << std::chrono::duration_cast(finish - start).count() << " ms " << std::endl; 1467 | #endif 1468 | start = std::chrono::system_clock::now(); 1469 | auto sentinelIndex = second_stage_its_as_burrows_wheeler_transform_left_to_right_pass_single_threaded(); 1470 | finish = std::chrono::system_clock::now(); 1471 | #ifdef VERBOSE 1472 | std::cout << "second stage left to right pass time: " << std::chrono::duration_cast(finish - start).count() << " ms " << std::endl; 1473 | #endif 1474 | return sentinelIndex; 1475 | } 1476 | else 1477 | { 1478 | auto start = std::chrono::system_clock::now(); 1479 | second_stage_its_as_burrows_wheeler_transform_right_to_left_pass_multi_threaded(); 1480 | auto finish = std::chrono::system_clock::now(); 1481 | #ifdef VERBOSE 1482 | std::cout << "second stage right to left pass time: " << std::chrono::duration_cast(finish - start).count() << " ms " << std::endl; 1483 | #endif 1484 | start = std::chrono::system_clock::now(); 1485 | auto sentinelIndex = second_stage_its_as_burrows_wheeler_transform_left_to_right_pass_multi_threaded(); 1486 | finish = std::chrono::system_clock::now(); 1487 | #ifdef VERBOSE 1488 | std::cout << "second stage left to right pass time: " << std::chrono::duration_cast(finish - start).count() << " ms " << std::endl; 1489 | #endif 1490 | return sentinelIndex; 1491 | } 1492 | } 1493 | 1494 | 1495 | //============================================================================== 1496 | void maniscalco::msufsort::count_suffixes 1497 | ( 1498 | uint8_t const * begin, 1499 | uint8_t const * end, 1500 | std::array count 1501 | ) 1502 | { 1503 | if (begin < end) 1504 | return; 1505 | 
std::uint32_t state = 0; 1506 | switch (get_suffix_type(begin)) 1507 | { 1508 | case suffix_type::a: state = 1; break; 1509 | case suffix_type::b: state = 0; break; 1510 | case suffix_type::bStar: state = 2; break; 1511 | } 1512 | auto current = begin; 1513 | while (true) 1514 | { 1515 | ++count[state & 0x03][endian_swap(*(uint16_t const *)current)]; 1516 | if (--current < end) 1517 | break; 1518 | state <<= ((current[0] != current[1]) | ((state & 0x01) == 0)); 1519 | state |= (current[0] > current[1]); 1520 | } 1521 | } 1522 | 1523 | 1524 | //============================================================================== 1525 | void maniscalco::msufsort::initial_two_byte_radix_sort 1526 | ( 1527 | uint8_t const * begin, 1528 | uint8_t const * end, 1529 | int32_t * bStarOffset 1530 | ) 1531 | { 1532 | if (begin < end) 1533 | return; 1534 | std::uint32_t state = 0; 1535 | switch (get_suffix_type(begin)) 1536 | { 1537 | case suffix_type::a: state = 1; break; 1538 | case suffix_type::b: state = 0; break; 1539 | case suffix_type::bStar: state = 2; break; 1540 | } 1541 | auto current = begin; 1542 | while (true) 1543 | { 1544 | if ((state & 0x03) == 2) 1545 | { 1546 | int32_t flag = ((current > inputBegin_) && (current[-1] <= current[0])) ? 
0 : preceding_suffix_is_type_a_flag; 1547 | suffixArrayBegin_[bStarOffset[endian_swap(*(uint16_t const *)current)]++] = 1548 | (std::distance(inputBegin_, current) | flag); 1549 | } 1550 | if (--current < end) 1551 | break; 1552 | state <<= ((current[0] != current[1]) | ((state & 0x01) == 0)); 1553 | state |= (current[0] > current[1]); 1554 | } 1555 | } 1556 | 1557 | 1558 | //============================================================================== 1559 | void maniscalco::msufsort::first_stage_its 1560 | ( 1561 | // private: 1562 | // does the first stage of the improved two stage sort 1563 | ) 1564 | { 1565 | auto numThreads = (int32_t)(numWorkerThreads_ + 1); // +1 for main thread 1566 | auto start = std::chrono::system_clock::now(); 1567 | std::unique_ptr bCount(new int32_t[0x10000]{}); 1568 | std::unique_ptr aCount(new int32_t[0x10000]{}); 1569 | std::unique_ptr bStarCount(new int32_t[numThreads * 0x10000]{}); 1570 | auto numSuffixesPerThread = ((inputSize_ + numThreads - 1) / numThreads); 1571 | 1572 | { 1573 | std::unique_ptr threadBCount(new int32_t[numThreads * 0x10000]{}); 1574 | std::unique_ptr threadACount(new int32_t[numThreads * 0x10000]{}); 1575 | auto inputCurrent = inputBegin_; 1576 | for (auto threadId = 0; threadId < numThreads; ++threadId) 1577 | { 1578 | auto inputEnd = inputCurrent + numSuffixesPerThread; 1579 | if (inputEnd > (inputEnd_ - 1)) 1580 | inputEnd = (inputEnd_ - 1); 1581 | auto arrayOffset = (threadId * 0x10000); 1582 | std::array c({threadBCount.get() + arrayOffset, threadACount.get() + arrayOffset, bStarCount.get() + arrayOffset, threadACount.get() + arrayOffset}); 1583 | post_task_to_thread(threadId, &msufsort::count_suffixes, this, inputEnd - 1, inputCurrent, c); 1584 | inputCurrent = inputEnd; 1585 | } 1586 | wait_for_all_tasks_completed(); 1587 | 1588 | ++aCount[((uint16_t)inputEnd_[-1]) << 8]; 1589 | ++aCount_[inputEnd_[-1]]; 1590 | for (auto threadId = 0; threadId < numThreads; ++threadId) 1591 | { 1592 | auto 
arrayOffset = (threadId * 0x10000); 1593 | for (auto j = 0; j < 0x10000; ++j) 1594 | { 1595 | bCount[j] += threadBCount[arrayOffset + j]; 1596 | bCount_[j >> 8] += threadBCount[arrayOffset + j] + bStarCount[arrayOffset + j]; 1597 | aCount[j] += threadACount[arrayOffset + j]; 1598 | aCount_[j >> 8] += threadACount[arrayOffset + j]; 1599 | } 1600 | } 1601 | } 1602 | 1603 | // compute bucket offsets into suffix array 1604 | int32_t total = 1; // 1 for sentinel 1605 | int32_t bStarTotal = 0; 1606 | std::unique_ptr totalBStarCount(new int32_t[0x10000]{}); 1607 | std::unique_ptr bStarOffset(new int32_t[numThreads * 0x10000]{}); 1608 | std::unique_ptr []> partitions(new std::tuple[0x10000]{}); 1609 | 1610 | auto numPartitions = 0; 1611 | for (int32_t i = 0; i < 0x100; ++i) 1612 | { 1613 | int32_t s = (i << 8); 1614 | frontBucketOffset_[i] = (suffixArrayBegin_ + total); 1615 | for (int32_t j = 0; j < 0x100; ++j, ++s) 1616 | { 1617 | auto partitionStartIndex = bStarTotal; 1618 | for (int32_t threadId = 0; threadId < numThreads; ++threadId) 1619 | { 1620 | bStarOffset[(threadId * 0x10000) + s] = bStarTotal; 1621 | totalBStarCount[s] += bStarCount[(threadId * 0x10000) + s]; 1622 | bStarTotal += bStarCount[(threadId * 0x10000) + s]; 1623 | bCount[s] += bStarCount[(threadId * 0x10000) + s]; 1624 | } 1625 | total += (bCount[s] + aCount[s]); 1626 | backBucketOffset_[(j << 8) | i] = suffixArrayBegin_ + total; 1627 | if (totalBStarCount[s] > 0) 1628 | partitions[numPartitions++] = std::make_tuple(partitionStartIndex, totalBStarCount[s], (suffix_value)(s | j)); 1629 | } 1630 | } 1631 | 1632 | // multi threaded two byte radix sort forms initial partitions which 1633 | // will be fully sorted by multikey quicksort 1634 | auto inputCurrent = inputBegin_; 1635 | for (auto threadId = 0; threadId < numThreads; ++threadId) 1636 | { 1637 | auto inputEnd = inputCurrent + numSuffixesPerThread; 1638 | if (inputEnd > (inputEnd_ - 1)) 1639 | inputEnd = (inputEnd_ - 1); 1640 | 
post_task_to_thread(threadId, &msufsort::initial_two_byte_radix_sort, this, inputEnd - 1, inputCurrent, &bStarOffset[threadId * 0x10000]); 1641 | inputCurrent = inputEnd; 1642 | } 1643 | wait_for_all_tasks_completed(); 1644 | 1645 | auto finish = std::chrono::system_clock::now(); 1646 | #ifdef VERBOSE 1647 | std::cout << "direct sort initial 16 bit sort time: " << std::chrono::duration_cast(finish - start).count() << " ms " << std::endl; 1648 | #endif 1649 | start = std::chrono::system_clock::now(); 1650 | 1651 | // multikey quicksort on B* parititions 1652 | std::atomic partitionCount(numPartitions); 1653 | std::vector tandemRepeatStack[numThreads]; 1654 | // sort the partitions by size to ensure that the largest partitinos are not sorted last. 1655 | // this prevents the case where the last thread is assigned a large partition while all other 1656 | // threads exit due to no more partitions to sort. 1657 | std::sort(partitions.get(), partitions.get() + partitionCount.load(), [](std::tuple const & a, std::tuple const & b) -> bool{return (std::get<1>(a) < std::get<1>(b));}); 1658 | 1659 | for (auto threadId = 0; threadId < numThreads; ++threadId) 1660 | { 1661 | tandemRepeatStack[threadId].reserve(1024); 1662 | post_task_to_thread 1663 | ( 1664 | threadId, 1665 | [&] 1666 | ( 1667 | std::vector & tandemRepeatStack 1668 | ) 1669 | { 1670 | while (true) 1671 | { 1672 | std::int32_t partitionIndex = --partitionCount; 1673 | if (partitionIndex < 0) 1674 | break; 1675 | auto const & partition = partitions[partitionIndex]; 1676 | multikey_quicksort(suffixArrayBegin_ + std::get<0>(partition), suffixArrayBegin_ + std::get<0>(partition) + 1677 | std::get<1>(partition), 2, 0, {0, std::get<2>(partition)}, tandemRepeatStack); 1678 | } 1679 | }, 1680 | std::ref(tandemRepeatStack[threadId]) 1681 | ); 1682 | } 1683 | wait_for_all_tasks_completed(); 1684 | 1685 | for (auto threadId = 0; threadId < numThreads; ++threadId) 1686 | { 1687 | post_task_to_thread 1688 | ( 1689 | 
threadId, 1690 | [&] 1691 | ( 1692 | std::vector & tandemRepeatStack 1693 | ) 1694 | { 1695 | complete_tandem_repeats(tandemRepeatStack); 1696 | }, 1697 | std::ref(tandemRepeatStack[threadId]) 1698 | ); 1699 | } 1700 | wait_for_all_tasks_completed(); 1701 | 1702 | // spread b* to their final locations in suffix array 1703 | auto destination = suffixArrayBegin_ + total; 1704 | auto source = suffixArrayBegin_ + bStarTotal; 1705 | for (int32_t i = 0xffff; i >= 0; --i) 1706 | { 1707 | if (bCount[i] || aCount[i]) 1708 | { 1709 | destination -= bCount[i]; 1710 | source -= totalBStarCount[i]; 1711 | for (auto j = totalBStarCount[i] - 1; j >= 0; --j) 1712 | destination[j] = source[j]; 1713 | for (auto j = totalBStarCount[i]; j < bCount[i]; ++j) 1714 | destination[j] = suffix_is_unsorted_b_type; 1715 | destination -= aCount[i]; 1716 | for (auto j = 0; j < aCount[i]; ++j) 1717 | destination[j] = preceding_suffix_is_type_a_flag; 1718 | } 1719 | } 1720 | suffixArrayBegin_[0] = (inputSize_ | preceding_suffix_is_type_a_flag); // sa[0] = sentinel 1721 | 1722 | finish = std::chrono::system_clock::now(); 1723 | #ifdef VERBOSE 1724 | std::cout << "direct sort time: " << std::chrono::duration_cast(finish - start).count() << " ms " << std::endl; 1725 | #endif 1726 | } 1727 | 1728 | 1729 | //============================================================================== 1730 | auto maniscalco::msufsort::make_suffix_array 1731 | ( 1732 | // public: 1733 | // computes the suffix array for the input data 1734 | uint8_t const * inputBegin, 1735 | uint8_t const * inputEnd 1736 | ) -> suffix_array 1737 | { 1738 | inputBegin_ = inputBegin; 1739 | inputEnd_ = inputEnd; 1740 | inputSize_ = std::distance(inputBegin_, inputEnd_); 1741 | getValueEnd_ = (inputEnd_ - sizeof(suffix_value)); 1742 | getValueMaxIndex_ = (inputSize_ - sizeof(suffix_value)); 1743 | for (auto & e : copyEnd_) 1744 | e = 0x00; 1745 | auto source = inputEnd_ - sizeof(suffix_value); 1746 | auto dest = copyEnd_; 1747 | if 
(source < inputBegin_) 1748 | { 1749 | auto n = std::distance(source, inputBegin); 1750 | source += n; 1751 | dest += n; 1752 | } 1753 | std::copy(source, inputEnd_, dest); 1754 | suffix_array suffixArray; 1755 | auto suffixArraySize = (inputSize_ + 1); 1756 | suffixArray.resize(suffixArraySize); 1757 | for (auto & e : suffixArray) 1758 | e = 0; 1759 | suffixArrayBegin_ = suffixArray.data(); 1760 | suffixArrayEnd_ = suffixArrayBegin_ + suffixArraySize; 1761 | inverseSuffixArrayBegin_ = (suffixArrayBegin_ + ((inputSize_ + 1) >> 1)); 1762 | inverseSuffixArrayEnd_ = suffixArrayEnd_; 1763 | 1764 | first_stage_its(); 1765 | second_stage_its(); 1766 | return suffixArray; 1767 | } 1768 | 1769 | 1770 | //============================================================================== 1771 | int32_t maniscalco::msufsort::forward_burrows_wheeler_transform 1772 | ( 1773 | // public: 1774 | // computes the burrows wheeler transform for the input data and 1775 | // replaces the input data with that transformed result. 1776 | // returns the index of the sentinel character (which is removed from the 1777 | // transformed data). 
1778 | uint8_t * inputBegin, 1779 | uint8_t * inputEnd 1780 | ) 1781 | { 1782 | inputBegin_ = inputBegin; 1783 | inputEnd_ = inputEnd; 1784 | inputSize_ = std::distance(inputBegin_, inputEnd_); 1785 | 1786 | getValueEnd_ = (inputEnd_ - sizeof(suffix_value)); 1787 | getValueMaxIndex_ = (inputSize_ - sizeof(suffix_value)); 1788 | for (auto & e : copyEnd_) 1789 | e = 0x00; 1790 | auto source = inputEnd_ - sizeof(suffix_value); 1791 | auto dest = copyEnd_; 1792 | if (source < inputBegin_) 1793 | { 1794 | auto n = std::distance((uint8_t const *)source, (uint8_t const *)inputBegin); 1795 | source += n; 1796 | dest += n; 1797 | } 1798 | std::copy(source, inputEnd_, dest); 1799 | suffix_array suffixArray; 1800 | auto suffixArraySize = (inputSize_ + 1); 1801 | suffixArray.resize(suffixArraySize); 1802 | for (auto & e : suffixArray) 1803 | e = 0; 1804 | suffixArrayBegin_ = suffixArray.data(); 1805 | suffixArrayEnd_ = suffixArrayBegin_ + suffixArraySize; 1806 | inverseSuffixArrayBegin_ = (suffixArrayBegin_ + ((inputSize_ + 1) >> 1)); 1807 | inverseSuffixArrayEnd_ = suffixArrayEnd_; 1808 | 1809 | first_stage_its(); 1810 | int32_t sentinelIndex = second_stage_its_as_burrows_wheeler_transform(); 1811 | for (int32_t i = 0; i < (inputSize_ + 1); ++i) 1812 | { 1813 | if (i != sentinelIndex) 1814 | *inputBegin++ = (uint8_t)suffixArray[i]; 1815 | } 1816 | return sentinelIndex; 1817 | } 1818 | 1819 | 1820 | //============================================================================== 1821 | void maniscalco::msufsort::reverse_burrows_wheeler_transform 1822 | ( 1823 | uint8_t * inputBegin, 1824 | uint8_t * inputEnd, 1825 | int32_t sentinelIndex, 1826 | int32_t numThreads 1827 | ) 1828 | { 1829 | #pragma pack(push, 1) 1830 | struct index_type 1831 | { 1832 | index_type(){} 1833 | index_type(std::int32_t v, std::uint8_t s){value_ = v; symbol_ = s;} 1834 | std::int32_t value_; 1835 | std::uint8_t symbol_; 1836 | }; 1837 | #pragma pack(pop) 1838 | auto inputSize = 
std::distance(inputBegin, inputEnd); 1839 | std::vector index; 1840 | index.resize(inputSize + 1); 1841 | 1842 | { 1843 | // populate 'index' 1844 | std::int32_t symbolRange[numThreads][0x100]; 1845 | for (auto & e1 : symbolRange) 1846 | for (auto & e2 : e1) 1847 | e2 = 0; 1848 | auto bytesPerThread = ((inputSize + numThreads - 1) / numThreads); 1849 | std::vector threads; 1850 | threads.resize(numThreads); 1851 | 1852 | auto bytesProcessed = 0; 1853 | for (auto threadId = 0; threadId < numThreads; ++threadId) 1854 | { 1855 | auto bytesForThisThread = bytesPerThread; 1856 | if ((bytesProcessed + bytesForThisThread) > inputSize) 1857 | bytesForThisThread = (inputSize - bytesProcessed); 1858 | threads[threadId] = std::thread([] 1859 | ( 1860 | uint8_t const * data, 1861 | int32_t size, 1862 | int32_t * result 1863 | ) 1864 | { 1865 | std::int32_t symbolCount[0x10000] = {}; 1866 | for (auto i = 0; i < size - 1; i += 2) 1867 | ++symbolCount[*(uint16_t const *)(data + i)]; 1868 | for (auto i = 0; i < 0x10000; ++i) 1869 | { 1870 | result[i & 0xff] += symbolCount[i]; 1871 | result[i >> 8] += symbolCount[i]; 1872 | } 1873 | if (size & 1) 1874 | ++result[data[size - 1]]; 1875 | }, inputBegin + bytesProcessed, bytesForThisThread, symbolRange[threadId]); 1876 | bytesProcessed += bytesForThisThread; 1877 | } 1878 | for (auto & e : threads) 1879 | e.join(); 1880 | int32_t n = 1; 1881 | for (auto i = 0; i < 0x100; ++i) 1882 | { 1883 | for (auto threadId = 0; threadId < numThreads; ++threadId) 1884 | { 1885 | auto temp = symbolRange[threadId][i]; 1886 | symbolRange[threadId][i] = n; 1887 | n += temp; 1888 | } 1889 | } 1890 | 1891 | index[0] = {sentinelIndex, inputBegin[0]}; 1892 | bytesProcessed = 0; 1893 | for (auto threadId = 0; threadId < numThreads; ++threadId) 1894 | { 1895 | auto bytesForThisThread = bytesPerThread; 1896 | if ((bytesProcessed + bytesForThisThread) > inputSize) 1897 | bytesForThisThread = (inputSize - bytesProcessed); 1898 | threads[threadId] = 
std::thread([sentinelIndex] 1899 | ( 1900 | uint8_t const * data, 1901 | int32_t begin, 1902 | int32_t end, 1903 | int32_t * symbolRange, 1904 | index_type * index 1905 | ) 1906 | { 1907 | auto n = begin; 1908 | n += (begin > sentinelIndex); 1909 | for (auto i = begin; i < end; ++i, ++n) 1910 | { 1911 | n += (i == sentinelIndex); 1912 | auto k = symbolRange[(uint32_t)data[i]]++; 1913 | index[k] = {n, data[k - (k >= sentinelIndex)]}; 1914 | } 1915 | }, inputBegin, bytesProcessed, bytesProcessed + bytesForThisThread, symbolRange[threadId], index.data()); 1916 | bytesProcessed += bytesForThisThread; 1917 | } 1918 | for (auto & e : threads) 1919 | e.join(); 1920 | } 1921 | 1922 | std::size_t maxPartitionsPerThread = 256; 1923 | std::vector ibwtPartitionInfo; 1924 | std::size_t partitionCount = (numThreads * maxPartitionsPerThread); 1925 | if (partitionCount > index.size()) 1926 | partitionCount = index.size(); 1927 | ibwtPartitionInfo.reserve(partitionCount + 8192); 1928 | std::size_t maxBytesPerPartition = (((index.size() << 1) - 1) / partitionCount); 1929 | 1930 | auto firstDecodeIndex = index[0].value_; 1931 | auto outputCurrent = inputBegin; 1932 | auto currentIndex = 0; 1933 | while (currentIndex < (std::int32_t)index.size()) 1934 | { 1935 | auto partitionSize = maxBytesPerPartition; 1936 | if ((currentIndex + partitionSize) > index.size()) 1937 | partitionSize = (index.size() - currentIndex); 1938 | ibwtPartitionInfo.push_back({index[currentIndex].value_, index[currentIndex].value_, outputCurrent, outputCurrent, 1939 | ((outputCurrent + partitionSize) <= inputEnd) ? 
(outputCurrent + partitionSize) : inputEnd}); 1940 | index[currentIndex].value_ |= 0x80000000; 1941 | currentIndex += partitionSize; 1942 | outputCurrent += partitionSize; 1943 | } 1944 | partitionCount = ibwtPartitionInfo.size(); 1945 | 1946 | std::vector threads; 1947 | threads.resize(numThreads); 1948 | struct decoded_info 1949 | { 1950 | decoded_info(){} 1951 | decoded_info 1952 | ( 1953 | std::uint8_t const * begin, 1954 | std::uint8_t const * end, 1955 | std::int32_t startIndex, 1956 | std::int32_t endIndex 1957 | ): 1958 | begin_(begin), 1959 | end_(end), 1960 | startIndex_(startIndex), 1961 | endIndex_(endIndex) 1962 | { 1963 | } 1964 | std::uint8_t const * begin_; 1965 | std::uint8_t const * end_; 1966 | std::int32_t startIndex_; 1967 | std::int32_t endIndex_; 1968 | }; 1969 | 1970 | std::vector decodedInfo; 1971 | decodedInfo.reserve(8192); 1972 | 1973 | std::vector> availableDecodeSpace; 1974 | availableDecodeSpace.reserve(2048); 1975 | 1976 | while (!ibwtPartitionInfo.empty()) 1977 | { 1978 | auto partitionsRemaining = ibwtPartitionInfo.size(); 1979 | maxPartitionsPerThread = (((partitionsRemaining << 1) - 1) / numThreads); 1980 | if (maxPartitionsPerThread < 1) 1981 | maxPartitionsPerThread = 1; 1982 | for (auto threadId = 0; threadId < numThreads; ++threadId) 1983 | { 1984 | auto numPartitions = maxPartitionsPerThread; 1985 | if (numPartitions > partitionsRemaining) 1986 | numPartitions = partitionsRemaining; 1987 | partitionsRemaining -= numPartitions; 1988 | threads[threadId] = std::thread( 1989 | []( 1990 | index_type * indexBegin, 1991 | std::int32_t sentinelIndex, 1992 | ibwt_partition_info * partitionBegin, 1993 | ibwt_partition_info * partitionEnd 1994 | ) 1995 | { 1996 | bool done = false; 1997 | while (!done) 1998 | { 1999 | done = true; 2000 | for (auto partitionCurrent = partitionBegin; partitionCurrent < partitionEnd; ++partitionCurrent) 2001 | { 2002 | auto & e = *partitionCurrent; 2003 | if (((e.currentIndex_ & (std::int32_t)0x80000000) 
== 0) && (e.currentOutput_ < e.endOutput_)) 2004 | { 2005 | done = false; 2006 | auto i = e.currentIndex_; 2007 | *e.currentOutput_ = indexBegin[i].symbol_; 2008 | e.currentOutput_ += (i != sentinelIndex); 2009 | e.currentIndex_ = indexBegin[i].value_; 2010 | } 2011 | } 2012 | } 2013 | }, 2014 | index.data(), sentinelIndex, ibwtPartitionInfo.data() + partitionsRemaining, 2015 | ibwtPartitionInfo.data() + partitionsRemaining + numPartitions); 2016 | } 2017 | for (auto & e : threads) 2018 | e.join(); 2019 | 2020 | for (auto iter = ibwtPartitionInfo.begin(); iter != ibwtPartitionInfo.end(); ) 2021 | { 2022 | if (iter->currentOutput_ != nullptr) 2023 | { 2024 | auto startIndex = iter->startIndex_; 2025 | auto endIndex = (iter->currentIndex_ & 0x7fffffff); 2026 | if ((iter->currentIndex_ & 0x80000000) || (iter->beginOutput_ != iter->currentOutput_)) 2027 | { 2028 | decodedInfo.push_back({iter->beginOutput_, iter->currentOutput_, startIndex, endIndex}); 2029 | iter->startIndex_ = endIndex; 2030 | } 2031 | } 2032 | if (iter->currentIndex_ & 0x80000000) 2033 | { 2034 | if (iter->currentOutput_ < iter->endOutput_) 2035 | availableDecodeSpace.push_back(std::make_pair(iter->currentOutput_, iter->endOutput_)); 2036 | iter = ibwtPartitionInfo.erase(iter); 2037 | } 2038 | else 2039 | { 2040 | ++iter; 2041 | } 2042 | } 2043 | 2044 | if (!ibwtPartitionInfo.empty()) 2045 | { 2046 | for (auto & e : ibwtPartitionInfo) 2047 | { 2048 | if (!availableDecodeSpace.empty()) 2049 | { 2050 | auto a = availableDecodeSpace.back(); 2051 | availableDecodeSpace.pop_back(); 2052 | e.beginOutput_ = a.first; 2053 | e.currentOutput_ = a.first; 2054 | e.endOutput_ = a.second; 2055 | } 2056 | else 2057 | { 2058 | e.currentOutput_ = nullptr; 2059 | e.endOutput_ = nullptr; 2060 | } 2061 | } 2062 | } 2063 | } 2064 | 2065 | std::uint8_t const * curDec = nullptr; 2066 | std::uint8_t const * curDecEnd = nullptr; 2067 | std::int32_t curEndIndex = 0; 2068 | 2069 | for (std::size_t i = 0; i < 
decodedInfo.size(); ++i) 2070 | if (decodedInfo[i].startIndex_ == firstDecodeIndex) 2071 | { 2072 | curDec = decodedInfo[i].begin_; 2073 | curDecEnd = decodedInfo[i].end_; 2074 | curEndIndex = decodedInfo[i].endIndex_; 2075 | break; 2076 | } 2077 | 2078 | auto beginWrite = (std::uint8_t *)index.data(); 2079 | auto currentWrite = beginWrite; 2080 | auto endWrite = (currentWrite + inputSize); 2081 | while (currentWrite < endWrite) 2082 | { 2083 | while ((currentWrite < endWrite) && (curDec < curDecEnd)) 2084 | *currentWrite++ = *curDec++; 2085 | for (std::size_t j = 0; j < decodedInfo.size(); ++j) 2086 | if (decodedInfo[j].startIndex_ == curEndIndex) 2087 | { 2088 | curDec = decodedInfo[j].begin_; 2089 | curDecEnd = decodedInfo[j].end_; 2090 | curEndIndex = decodedInfo[j].endIndex_; 2091 | break; 2092 | } 2093 | } 2094 | 2095 | std::copy(beginWrite, endWrite, inputBegin); 2096 | } 2097 | 2098 | -------------------------------------------------------------------------------- /src/library/msufsort/msufsort.h: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2016 Michael A Maniscalco 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 
15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | */ 24 | 25 | 26 | 27 | 28 | #pragma once 29 | 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | 38 | 39 | namespace maniscalco 40 | { 41 | 42 | class msufsort 43 | { 44 | public: 45 | 46 | static auto constexpr max_radix_size = (1 << 16); 47 | using suffix_index = std::int32_t; 48 | using suffix_array = std::vector; 49 | 50 | msufsort 51 | ( 52 | std::int32_t = 1 53 | ); 54 | 55 | ~msufsort(); 56 | 57 | suffix_array make_suffix_array 58 | ( 59 | std::uint8_t const *, 60 | std::uint8_t const * 61 | ); 62 | 63 | int32_t forward_burrows_wheeler_transform 64 | ( 65 | std::uint8_t *, 66 | std::uint8_t * 67 | ); 68 | 69 | static void reverse_burrows_wheeler_transform 70 | ( 71 | std::uint8_t *, 72 | std::uint8_t *, 73 | std::int32_t, 74 | std::int32_t 75 | ); 76 | 77 | protected: 78 | 79 | private: 80 | 81 | using suffix_value = std::uint32_t; 82 | 83 | // flags used in ISA 84 | static std::int32_t constexpr is_induced_sort = 0x40000000; 85 | static std::int32_t constexpr is_tandem_repeat_length = 0x80000000; 86 | static std::int32_t constexpr isa_flag_mask = is_induced_sort | is_tandem_repeat_length; 87 | static std::int32_t constexpr isa_index_mask = ~isa_flag_mask; 88 | 89 | // flags used in SA 90 | static std::int32_t constexpr preceding_suffix_is_type_a_flag = 0x80000000; 91 | static std::int32_t constexpr mark_isa_when_sorted = 0x40000000; 92 | static std::int32_t constexpr sa_index_mask = 
~(preceding_suffix_is_type_a_flag | mark_isa_when_sorted); 93 | static std::int32_t constexpr suffix_is_unsorted_b_type = sa_index_mask; 94 | 95 | static constexpr std::int32_t insertion_sort_threshold = 16; 96 | static std::int32_t constexpr min_match_length_for_tandem_repeats = (2 + sizeof(suffix_value) + sizeof(suffix_value)); 97 | 98 | enum suffix_type 99 | { 100 | a, 101 | b, 102 | bStar 103 | }; 104 | 105 | struct tandem_repeat_info 106 | { 107 | tandem_repeat_info 108 | ( 109 | suffix_index * partitionBegin, 110 | suffix_index * partitionEnd, 111 | std::int32_t numTerminators, 112 | std::int32_t tandemRepeatLength 113 | ): 114 | partitionBegin_(partitionBegin), 115 | partitionEnd_(partitionEnd), 116 | numTerminators_(numTerminators), 117 | tandemRepeatLength_(tandemRepeatLength) 118 | { 119 | } 120 | 121 | suffix_index * partitionBegin_; 122 | suffix_index * partitionEnd_; 123 | std::int32_t numTerminators_; 124 | std::int32_t tandemRepeatLength_; 125 | }; 126 | 127 | suffix_value get_value 128 | ( 129 | uint8_t const *, 130 | suffix_index 131 | ) const; 132 | 133 | suffix_type get_suffix_type 134 | ( 135 | uint8_t const * 136 | ); 137 | 138 | bool compare_suffixes 139 | ( 140 | std::uint8_t const *, 141 | suffix_index, 142 | suffix_index 143 | ) const; 144 | 145 | int compare_suffixes 146 | ( 147 | std::uint8_t const *, 148 | suffix_index, 149 | suffix_index, 150 | std::size_t 151 | ) const; 152 | 153 | void insertion_sort 154 | ( 155 | suffix_index *, 156 | suffix_index *, 157 | int32_t, 158 | uint64_t 159 | ); 160 | 161 | void multikey_insertion_sort 162 | ( 163 | suffix_index *, 164 | suffix_index *, 165 | std::int32_t, 166 | suffix_value, 167 | std::array, 168 | std::vector & 169 | ); 170 | 171 | std::size_t partition_tandem_repeats 172 | ( 173 | suffix_index *, 174 | suffix_index *, 175 | std::int32_t, 176 | std::vector & 177 | ); 178 | 179 | void count_suffixes 180 | ( 181 | uint8_t const *, 182 | uint8_t const *, 183 | std::array 184 | ); 185 | 186 | 
template 187 | void post_task_to_thread 188 | ( 189 | int32_t threadId, 190 | F &&, 191 | argument_types && ... 192 | ); 193 | 194 | void wait_for_all_tasks_completed() const; 195 | 196 | void second_stage_its(); 197 | 198 | int32_t second_stage_its_as_burrows_wheeler_transform(); 199 | 200 | void second_stage_its_right_to_left_pass_single_threaded(); 201 | 202 | void second_stage_its_right_to_left_pass_multi_threaded(); 203 | 204 | void second_stage_its_left_to_right_pass_single_threaded(); 205 | 206 | void second_stage_its_left_to_right_pass_multi_threaded(); 207 | 208 | void second_stage_its_as_burrows_wheeler_transform_right_to_left_pass_single_threaded(); 209 | 210 | int32_t second_stage_its_as_burrows_wheeler_transform_left_to_right_pass_single_threaded(); 211 | 212 | void second_stage_its_as_burrows_wheeler_transform_right_to_left_pass_multi_threaded(); 213 | 214 | int32_t second_stage_its_as_burrows_wheeler_transform_left_to_right_pass_multi_threaded(); 215 | 216 | void first_stage_its(); 217 | 218 | suffix_index * multikey_quicksort 219 | ( 220 | suffix_index *, 221 | suffix_index *, 222 | std::size_t, 223 | suffix_value, 224 | std::array, 225 | std::vector & 226 | ); 227 | 228 | void initial_two_byte_radix_sort 229 | ( 230 | uint8_t const *, 231 | uint8_t const *, 232 | int32_t * 233 | ); 234 | 235 | bool has_potential_tandem_repeats 236 | ( 237 | suffix_value, 238 | std::array 239 | ) const; 240 | 241 | void complete_tandem_repeats 242 | ( 243 | std::vector & 244 | ); 245 | 246 | void complete_tandem_repeat 247 | ( 248 | suffix_index *, 249 | suffix_index *, 250 | std::int32_t, 251 | std::int32_t 252 | ); 253 | 254 | struct ibwt_partition_info 255 | { 256 | ibwt_partition_info(){} 257 | 258 | ibwt_partition_info 259 | ( 260 | suffix_index startIndex, 261 | suffix_index currentIndex, 262 | std::uint8_t * beginOutput, 263 | std::uint8_t * currentOutput, 264 | std::uint8_t * endOutput 265 | ): 266 | startIndex_(startIndex), 267 | 
currentIndex_(currentIndex), 268 | beginOutput_(beginOutput), 269 | currentOutput_(currentOutput), 270 | endOutput_(endOutput) 271 | { 272 | } 273 | 274 | suffix_index startIndex_; 275 | suffix_index currentIndex_; 276 | std::uint8_t * beginOutput_; 277 | std::uint8_t * currentOutput_; 278 | std::uint8_t * endOutput_; 279 | }; 280 | 281 | uint8_t const * inputBegin_; 282 | 283 | uint8_t const * inputEnd_; 284 | 285 | int32_t inputSize_; 286 | 287 | uint8_t const * getValueEnd_; 288 | 289 | suffix_index getValueMaxIndex_; 290 | 291 | uint8_t copyEnd_[sizeof(suffix_value) << 1]; 292 | 293 | suffix_index * suffixArrayBegin_; 294 | 295 | suffix_index * suffixArrayEnd_; 296 | 297 | suffix_index * inverseSuffixArrayBegin_; 298 | 299 | suffix_index * inverseSuffixArrayEnd_; 300 | 301 | suffix_index * frontBucketOffset_[0x100]; 302 | 303 | std::unique_ptr backBucketOffset_; 304 | 305 | int32_t aCount_[0x100]; 306 | 307 | int32_t bCount_[0x100]; 308 | 309 | bool const tandemRepeatSortEnabled_ = true; 310 | 311 | class worker_thread 312 | { 313 | public: 314 | 315 | worker_thread 316 | ( 317 | ): 318 | thread_(), 319 | task_(), 320 | terminate_(false), 321 | taskCompleted_(true) 322 | { 323 | auto workFunction = [] 324 | ( 325 | std::function & task, 326 | bool volatile & taskComplete, 327 | bool volatile & terminate 328 | ) 329 | { 330 | while (taskComplete) 331 | ; 332 | while (!terminate) 333 | { 334 | task(); 335 | taskComplete = true; 336 | while (taskComplete) 337 | ; 338 | } 339 | }; 340 | thread_ = std::thread(workFunction, std::ref(task_), std::ref(taskCompleted_), std::ref(terminate_)); 341 | } 342 | 343 | ~worker_thread 344 | ( 345 | ) 346 | { 347 | terminate(); 348 | thread_.join(); 349 | } 350 | 351 | void terminate 352 | ( 353 | ) 354 | { 355 | terminate_ = true; 356 | taskCompleted_ = false; 357 | } 358 | 359 | template 360 | inline void post_task 361 | ( 362 | argument_types && ... 
arguments 363 | ) 364 | { 365 | task_ = std::bind(std::forward(arguments) ...); 366 | taskCompleted_ = false; 367 | } 368 | 369 | inline void wait 370 | ( 371 | ) const 372 | { 373 | while (!taskCompleted_) 374 | ; 375 | } 376 | 377 | private: 378 | 379 | std::thread thread_; 380 | std::function task_; 381 | bool volatile terminate_; 382 | bool volatile taskCompleted_; 383 | }; 384 | 385 | 386 | std::unique_ptr workerThreads_; 387 | 388 | int32_t numWorkerThreads_; 389 | 390 | struct stack_frame 391 | { 392 | suffix_index * suffixArrayBegin; 393 | suffix_index * suffixArrayEnd; 394 | std::size_t currentMatchLength; 395 | suffix_value startingPattern; 396 | std::array endingPattern; 397 | std::vector & tandemRepeatStack; 398 | }; 399 | 400 | }; // class msufsort 401 | 402 | 403 | template 404 | msufsort::suffix_array make_suffix_array 405 | ( 406 | input_iter, 407 | input_iter, 408 | int32_t = 1 409 | ); 410 | 411 | template 412 | int32_t forward_burrows_wheeler_transform 413 | ( 414 | input_iter, 415 | input_iter, 416 | int32_t = 1 417 | ); 418 | 419 | template 420 | static void reverse_burrows_wheeler_transform 421 | ( 422 | input_iter, 423 | input_iter, 424 | int32_t, 425 | int32_t = 1 426 | ); 427 | 428 | } // namespace maniscalco 429 | 430 | 431 | //============================================================================== 432 | template 433 | maniscalco::msufsort::suffix_array maniscalco::make_suffix_array 434 | ( 435 | input_iter begin, 436 | input_iter end, 437 | int32_t numThreads 438 | ) 439 | { 440 | if (numThreads <= 0) 441 | numThreads = 1; 442 | if (numThreads > (int32_t)std::thread::hardware_concurrency()) 443 | numThreads = (int32_t)std::thread::hardware_concurrency(); 444 | return msufsort(numThreads).make_suffix_array((uint8_t const *)&*begin, (uint8_t const *)&*end); 445 | } 446 | 447 | 448 | //============================================================================== 449 | template 450 | int32_t 
maniscalco::forward_burrows_wheeler_transform 451 | ( 452 | input_iter begin, 453 | input_iter end, 454 | int32_t numThreads 455 | ) 456 | { 457 | if (numThreads <= 0) 458 | numThreads = 1; 459 | if (numThreads > (int32_t)std::thread::hardware_concurrency()) 460 | numThreads = (int32_t)std::thread::hardware_concurrency(); 461 | return msufsort(numThreads).forward_burrows_wheeler_transform((uint8_t *)&*begin, (uint8_t *)&*end); 462 | } 463 | 464 | 465 | //============================================================================== 466 | template 467 | void maniscalco::reverse_burrows_wheeler_transform 468 | ( 469 | input_iter begin, 470 | input_iter end, 471 | int32_t sentinelIndex, 472 | int32_t numThreads 473 | ) 474 | { 475 | msufsort::reverse_burrows_wheeler_transform((uint8_t *)&*begin, (uint8_t *)&*end, sentinelIndex, numThreads); 476 | } 477 | --------------------------------------------------------------------------------