├── pkgconfig ├── libdivsufsort.pc.cmake └── CMakeLists.txt ├── .gitignore ├── examples ├── CMakeLists.txt ├── sasearch.c ├── suftest.c ├── mksary.c ├── unbwt.c └── bwt.c ├── CMakeModules ├── CheckFunctionKeywords.cmake ├── cmake_uninstall.cmake.in ├── AppendCompilerFlags.cmake ├── ProjectCPack.cmake └── CheckLFS.cmake ├── CHANGELOG.md ├── VERSION.cmake ├── LICENSE ├── lib ├── CMakeLists.txt ├── utils.c ├── divsufsort.c ├── trsort.c └── sssort.c ├── include ├── lfs.h.cmake ├── config.h.cmake ├── CMakeLists.txt ├── divsufsort_private.h └── divsufsort.h.cmake ├── CMakeLists.txt └── README.md /pkgconfig/libdivsufsort.pc.cmake: -------------------------------------------------------------------------------- 1 | prefix=@CMAKE_INSTALL_PREFIX@ 2 | exec_prefix=${prefix} 3 | libdir=@CMAKE_INSTALL_LIBDIR@ 4 | includedir=@CMAKE_INSTALL_INCLUDEDIR@ 5 | 6 | Name: @PROJECT_NAME@@W64BIT@ 7 | Description: @PROJECT_DESCRIPTION@ 8 | Version: @PROJECT_VERSION_FULL@ 9 | URL: @PROJECT_URL@ 10 | Libs: -L${libdir} -ldivsufsort@W64BIT@ 11 | Cflags: -I${includedir} 12 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Object files 2 | *.o 3 | *.ko 4 | *.obj 5 | *.elf 6 | 7 | # Precompiled Headers 8 | *.gch 9 | *.pch 10 | 11 | # Libraries 12 | *.lib 13 | *.a 14 | *.la 15 | *.lo 16 | 17 | # Shared objects (inc. 
Windows DLLs) 18 | *.dll 19 | *.so 20 | *.so.* 21 | *.dylib 22 | 23 | # Executables 24 | *.exe 25 | *.out 26 | *.app 27 | *.i*86 28 | *.x86_64 29 | *.hex 30 | 31 | # CMake files/directories 32 | build/ 33 | -------------------------------------------------------------------------------- /examples/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | ## Add definitions ## 2 | add_definitions(-D_LARGEFILE_SOURCE -D_LARGE_FILES -D_FILE_OFFSET_BITS=64) 3 | 4 | ## Targets ## 5 | include_directories("${CMAKE_CURRENT_SOURCE_DIR}/../include" 6 | "${CMAKE_CURRENT_BINARY_DIR}/../include") 7 | link_directories("${CMAKE_CURRENT_BINARY_DIR}/../lib") 8 | foreach(src suftest mksary sasearch bwt unbwt) 9 | add_executable(${src} ${src}.c) 10 | target_link_libraries(${src} divsufsort) 11 | endforeach(src) 12 | -------------------------------------------------------------------------------- /pkgconfig/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | ## generate libdivsufsort.pc ## 2 | set(W64BIT "") 3 | configure_file("${CMAKE_CURRENT_SOURCE_DIR}/libdivsufsort.pc.cmake" "${CMAKE_CURRENT_BINARY_DIR}/libdivsufsort.pc" @ONLY) 4 | install(FILES "${CMAKE_CURRENT_BINARY_DIR}/libdivsufsort.pc" DESTINATION ${CMAKE_INSTALL_PKGCONFIGDIR}) 5 | if(BUILD_DIVSUFSORT64) 6 | set(W64BIT "64") 7 | configure_file("${CMAKE_CURRENT_SOURCE_DIR}/libdivsufsort.pc.cmake" "${CMAKE_CURRENT_BINARY_DIR}/libdivsufsort64.pc" @ONLY) 8 | install(FILES "${CMAKE_CURRENT_BINARY_DIR}/libdivsufsort64.pc" DESTINATION ${CMAKE_INSTALL_PKGCONFIGDIR}) 9 | endif(BUILD_DIVSUFSORT64) 10 | -------------------------------------------------------------------------------- /CMakeModules/CheckFunctionKeywords.cmake: -------------------------------------------------------------------------------- 1 | include(CheckCSourceCompiles) 2 | 3 | # check_function_keywords(<wordlist> [<result_var>]) 4 | # Probes every keyword in the ;-separated <wordlist> by compiling a trivial function declared with it; 5 | # check_c_source_compiles() records each outcome in HAVE_<KEYWORD> (keyword upper-cased, "-+/ ()" mapped to "_"). 6 | # The optional <result_var> receives the first keyword that compiled, or "" when none did. 7 | # (The macro used to write to ${_result}, which is not one of its parameters, so no result was ever returned.) 8 | macro(check_function_keywords _wordlist) 9 | set(_cfk_first "") 10 | foreach(flag ${_wordlist}) 11 | 
string(REGEX REPLACE "[-+/ ()]" "_" flagname "${flag}") 12 | string(TOUPPER "${flagname}" flagname) 13 | set(have_flag "HAVE_${flagname}") 14 | check_c_source_compiles("${flag} void func(); void func() { } int main() { func(); return 0; }" ${have_flag}) 15 | # No break(): the remaining keywords are still probed so that every HAVE_<KEYWORD> gets set. 16 | if(${have_flag} AND NOT _cfk_first) 17 | set(_cfk_first "${flag}") 18 | endif(${have_flag} AND NOT _cfk_first) 19 | endforeach(flag) 20 | if(${ARGC} GREATER 1) 21 | set(${ARGV1} "${_cfk_first}") 22 | endif(${ARGC} GREATER 1) 23 | endmacro(check_function_keywords) 24 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # libdivsufsort Change Log 2 | 3 | See full changelog at: https://github.com/y-256/libdivsufsort/commits 4 | 5 | ## [2.0.1] - 2010-11-11 6 | ### Fixed 7 | * Wrong variable used in `divbwt` function 8 | * Enclose some string variables with double quotation marks in include/CMakeLists.txt 9 | * Fix typo in include/CMakeLists.txt 10 | 11 | ## 2.0.0 - 2008-08-23 12 | ### Changed 13 | * Switch the build system to [CMake](http://www.cmake.org/) 14 | * Improve the performance of the suffix-sorting algorithm 15 | 16 | ### Added 17 | * OpenMP support 18 | * 64-bit version of divsufsort 19 | 20 | [Unreleased]: https://github.com/y-256/libdivsufsort/compare/2.0.1...HEAD 21 | [2.0.1]: https://github.com/y-256/libdivsufsort/compare/2.0.0...2.0.1 22 | -------------------------------------------------------------------------------- /VERSION.cmake: -------------------------------------------------------------------------------- 1 | set(PROJECT_VERSION_MAJOR "2") 2 | set(PROJECT_VERSION_MINOR "0") 3 | set(PROJECT_VERSION_PATCH "2") 4 | set(PROJECT_VERSION_EXTRA "-1") 5 | set(PROJECT_VERSION "${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}") 6 | set(PROJECT_VERSION_FULL "${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}${PROJECT_VERSION_EXTRA}") 7 | 8 | set(LIBRARY_VERSION "3.0.1") 9 | set(LIBRARY_SOVERSION "3") 10 | 11 | ## Git revision 
number ## 12 | if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git") 13 | execute_process(COMMAND git describe --tags HEAD 14 | WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" 15 | OUTPUT_VARIABLE GIT_DESCRIBE_TAGS ERROR_QUIET) 16 | if(GIT_DESCRIBE_TAGS) 17 | string(REGEX REPLACE "^v(.*)" "\\1" GIT_REVISION "${GIT_DESCRIBE_TAGS}") 18 | string(STRIP "${GIT_REVISION}" GIT_REVISION) 19 | if(GIT_REVISION) 20 | set(PROJECT_VERSION_FULL "${GIT_REVISION}") 21 | endif(GIT_REVISION) 22 | endif(GIT_DESCRIBE_TAGS) 23 | endif(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git") 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2003 Yuta Mori All rights reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /lib/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include_directories("${CMAKE_CURRENT_SOURCE_DIR}/../include" 2 | "${CMAKE_CURRENT_BINARY_DIR}/../include") 3 | 4 | set(divsufsort_SRCS divsufsort.c sssort.c trsort.c utils.c) 5 | 6 | ## libdivsufsort ## 7 | add_library(divsufsort ${divsufsort_SRCS}) 8 | install(TARGETS divsufsort 9 | RUNTIME DESTINATION ${CMAKE_INSTALL_RUNTIMEDIR} 10 | LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} 11 | ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) 12 | set_target_properties(divsufsort PROPERTIES 13 | VERSION "${LIBRARY_VERSION}" 14 | SOVERSION "${LIBRARY_SOVERSION}" 15 | DEFINE_SYMBOL DIVSUFSORT_BUILD_DLL 16 | RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/../examples") 17 | 18 | ## libdivsufsort64 ## 19 | if(BUILD_DIVSUFSORT64) 20 | add_library(divsufsort64 ${divsufsort_SRCS}) 21 | install(TARGETS divsufsort64 22 | RUNTIME DESTINATION ${CMAKE_INSTALL_RUNTIMEDIR} 23 | LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} 24 | ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) 25 | set_target_properties(divsufsort64 PROPERTIES 26 | VERSION "${LIBRARY_VERSION}" 27 | SOVERSION "${LIBRARY_SOVERSION}" 28 | DEFINE_SYMBOL DIVSUFSORT_BUILD_DLL 29 | COMPILE_FLAGS "-DBUILD_DIVSUFSORT64" 30 | RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/../examples") 31 | endif(BUILD_DIVSUFSORT64) 32 | -------------------------------------------------------------------------------- /CMakeModules/cmake_uninstall.cmake.in: -------------------------------------------------------------------------------- 1 | IF(NOT EXISTS "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt") 2 | MESSAGE(FATAL_ERROR "Cannot find install manifest: \"@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt\"") 3 | ENDIF(NOT EXISTS "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt") 4 | 5 | FILE(READ "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt" files) 6 | 
STRING(REGEX REPLACE "\n" ";" files "${files}") 7 | 8 | SET(NUM 0) 9 | FOREACH(file ${files}) 10 | IF(EXISTS "$ENV{DESTDIR}${file}") 11 | MESSAGE(STATUS "Looking for \"$ENV{DESTDIR}${file}\" - found") 12 | SET(UNINSTALL_CHECK_${NUM} 1) 13 | ELSE(EXISTS "$ENV{DESTDIR}${file}") 14 | MESSAGE(STATUS "Looking for \"$ENV{DESTDIR}${file}\" - not found") 15 | SET(UNINSTALL_CHECK_${NUM} 0) 16 | ENDIF(EXISTS "$ENV{DESTDIR}${file}") 17 | MATH(EXPR NUM "1 + ${NUM}") 18 | ENDFOREACH(file) 19 | 20 | SET(NUM 0) 21 | FOREACH(file ${files}) 22 | IF(${UNINSTALL_CHECK_${NUM}}) 23 | MESSAGE(STATUS "Uninstalling \"$ENV{DESTDIR}${file}\"") 24 | EXEC_PROGRAM( 25 | "@CMAKE_COMMAND@" ARGS "-E remove \"$ENV{DESTDIR}${file}\"" 26 | OUTPUT_VARIABLE rm_out 27 | RETURN_VALUE rm_retval 28 | ) 29 | IF(NOT "${rm_retval}" STREQUAL 0) 30 | MESSAGE(FATAL_ERROR "Problem when removing \"$ENV{DESTDIR}${file}\"") 31 | ENDIF(NOT "${rm_retval}" STREQUAL 0) 32 | ENDIF(${UNINSTALL_CHECK_${NUM}}) 33 | MATH(EXPR NUM "1 + ${NUM}") 34 | ENDFOREACH(file) 35 | 36 | FILE(REMOVE "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt") 37 | -------------------------------------------------------------------------------- /CMakeModules/AppendCompilerFlags.cmake: -------------------------------------------------------------------------------- 1 | include(CheckCSourceCompiles) 2 | include(CheckCXXSourceCompiles) 3 | 4 | macro(append_c_compiler_flags _flags _name _result) 5 | set(SAFE_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS}) 6 | string(REGEX REPLACE "[-+/ ]" "_" cname "${_name}") 7 | string(TOUPPER "${cname}" cname) 8 | foreach(flag ${_flags}) 9 | string(REGEX REPLACE "^[-+/ ]+(.*)[-+/ ]*$" "\\1" flagname "${flag}") 10 | string(REGEX REPLACE "[-+/ ]" "_" flagname "${flagname}") 11 | string(TOUPPER "${flagname}" flagname) 12 | set(have_flag "HAVE_${cname}_${flagname}") 13 | set(CMAKE_REQUIRED_FLAGS "${flag}") 14 | check_c_source_compiles("int main() { return 0; }" ${have_flag}) 15 | if(${have_flag}) 16 | set(${_result} 
"${${_result}} ${flag}") 17 | endif(${have_flag}) 18 | endforeach(flag) 19 | set(CMAKE_REQUIRED_FLAGS ${SAFE_CMAKE_REQUIRED_FLAGS}) 20 | endmacro(append_c_compiler_flags) 21 | 22 | macro(append_cxx_compiler_flags _flags _name _result) 23 | set(SAFE_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS}) 24 | string(REGEX REPLACE "[-+/ ]" "_" cname "${_name}") 25 | string(TOUPPER "${cname}" cname) 26 | foreach(flag ${_flags}) 27 | string(REGEX REPLACE "^[-+/ ]+(.*)[-+/ ]*$" "\\1" flagname "${flag}") 28 | string(REGEX REPLACE "[-+/ ]" "_" flagname "${flagname}") 29 | string(TOUPPER "${flagname}" flagname) 30 | set(have_flag "HAVE_${cname}_${flagname}") 31 | set(CMAKE_REQUIRED_FLAGS "${flag}") 32 | check_cxx_source_compiles("int main() { return 0; }" ${have_flag}) 33 | if(${have_flag}) 34 | set(${_result} "${${_result}} ${flag}") 35 | endif(${have_flag}) 36 | endforeach(flag) 37 | set(CMAKE_REQUIRED_FLAGS ${SAFE_CMAKE_REQUIRED_FLAGS}) 38 | endmacro(append_cxx_compiler_flags) 39 | -------------------------------------------------------------------------------- /CMakeModules/ProjectCPack.cmake: -------------------------------------------------------------------------------- 1 | # If the cmake version includes cpack, use it 2 | IF(EXISTS "${CMAKE_ROOT}/Modules/CPack.cmake") 3 | SET(CPACK_PACKAGE_DESCRIPTION_SUMMARY "${PROJECT_DESCRIPTION}") 4 | SET(CPACK_PACKAGE_VENDOR "${PROJECT_VENDOR}") 5 | SET(CPACK_PACKAGE_DESCRIPTION_FILE "${CMAKE_CURRENT_SOURCE_DIR}/README.md") 6 | SET(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE") 7 | SET(CPACK_PACKAGE_VERSION_MAJOR "${PROJECT_VERSION_MAJOR}") 8 | SET(CPACK_PACKAGE_VERSION_MINOR "${PROJECT_VERSION_MINOR}") 9 | SET(CPACK_PACKAGE_VERSION_PATCH "${PROJECT_VERSION_PATCH}") 10 | # SET(CPACK_PACKAGE_INSTALL_DIRECTORY "${PROJECT_NAME} ${PROJECT_VERSION}") 11 | SET(CPACK_SOURCE_PACKAGE_FILE_NAME "${PROJECT_NAME}-${PROJECT_VERSION_FULL}") 12 | 13 | IF(NOT DEFINED CPACK_SYSTEM_NAME) 14 | SET(CPACK_SYSTEM_NAME 
"${CMAKE_SYSTEM_NAME}-${CMAKE_SYSTEM_PROCESSOR}") 15 | ENDIF(NOT DEFINED CPACK_SYSTEM_NAME) 16 | 17 | IF(${CPACK_SYSTEM_NAME} MATCHES Windows) 18 | IF(CMAKE_CL_64) 19 | SET(CPACK_SYSTEM_NAME win64-${CMAKE_SYSTEM_PROCESSOR}) 20 | ELSE(CMAKE_CL_64) 21 | SET(CPACK_SYSTEM_NAME win32-${CMAKE_SYSTEM_PROCESSOR}) 22 | ENDIF(CMAKE_CL_64) 23 | ENDIF(${CPACK_SYSTEM_NAME} MATCHES Windows) 24 | 25 | IF(NOT DEFINED CPACK_PACKAGE_FILE_NAME) 26 | SET(CPACK_PACKAGE_FILE_NAME "${CPACK_SOURCE_PACKAGE_FILE_NAME}-${CPACK_SYSTEM_NAME}") 27 | ENDIF(NOT DEFINED CPACK_PACKAGE_FILE_NAME) 28 | 29 | SET(CPACK_PACKAGE_CONTACT "${PROJECT_CONTACT}") 30 | IF(UNIX) 31 | SET(CPACK_STRIP_FILES "") 32 | SET(CPACK_SOURCE_STRIP_FILES "") 33 | # SET(CPACK_PACKAGE_EXECUTABLES "ccmake" "CMake") 34 | ENDIF(UNIX) 35 | SET(CPACK_SOURCE_IGNORE_FILES "/CVS/" "/build/" "/\\\\.build/" "/\\\\.svn/" "~$") 36 | # include CPack model once all variables are set 37 | INCLUDE(CPack) 38 | ENDIF(EXISTS "${CMAKE_ROOT}/Modules/CPack.cmake") 39 | -------------------------------------------------------------------------------- /include/lfs.h.cmake: -------------------------------------------------------------------------------- 1 | /* 2 | * lfs.h for libdivsufsort 3 | * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. 4 | * 5 | * Permission is hereby granted, free of charge, to any person 6 | * obtaining a copy of this software and associated documentation 7 | * files (the "Software"), to deal in the Software without 8 | * restriction, including without limitation the rights to use, 9 | * copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the 11 | * Software is furnished to do so, subject to the following 12 | * conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be 15 | * included in all copies or substantial portions of the Software. 
16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 19 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 20 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 21 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 22 | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 24 | * OTHER DEALINGS IN THE SOFTWARE. 25 | */ 26 | 27 | #ifndef _LFS_H 28 | #define _LFS_H 1 29 | 30 | #ifdef __cplusplus 31 | extern "C" { 32 | #endif /* __cplusplus */ 33 | 34 | #ifndef __STRICT_ANSI__ 35 | # define LFS_OFF_T @LFS_OFF_T@ 36 | # define LFS_FOPEN @LFS_FOPEN@ 37 | # define LFS_FTELL @LFS_FTELL@ 38 | # define LFS_FSEEK @LFS_FSEEK@ 39 | # define LFS_PRId @LFS_PRID@ 40 | #else 41 | # define LFS_OFF_T long 42 | # define LFS_FOPEN fopen 43 | # define LFS_FTELL ftell 44 | # define LFS_FSEEK fseek 45 | # define LFS_PRId "ld" 46 | #endif 47 | #ifndef PRIdOFF_T 48 | # define PRIdOFF_T LFS_PRId 49 | #endif 50 | 51 | 52 | #ifdef __cplusplus 53 | } /* extern "C" */ 54 | #endif /* __cplusplus */ 55 | 56 | #endif /* _LFS_H */ 57 | -------------------------------------------------------------------------------- /include/config.h.cmake: -------------------------------------------------------------------------------- 1 | /* 2 | * config.h for libdivsufsort 3 | * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. 
4 | * 5 | * Permission is hereby granted, free of charge, to any person 6 | * obtaining a copy of this software and associated documentation 7 | * files (the "Software"), to deal in the Software without 8 | * restriction, including without limitation the rights to use, 9 | * copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the 11 | * Software is furnished to do so, subject to the following 12 | * conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be 15 | * included in all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 19 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 20 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 21 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 22 | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 24 | * OTHER DEALINGS IN THE SOFTWARE. 25 | */ 26 | 27 | #ifndef _CONFIG_H 28 | #define _CONFIG_H 1 29 | 30 | #ifdef __cplusplus 31 | extern "C" { 32 | #endif /* __cplusplus */ 33 | 34 | /** Define to the version of this package. **/ 35 | #cmakedefine PROJECT_VERSION_FULL "${PROJECT_VERSION_FULL}" 36 | 37 | /** Define to 1 if you have the header files. 
**/ 38 | #cmakedefine HAVE_INTTYPES_H 1 39 | #cmakedefine HAVE_STDDEF_H 1 40 | #cmakedefine HAVE_STDINT_H 1 41 | #cmakedefine HAVE_STDLIB_H 1 42 | #cmakedefine HAVE_STRING_H 1 43 | #cmakedefine HAVE_STRINGS_H 1 44 | #cmakedefine HAVE_MEMORY_H 1 45 | #cmakedefine HAVE_SYS_TYPES_H 1 46 | 47 | /** for WinIO **/ 48 | #cmakedefine HAVE_IO_H 1 49 | #cmakedefine HAVE_FCNTL_H 1 50 | #cmakedefine HAVE__SETMODE 1 51 | #cmakedefine HAVE_SETMODE 1 52 | #cmakedefine HAVE__FILENO 1 53 | #cmakedefine HAVE_FOPEN_S 1 54 | #cmakedefine HAVE__O_BINARY 1 55 | #ifndef HAVE__SETMODE 56 | # if HAVE_SETMODE 57 | # define _setmode setmode 58 | # define HAVE__SETMODE 1 59 | # endif 60 | # if HAVE__SETMODE && !HAVE__O_BINARY 61 | # define _O_BINARY 0 62 | # define HAVE__O_BINARY 1 63 | # endif 64 | #endif 65 | 66 | /** for inline **/ 67 | #ifndef INLINE 68 | # define INLINE @INLINE@ 69 | #endif 70 | 71 | /** for VC++ warning **/ 72 | #ifdef _MSC_VER 73 | #pragma warning(disable: 4127) 74 | #endif 75 | 76 | 77 | #ifdef __cplusplus 78 | } /* extern "C" */ 79 | #endif /* __cplusplus */ 80 | 81 | #endif /* _CONFIG_H */ 82 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | ### cmake file for building libdivsufsort Package ### 2 | cmake_minimum_required(VERSION 2.4.4) 3 | set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules") 4 | include(AppendCompilerFlags) 5 | 6 | ## Project information ## 7 | project(libdivsufsort C) 8 | set(PROJECT_VENDOR "Yuta Mori") 9 | set(PROJECT_CONTACT "yuta.256@gmail.com") 10 | set(PROJECT_URL "https://github.com/y-256/libdivsufsort") 11 | set(PROJECT_DESCRIPTION "A lightweight suffix sorting library") 12 | include(VERSION.cmake) 13 | 14 | ## CPack configuration ## 15 | set(CPACK_GENERATOR "TGZ;TBZ2;ZIP") 16 | set(CPACK_SOURCE_GENERATOR "TGZ;TBZ2;ZIP") 17 | include(ProjectCPack) 18 | 19 | ## Project options ## 20 | 
option(BUILD_SHARED_LIBS "Set to OFF to build static libraries" ON) 21 | option(BUILD_EXAMPLES "Build examples" ON) 22 | option(BUILD_DIVSUFSORT64 "Build libdivsufsort64" OFF) 23 | option(USE_OPENMP "Use OpenMP for parallelization" OFF) 24 | option(WITH_LFS "Enable Large File Support" ON) 25 | 26 | ## Installation directories ## 27 | set(LIB_SUFFIX "" CACHE STRING "Define suffix of directory name (32 or 64)") 28 | 29 | set(CMAKE_INSTALL_RUNTIMEDIR "" CACHE PATH "Specify the output directory for dll runtimes (default is bin)") 30 | if(NOT CMAKE_INSTALL_RUNTIMEDIR) 31 | set(CMAKE_INSTALL_RUNTIMEDIR "${CMAKE_INSTALL_PREFIX}/bin") 32 | endif(NOT CMAKE_INSTALL_RUNTIMEDIR) 33 | 34 | set(CMAKE_INSTALL_LIBDIR "" CACHE PATH "Specify the output directory for libraries (default is lib)") 35 | if(NOT CMAKE_INSTALL_LIBDIR) 36 | set(CMAKE_INSTALL_LIBDIR "${CMAKE_INSTALL_PREFIX}/lib${LIB_SUFFIX}") 37 | endif(NOT CMAKE_INSTALL_LIBDIR) 38 | 39 | set(CMAKE_INSTALL_INCLUDEDIR "" CACHE PATH "Specify the output directory for header files (default is include)") 40 | if(NOT CMAKE_INSTALL_INCLUDEDIR) 41 | set(CMAKE_INSTALL_INCLUDEDIR "${CMAKE_INSTALL_PREFIX}/include") 42 | endif(NOT CMAKE_INSTALL_INCLUDEDIR) 43 | 44 | set(CMAKE_INSTALL_PKGCONFIGDIR "" CACHE PATH "Specify the output directory for pkgconfig files (default is lib/pkgconfig)") 45 | if(NOT CMAKE_INSTALL_PKGCONFIGDIR) 46 | set(CMAKE_INSTALL_PKGCONFIGDIR "${CMAKE_INSTALL_LIBDIR}/pkgconfig") 47 | endif(NOT CMAKE_INSTALL_PKGCONFIGDIR) 48 | 49 | ## Build type ## 50 | if(NOT CMAKE_BUILD_TYPE) 51 | set(CMAKE_BUILD_TYPE "Release") 52 | elseif(CMAKE_BUILD_TYPE STREQUAL "Debug") 53 | set(CMAKE_VERBOSE_MAKEFILE ON) 54 | endif(NOT CMAKE_BUILD_TYPE) 55 | 56 | ## Compiler options ## 57 | if(MSVC) 58 | append_c_compiler_flags("/W4" "VC" CMAKE_C_FLAGS) 59 | append_c_compiler_flags("/Oi;/Ot;/Ox;/Oy" "VC" CMAKE_C_FLAGS_RELEASE) 60 | if(USE_OPENMP) 61 | append_c_compiler_flags("/openmp" "VC" CMAKE_C_FLAGS) 62 | endif(USE_OPENMP) 63 | 
elseif(BORLAND) 64 | append_c_compiler_flags("-w" "BCC" CMAKE_C_FLAGS) 65 | append_c_compiler_flags("-Oi;-Og;-Os;-Ov;-Ox" "BCC" CMAKE_C_FLAGS_RELEASE) 66 | else(MSVC) 67 | if(CMAKE_COMPILER_IS_GNUCC) 68 | append_c_compiler_flags("-Wall" "GCC" CMAKE_C_FLAGS) 69 | append_c_compiler_flags("-fomit-frame-pointer" "GCC" CMAKE_C_FLAGS_RELEASE) 70 | if(USE_OPENMP) 71 | append_c_compiler_flags("-fopenmp" "GCC" CMAKE_C_FLAGS) 72 | endif(USE_OPENMP) 73 | else(CMAKE_COMPILER_IS_GNUCC) 74 | append_c_compiler_flags("-Wall" "UNKNOWN" CMAKE_C_FLAGS) 75 | append_c_compiler_flags("-fomit-frame-pointer" "UNKNOWN" CMAKE_C_FLAGS_RELEASE) 76 | if(USE_OPENMP) 77 | append_c_compiler_flags("-fopenmp;-openmp;-omp" "UNKNOWN" CMAKE_C_FLAGS) 78 | endif(USE_OPENMP) 79 | endif(CMAKE_COMPILER_IS_GNUCC) 80 | endif(MSVC) 81 | 82 | ## Add definitions ## 83 | add_definitions(-DHAVE_CONFIG_H=1 -D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS) 84 | 85 | ## Add subdirectories ## 86 | add_subdirectory(pkgconfig) 87 | add_subdirectory(include) 88 | add_subdirectory(lib) 89 | if(BUILD_EXAMPLES) 90 | add_subdirectory(examples) 91 | endif(BUILD_EXAMPLES) 92 | 93 | ## Add 'uninstall' target ## 94 | CONFIGURE_FILE( 95 | "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules/cmake_uninstall.cmake.in" 96 | "${CMAKE_CURRENT_BINARY_DIR}/CMakeModules/cmake_uninstall.cmake" 97 | IMMEDIATE @ONLY) 98 | ADD_CUSTOM_TARGET(uninstall 99 | "${CMAKE_COMMAND}" -P "${CMAKE_CURRENT_BINARY_DIR}/CMakeModules/cmake_uninstall.cmake") 100 | -------------------------------------------------------------------------------- /CMakeModules/CheckLFS.cmake: -------------------------------------------------------------------------------- 1 | ## Checks for large file support ## 2 | include(CheckIncludeFile) 3 | include(CheckSymbolExists) 4 | include(CheckTypeSize) 5 | 6 | macro(check_lfs _isenable) 7 | set(LFS_OFF_T "") 8 | set(LFS_FOPEN "") 9 | set(LFS_FSEEK "") 10 | set(LFS_FTELL "") 11 | set(LFS_PRID "") 12 | 13 | if(${_isenable}) 
14 | set(SAFE_CMAKE_REQUIRED_DEFINITIONS "${CMAKE_REQUIRED_DEFINITIONS}") 15 | set(CMAKE_REQUIRED_DEFINITIONS ${CMAKE_REQUIRED_DEFINITIONS} 16 | -D_LARGEFILE_SOURCE -D_LARGE_FILES -D_FILE_OFFSET_BITS=64 17 | -D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS) 18 | 19 | check_include_file("sys/types.h" HAVE_SYS_TYPES_H) 20 | check_include_file("inttypes.h" HAVE_INTTYPES_H) 21 | check_include_file("stddef.h" HAVE_STDDEF_H) 22 | check_include_file("stdint.h" HAVE_STDINT_H) 23 | 24 | # LFS type1: 8 <= sizeof(off_t), fseeko, ftello 25 | check_type_size("off_t" SIZEOF_OFF_T) 26 | if(SIZEOF_OFF_T GREATER 7) 27 | check_symbol_exists("fseeko" "stdio.h" HAVE_FSEEKO) 28 | check_symbol_exists("ftello" "stdio.h" HAVE_FTELLO) 29 | if(HAVE_FSEEKO AND HAVE_FTELLO) 30 | set(LFS_OFF_T "off_t") 31 | set(LFS_FOPEN "fopen") 32 | set(LFS_FSEEK "fseeko") 33 | set(LFS_FTELL "ftello") 34 | check_symbol_exists("PRIdMAX" "inttypes.h" HAVE_PRIDMAX) 35 | if(HAVE_PRIDMAX) 36 | set(LFS_PRID "PRIdMAX") 37 | else(HAVE_PRIDMAX) 38 | check_type_size("long" SIZEOF_LONG) 39 | check_type_size("int" SIZEOF_INT) 40 | if(SIZEOF_OFF_T GREATER SIZEOF_LONG) 41 | set(LFS_PRID "\"lld\"") 42 | elseif(SIZEOF_LONG GREATER SIZEOF_INT) 43 | set(LFS_PRID "\"ld\"") 44 | else(SIZEOF_OFF_T GREATER SIZEOF_LONG) 45 | set(LFS_PRID "\"d\"") 46 | endif(SIZEOF_OFF_T GREATER SIZEOF_LONG) 47 | endif(HAVE_PRIDMAX) 48 | endif(HAVE_FSEEKO AND HAVE_FTELLO) 49 | endif(SIZEOF_OFF_T GREATER 7) 50 | 51 | # LFS type2: 8 <= sizeof(off64_t), fopen64, fseeko64, ftello64 52 | if(NOT LFS_OFF_T) 53 | check_type_size("off64_t" SIZEOF_OFF64_T) 54 | if(SIZEOF_OFF64_T GREATER 7) 55 | check_symbol_exists("fopen64" "stdio.h" HAVE_FOPEN64) 56 | check_symbol_exists("fseeko64" "stdio.h" HAVE_FSEEKO64) 57 | check_symbol_exists("ftello64" "stdio.h" HAVE_FTELLO64) 58 | if(HAVE_FOPEN64 AND HAVE_FSEEKO64 AND HAVE_FTELLO64) 59 | set(LFS_OFF_T "off64_t") 60 | set(LFS_FOPEN "fopen64") 61 | set(LFS_FSEEK "fseeko64") 62 | set(LFS_FTELL 
"ftello64") 63 | check_symbol_exists("PRIdMAX" "inttypes.h" HAVE_PRIDMAX) 64 | if(HAVE_PRIDMAX) 65 | set(LFS_PRID "PRIdMAX") 66 | else(HAVE_PRIDMAX) 67 | check_type_size("long" SIZEOF_LONG) 68 | check_type_size("int" SIZEOF_INT) 69 | if(SIZEOF_OFF64_T GREATER SIZEOF_LONG) 70 | set(LFS_PRID "\"lld\"") 71 | elseif(SIZEOF_LONG GREATER SIZEOF_INT) 72 | set(LFS_PRID "\"ld\"") 73 | else(SIZEOF_OFF64_T GREATER SIZEOF_LONG) 74 | set(LFS_PRID "\"d\"") 75 | endif(SIZEOF_OFF64_T GREATER SIZEOF_LONG) 76 | endif(HAVE_PRIDMAX) 77 | endif(HAVE_FOPEN64 AND HAVE_FSEEKO64 AND HAVE_FTELLO64) 78 | endif(SIZEOF_OFF64_T GREATER 7) 79 | endif(NOT LFS_OFF_T) 80 | 81 | # LFS type3: 8 <= sizeof(__int64), _fseeki64, _ftelli64 82 | if(NOT LFS_OFF_T) 83 | check_type_size("__int64" SIZEOF___INT64) 84 | if(SIZEOF___INT64 GREATER 7) 85 | check_symbol_exists("_fseeki64" "stdio.h" HAVE__FSEEKI64) 86 | check_symbol_exists("_ftelli64" "stdio.h" HAVE__FTELLI64) 87 | if(HAVE__FSEEKI64 AND HAVE__FTELLI64) 88 | set(LFS_OFF_T "__int64") 89 | set(LFS_FOPEN "fopen") 90 | set(LFS_FSEEK "_fseeki64") 91 | set(LFS_FTELL "_ftelli64") 92 | set(LFS_PRID "\"I64d\"") 93 | endif(HAVE__FSEEKI64 AND HAVE__FTELLI64) 94 | endif(SIZEOF___INT64 GREATER 7) 95 | endif(NOT LFS_OFF_T) 96 | 97 | set(CMAKE_REQUIRED_DEFINITIONS "${SAFE_CMAKE_REQUIRED_DEFINITIONS}") 98 | endif(${_isenable}) 99 | 100 | if(NOT LFS_OFF_T) 101 | ## not found 102 | set(LFS_OFF_T "long") 103 | set(LFS_FOPEN "fopen") 104 | set(LFS_FSEEK "fseek") 105 | set(LFS_FTELL "ftell") 106 | set(LFS_PRID "\"ld\"") 107 | endif(NOT LFS_OFF_T) 108 | 109 | endmacro(check_lfs) 110 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # libdivsufsort 2 | 3 | libdivsufsort is a software library that implements a lightweight suffix array construction algorithm. 
4 | 5 | ## News 6 | * 2015-03-21: The project has moved from [Google Code](http://code.google.com/p/libdivsufsort/) to [GitHub](https://github.com/y-256/libdivsufsort) 7 | 8 | ## Introduction 9 | This library provides a simple and efficient C API to construct a suffix array and a Burrows-Wheeler transformed string from a given string over a constant-size alphabet. 10 | The algorithm runs in O(n log n) worst-case time using only 5n+O(1) bytes of memory space, where n is the length of 11 | the string. 12 | 13 | ## Build requirements 14 | * An ANSI C Compiler (e.g. GNU GCC) 15 | * [CMake](http://www.cmake.org/ "CMake") version 2.4.4 or newer 16 | * CMake-supported build tool 17 | 18 | ## Building on GNU/Linux 19 | 1. Get the source code from GitHub. You can either 20 | * use git to clone the repository 21 | ``` 22 | git clone https://github.com/y-256/libdivsufsort.git 23 | ``` 24 | * or download a [zip file](../../archive/master.zip) directly 25 | 2. Create a `build` directory in the package source directory. 26 | ```shell 27 | $ cd libdivsufsort 28 | $ mkdir build 29 | $ cd build 30 | ``` 31 | 3. Configure the package for your system. 32 | If you want to install to a different location, change the -DCMAKE_INSTALL_PREFIX option. 33 | ```shell 34 | $ cmake -DCMAKE_BUILD_TYPE="Release" \ 35 | -DCMAKE_INSTALL_PREFIX="/usr/local" .. 36 | ``` 37 | 4. Compile the package. 38 | ```shell 39 | $ make 40 | ``` 41 | 5. (Optional) Install the library and header files. 42 | ```shell 43 | $ sudo make install 44 | ``` 45 | 46 | ## API 47 | ```c 48 | /* Data types */ 49 | typedef int32_t saint_t; 50 | typedef int32_t saidx_t; 51 | typedef uint8_t sauchar_t; 52 | 53 | /* 54 | * Constructs the suffix array of a given string. 55 | * @param T[0..n-1] The input string. 56 | * @param SA[0..n-1] The output array of suffixes. 57 | * @param n The length of the given string. 58 | * @return 0 if no error occurred, -1 or -2 otherwise. 
59 | */ 60 | saint_t 61 | divsufsort(const sauchar_t *T, saidx_t *SA, saidx_t n); 62 | 63 | /* 64 | * Constructs the Burrows-Wheeler transformed string of a given string. 65 | * @param T[0..n-1] The input string. 66 | * @param U[0..n-1] The output string. (can be T) 67 | * @param A[0..n-1] The temporary array. (can be NULL) 68 | * @param n The length of the given string. 69 | * @return The primary index if no error occurred, -1 or -2 otherwise. 70 | */ 71 | saidx_t 72 | divbwt(const sauchar_t *T, sauchar_t *U, saidx_t *A, saidx_t n); 73 | ``` 74 | 75 | ## Example Usage 76 | ```c 77 | #include <stdio.h> 78 | #include <stdlib.h> 79 | #include <string.h> 80 | 81 | #include <divsufsort.h> 82 | 83 | int main() { 84 | // input data 85 | char *Text = "abracadabra"; 86 | int n = strlen(Text); 87 | int i, j; 88 | 89 | // allocate 90 | int *SA = (int *)malloc(n * sizeof(int)); 91 | 92 | // sort 93 | divsufsort((unsigned char *)Text, SA, n); 94 | 95 | // output 96 | for(i = 0; i < n; ++i) { 97 | printf("SA[%2d] = %2d: ", i, SA[i]); 98 | for(j = SA[i]; j < n; ++j) { 99 | printf("%c", Text[j]); 100 | } 101 | printf("$\n"); 102 | } 103 | 104 | // deallocate 105 | free(SA); 106 | 107 | return 0; 108 | } 109 | ``` 110 | See the [examples](examples) directory for a few other examples. 111 | 112 | ## Benchmarks 113 | See [Benchmarks](https://github.com/y-256/libdivsufsort/blob/wiki/SACA_Benchmarks.md) page for details. 114 | 115 | ## License 116 | libdivsufsort is released under the [MIT license](LICENSE "MIT license"). 117 | > The MIT License (MIT) 118 | > 119 | > Copyright (c) 2003 Yuta Mori All rights reserved. 
120 | > 121 | > Permission is hereby granted, free of charge, to any person obtaining a copy 122 | > of this software and associated documentation files (the "Software"), to deal 123 | > in the Software without restriction, including without limitation the rights 124 | > to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 125 | > copies of the Software, and to permit persons to whom the Software is 126 | > furnished to do so, subject to the following conditions: 127 | > 128 | > The above copyright notice and this permission notice shall be included in all 129 | > copies or substantial portions of the Software. 130 | > 131 | > THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 132 | > IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 133 | > FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 134 | > AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 135 | > LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 136 | > OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 137 | > SOFTWARE. 138 | 139 | ## Author 140 | * Yuta Mori 141 | -------------------------------------------------------------------------------- /examples/sasearch.c: -------------------------------------------------------------------------------- 1 | /* 2 | * sasearch.c for libdivsufsort 3 | * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. 
4 | * 5 | * Permission is hereby granted, free of charge, to any person 6 | * obtaining a copy of this software and associated documentation 7 | * files (the "Software"), to deal in the Software without 8 | * restriction, including without limitation the rights to use, 9 | * copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the 11 | * Software is furnished to do so, subject to the following 12 | * conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be 15 | * included in all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 19 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 20 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 21 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 22 | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 24 | * OTHER DEALINGS IN THE SOFTWARE. 
25 | */ 26 | 27 | #if HAVE_CONFIG_H 28 | # include "config.h" 29 | #endif 30 | #include <stdio.h> 31 | #if HAVE_STRING_H 32 | # include <string.h> 33 | #endif 34 | #if HAVE_STDLIB_H 35 | # include <stdlib.h> 36 | #endif 37 | #if HAVE_MEMORY_H 38 | # include <memory.h> 39 | #endif 40 | #if HAVE_STDDEF_H 41 | # include <stddef.h> 42 | #endif 43 | #if HAVE_STRINGS_H 44 | # include <strings.h> 45 | #endif 46 | #if HAVE_SYS_TYPES_H 47 | # include <sys/types.h> 48 | #endif 49 | #if HAVE_IO_H && HAVE_FCNTL_H 50 | # include <io.h> 51 | # include <fcntl.h> 52 | #endif 53 | #include <divsufsort.h> 54 | #include "lfs.h" 55 | 56 | /* Print the program banner and usage line to stderr, then exit with `status`. */ 57 | static 58 | void 59 | print_help(const char *progname, int status) { 60 | fprintf(stderr, 61 | "sasearch, a simple SA-based full-text search tool, version %s\n", 62 | divsufsort_version()); 63 | fprintf(stderr, "usage: %s PATTERN FILE SAFILE\n\n", progname); 64 | exit(status); 65 | } 66 | /* Usage: sasearch PATTERN FILE SAFILE -- load FILE and n saidx_t entries from SAFILE, then print SA[left..left+size-1] as reported by sa_search(). */ 67 | int 68 | main(int argc, const char *argv[]) { 69 | FILE *fp; 70 | const char *P; 71 | sauchar_t *T; 72 | saidx_t *SA; 73 | LFS_OFF_T n; 74 | size_t Psize; 75 | saidx_t i, size, left; 76 | 77 | if((argc == 1) || 78 | (strcmp(argv[1], "-h") == 0) || 79 | (strcmp(argv[1], "--help") == 0)) { print_help(argv[0], EXIT_SUCCESS); } 80 | if(argc != 4) { print_help(argv[0], EXIT_FAILURE); } 81 | 82 | P = argv[1]; 83 | Psize = strlen(P); 84 | 85 | /* Open a file for reading. */ 86 | #if HAVE_FOPEN_S 87 | if(fopen_s(&fp, argv[2], "rb") != 0) { 88 | #else 89 | if((fp = LFS_FOPEN(argv[2], "rb")) == NULL) { 90 | #endif 91 | fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], argv[2]); 92 | perror(NULL); 93 | exit(EXIT_FAILURE); 94 | } 95 | 96 | /* Get the file size. */ 97 | if(LFS_FSEEK(fp, 0, SEEK_END) == 0) { 98 | n = LFS_FTELL(fp); 99 | rewind(fp); 100 | if(n < 0) { 101 | fprintf(stderr, "%s: Cannot ftell `%s': ", argv[0], argv[2]); 102 | perror(NULL); 103 | exit(EXIT_FAILURE); 104 | } 105 | } else { 106 | fprintf(stderr, "%s: Cannot fseek `%s': ", argv[0], argv[2]); 107 | perror(NULL); 108 | exit(EXIT_FAILURE); 109 | } 110 | if(0x7fffffff <= n) { fprintf(stderr, "%s: Input file `%s' is too big.\n", argv[0], argv[2]); exit(EXIT_FAILURE); } /* n is cast to saidx_t below */ 111 | /* Allocate 5n bytes of memory. 
*/ 112 | T = (sauchar_t *)malloc((size_t)n * sizeof(sauchar_t)); 113 | SA = (saidx_t *)malloc((size_t)n * sizeof(saidx_t)); 114 | if((T == NULL) || (SA == NULL)) { 115 | fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]); 116 | exit(EXIT_FAILURE); 117 | } 118 | 119 | /* Read n bytes of data. */ 120 | if(fread(T, sizeof(sauchar_t), (size_t)n, fp) != (size_t)n) { 121 | fprintf(stderr, "%s: %s `%s': ", 122 | argv[0], 123 | (ferror(fp) || !feof(fp)) ? "Cannot read from" : "Unexpected EOF in", 124 | argv[2]); 125 | perror(NULL); 126 | exit(EXIT_FAILURE); 127 | } 128 | fclose(fp); 129 | 130 | /* Open the SA file for reading. */ 131 | #if HAVE_FOPEN_S 132 | if(fopen_s(&fp, argv[3], "rb") != 0) { 133 | #else 134 | if((fp = LFS_FOPEN(argv[3], "rb")) == NULL) { 135 | #endif 136 | fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], argv[3]); 137 | perror(NULL); 138 | exit(EXIT_FAILURE); 139 | } 140 | 141 | /* Read n * sizeof(saidx_t) bytes of data. */ 142 | if(fread(SA, sizeof(saidx_t), (size_t)n, fp) != (size_t)n) { 143 | fprintf(stderr, "%s: %s `%s': ", 144 | argv[0], 145 | (ferror(fp) || !feof(fp)) ? "Cannot read from" : "Unexpected EOF in", 146 | argv[3]); 147 | perror(NULL); 148 | exit(EXIT_FAILURE); 149 | } 150 | fclose(fp); 151 | 152 | /* Search and print */ 153 | size = sa_search(T, (saidx_t)n, 154 | (const sauchar_t *)P, (saidx_t)Psize, 155 | SA, (saidx_t)n, &left); 156 | for(i = 0; i < size; ++i) { 157 | fprintf(stdout, "%" PRIdSAIDX_T "\n", SA[left + i]); 158 | } 159 | 160 | /* Deallocate memory. */ 161 | free(SA); 162 | free(T); 163 | 164 | return 0; 165 | } 166 | -------------------------------------------------------------------------------- /examples/suftest.c: -------------------------------------------------------------------------------- 1 | /* 2 | * suftest.c for libdivsufsort 3 | * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. 
4 | * 5 | * Permission is hereby granted, free of charge, to any person 6 | * obtaining a copy of this software and associated documentation 7 | * files (the "Software"), to deal in the Software without 8 | * restriction, including without limitation the rights to use, 9 | * copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the 11 | * Software is furnished to do so, subject to the following 12 | * conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be 15 | * included in all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 19 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 20 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 21 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 22 | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 24 | * OTHER DEALINGS IN THE SOFTWARE. 
25 | */ 26 | 27 | #if HAVE_CONFIG_H 28 | # include "config.h" 29 | #endif 30 | #include <stdio.h> 31 | #if HAVE_STRING_H 32 | # include <string.h> 33 | #endif 34 | #if HAVE_STDLIB_H 35 | # include <stdlib.h> 36 | #endif 37 | #if HAVE_MEMORY_H 38 | # include <memory.h> 39 | #endif 40 | #if HAVE_STDDEF_H 41 | # include <stddef.h> 42 | #endif 43 | #if HAVE_STRINGS_H 44 | # include <strings.h> 45 | #endif 46 | #if HAVE_SYS_TYPES_H 47 | # include <sys/types.h> 48 | #endif 49 | #if HAVE_IO_H && HAVE_FCNTL_H 50 | # include <io.h> 51 | # include <fcntl.h> 52 | #endif 53 | #include <time.h> 54 | #include <divsufsort.h> 55 | #include "lfs.h" 56 | 57 | /* Print the program banner and usage line to stderr, then exit with `status`. */ 58 | static 59 | void 60 | print_help(const char *progname, int status) { 61 | fprintf(stderr, 62 | "suftest, a suffixsort tester, version %s.\n", 63 | divsufsort_version()); 64 | fprintf(stderr, "usage: %s FILE\n\n", progname); 65 | exit(status); 66 | } 67 | /* Usage: suftest FILE ("-" selects stdin) -- build the suffix array with divsufsort(), report the elapsed time, and verify it with sufcheck(). */ 68 | int 69 | main(int argc, const char *argv[]) { 70 | FILE *fp; 71 | const char *fname; 72 | sauchar_t *T; 73 | saidx_t *SA; 74 | LFS_OFF_T n; 75 | clock_t start, finish; 76 | saint_t needclose = 1; 77 | 78 | /* Check arguments. */ 79 | if((argc == 1) || 80 | (strcmp(argv[1], "-h") == 0) || 81 | (strcmp(argv[1], "--help") == 0)) { print_help(argv[0], EXIT_SUCCESS); } 82 | if(argc != 2) { print_help(argv[0], EXIT_FAILURE); } 83 | 84 | /* Open a file for reading. */ 85 | if(strcmp(argv[1], "-") != 0) { 86 | #if HAVE_FOPEN_S 87 | if(fopen_s(&fp, fname = argv[1], "rb") != 0) { 88 | #else 89 | if((fp = LFS_FOPEN(fname = argv[1], "rb")) == NULL) { 90 | #endif 91 | fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], fname); 92 | perror(NULL); 93 | exit(EXIT_FAILURE); 94 | } 95 | } else { 96 | #if HAVE__SETMODE && HAVE__FILENO 97 | if(_setmode(_fileno(stdin), _O_BINARY) == -1) { 98 | fprintf(stderr, "%s: Cannot set mode: ", argv[0]); 99 | perror(NULL); 100 | exit(EXIT_FAILURE); 101 | } 102 | #endif 103 | fp = stdin; 104 | fname = "stdin"; 105 | needclose = 0; 106 | } 107 | 108 | /* Get the file size. 
*/ 109 | if(LFS_FSEEK(fp, 0, SEEK_END) == 0) { 110 | n = LFS_FTELL(fp); 111 | rewind(fp); 112 | if(n < 0) { 113 | fprintf(stderr, "%s: Cannot ftell `%s': ", argv[0], fname); 114 | perror(NULL); 115 | exit(EXIT_FAILURE); 116 | } 117 | if(0x7fffffff <= n) { 118 | fprintf(stderr, "%s: Input file `%s' is too big.\n", argv[0], fname); 119 | exit(EXIT_FAILURE); 120 | } 121 | } else { 122 | fprintf(stderr, "%s: Cannot fseek `%s': ", argv[0], fname); 123 | perror(NULL); 124 | exit(EXIT_FAILURE); 125 | } 126 | 127 | /* Allocate 5n bytes of memory. */ 128 | T = (sauchar_t *)malloc((size_t)n * sizeof(sauchar_t)); 129 | SA = (saidx_t *)malloc((size_t)n * sizeof(saidx_t)); 130 | if((T == NULL) || (SA == NULL)) { 131 | fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]); 132 | exit(EXIT_FAILURE); 133 | } 134 | 135 | /* Read n bytes of data. */ 136 | if(fread(T, sizeof(sauchar_t), (size_t)n, fp) != (size_t)n) { 137 | fprintf(stderr, "%s: %s `%s': ", 138 | argv[0], 139 | (ferror(fp) || !feof(fp)) ? "Cannot read from" : "Unexpected EOF in", 140 | fname); 141 | perror(NULL); 142 | exit(EXIT_FAILURE); 143 | } 144 | if(needclose & 1) { fclose(fp); } 145 | 146 | /* Construct the suffix array. */ 147 | fprintf(stderr, "%s: %" PRIdOFF_T " bytes ... ", fname, n); 148 | start = clock(); 149 | if(divsufsort(T, SA, (saidx_t)n) != 0) { 150 | fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]); 151 | exit(EXIT_FAILURE); 152 | } 153 | finish = clock(); 154 | fprintf(stderr, "%.4f sec\n", (double)(finish - start) / (double)CLOCKS_PER_SEC); 155 | 156 | /* Check the suffix array. */ 157 | if(sufcheck(T, SA, (saidx_t)n, 1) != 0) { exit(EXIT_FAILURE); } 158 | 159 | /* Deallocate memory. 
*/ 160 | free(SA); 161 | free(T); 162 | 163 | return 0; 164 | } 165 | -------------------------------------------------------------------------------- /examples/mksary.c: -------------------------------------------------------------------------------- 1 | /* 2 | * mksary.c for libdivsufsort 3 | * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. 4 | * 5 | * Permission is hereby granted, free of charge, to any person 6 | * obtaining a copy of this software and associated documentation 7 | * files (the "Software"), to deal in the Software without 8 | * restriction, including without limitation the rights to use, 9 | * copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the 11 | * Software is furnished to do so, subject to the following 12 | * conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be 15 | * included in all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 19 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 20 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 21 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 22 | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 24 | * OTHER DEALINGS IN THE SOFTWARE. 
25 | */ 26 | 27 | #if HAVE_CONFIG_H 28 | # include "config.h" 29 | #endif 30 | #include <stdio.h> 31 | #if HAVE_STRING_H 32 | # include <string.h> 33 | #endif 34 | #if HAVE_STDLIB_H 35 | # include <stdlib.h> 36 | #endif 37 | #if HAVE_MEMORY_H 38 | # include <memory.h> 39 | #endif 40 | #if HAVE_STDDEF_H 41 | # include <stddef.h> 42 | #endif 43 | #if HAVE_STRINGS_H 44 | # include <strings.h> 45 | #endif 46 | #if HAVE_SYS_TYPES_H 47 | # include <sys/types.h> 48 | #endif 49 | #if HAVE_IO_H && HAVE_FCNTL_H 50 | # include <io.h> 51 | # include <fcntl.h> 52 | #endif 53 | #include <time.h> 54 | #include <divsufsort.h> 55 | #include "lfs.h" 56 | 57 | /* Print the program banner and usage line to stderr, then exit with `status`. */ 58 | static 59 | void 60 | print_help(const char *progname, int status) { 61 | fprintf(stderr, 62 | "mksary, a simple suffix array builder, version %s.\n", 63 | divsufsort_version()); 64 | fprintf(stderr, "usage: %s INFILE OUTFILE\n\n", progname); 65 | exit(status); 66 | } 67 | /* Usage: mksary INFILE OUTFILE ("-" selects stdin/stdout) -- build the suffix array of INFILE with divsufsort() and write it to OUTFILE as n raw saidx_t values. */ 68 | int 69 | main(int argc, const char *argv[]) { 70 | FILE *fp, *ofp; 71 | const char *fname, *ofname; 72 | sauchar_t *T; 73 | saidx_t *SA; 74 | LFS_OFF_T n; 75 | clock_t start, finish; 76 | saint_t needclose = 3; 77 | 78 | /* Check arguments. */ 79 | if((argc == 1) || 80 | (strcmp(argv[1], "-h") == 0) || 81 | (strcmp(argv[1], "--help") == 0)) { print_help(argv[0], EXIT_SUCCESS); } 82 | if(argc != 3) { print_help(argv[0], EXIT_FAILURE); } 83 | 84 | /* Open a file for reading. */ 85 | if(strcmp(argv[1], "-") != 0) { 86 | #if HAVE_FOPEN_S 87 | if(fopen_s(&fp, fname = argv[1], "rb") != 0) { 88 | #else 89 | if((fp = LFS_FOPEN(fname = argv[1], "rb")) == NULL) { 90 | #endif 91 | fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], fname); 92 | perror(NULL); 93 | exit(EXIT_FAILURE); 94 | } 95 | } else { 96 | #if HAVE__SETMODE && HAVE__FILENO 97 | if(_setmode(_fileno(stdin), _O_BINARY) == -1) { 98 | fprintf(stderr, "%s: Cannot set mode: ", argv[0]); 99 | perror(NULL); 100 | exit(EXIT_FAILURE); 101 | } 102 | #endif 103 | fp = stdin; 104 | fname = "stdin"; 105 | needclose ^= 1; 106 | } 107 | 108 | /* Open a file for writing. 
*/ 109 | if(strcmp(argv[2], "-") != 0) { 110 | #if HAVE_FOPEN_S 111 | if(fopen_s(&ofp, ofname = argv[2], "wb") != 0) { 112 | #else 113 | if((ofp = LFS_FOPEN(ofname = argv[2], "wb")) == NULL) { 114 | #endif 115 | fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], ofname); 116 | perror(NULL); 117 | exit(EXIT_FAILURE); 118 | } 119 | } else { 120 | #if HAVE__SETMODE && HAVE__FILENO 121 | if(_setmode(_fileno(stdout), _O_BINARY) == -1) { 122 | fprintf(stderr, "%s: Cannot set mode: ", argv[0]); 123 | perror(NULL); 124 | exit(EXIT_FAILURE); 125 | } 126 | #endif 127 | ofp = stdout; 128 | ofname = "stdout"; 129 | needclose ^= 2; 130 | } 131 | 132 | /* Get the file size. */ 133 | if(LFS_FSEEK(fp, 0, SEEK_END) == 0) { 134 | n = LFS_FTELL(fp); 135 | rewind(fp); 136 | if(n < 0) { 137 | fprintf(stderr, "%s: Cannot ftell `%s': ", argv[0], fname); 138 | perror(NULL); 139 | exit(EXIT_FAILURE); 140 | } 141 | if(0x7fffffff <= n) { 142 | fprintf(stderr, "%s: Input file `%s' is too big.\n", argv[0], fname); 143 | exit(EXIT_FAILURE); 144 | } 145 | } else { 146 | fprintf(stderr, "%s: Cannot fseek `%s': ", argv[0], fname); 147 | perror(NULL); 148 | exit(EXIT_FAILURE); 149 | } 150 | 151 | /* Allocate 5n bytes of memory. */ 152 | T = (sauchar_t *)malloc((size_t)n * sizeof(sauchar_t)); 153 | SA = (saidx_t *)malloc((size_t)n * sizeof(saidx_t)); 154 | if((T == NULL) || (SA == NULL)) { 155 | fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]); 156 | exit(EXIT_FAILURE); 157 | } 158 | 159 | /* Read n bytes of data. */ 160 | if(fread(T, sizeof(sauchar_t), (size_t)n, fp) != (size_t)n) { 161 | fprintf(stderr, "%s: %s `%s': ", 162 | argv[0], 163 | (ferror(fp) || !feof(fp)) ? "Cannot read from" : "Unexpected EOF in", 164 | fname); 165 | perror(NULL); 166 | exit(EXIT_FAILURE); 167 | } 168 | if(needclose & 1) { fclose(fp); } 169 | 170 | /* Construct the suffix array. */ 171 | fprintf(stderr, "%s: %" PRIdOFF_T " bytes ... 
", fname, n); 172 | start = clock(); 173 | if(divsufsort(T, SA, (saidx_t)n) != 0) { 174 | fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]); 175 | exit(EXIT_FAILURE); 176 | } 177 | finish = clock(); 178 | fprintf(stderr, "%.4f sec\n", (double)(finish - start) / (double)CLOCKS_PER_SEC); 179 | 180 | /* Write the suffix array. */ 181 | if(fwrite(SA, sizeof(saidx_t), (size_t)n, ofp) != (size_t)n) { 182 | fprintf(stderr, "%s: Cannot write to `%s': ", argv[0], ofname); 183 | perror(NULL); 184 | exit(EXIT_FAILURE); 185 | } 186 | if(needclose & 2) { fclose(ofp); } 187 | 188 | /* Deallocate memory. */ 189 | free(SA); 190 | free(T); 191 | 192 | return 0; 193 | } 194 | -------------------------------------------------------------------------------- /include/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include(CheckIncludeFiles) 2 | include(CheckIncludeFile) 3 | include(CheckSymbolExists) 4 | include(CheckTypeSize) 5 | include(CheckFunctionKeywords) 6 | include(CheckLFS) 7 | 8 | ## Checks for header files ## 9 | check_include_file("inttypes.h" HAVE_INTTYPES_H) 10 | check_include_file("memory.h" HAVE_MEMORY_H) 11 | check_include_file("stddef.h" HAVE_STDDEF_H) 12 | check_include_file("stdint.h" HAVE_STDINT_H) 13 | check_include_file("stdlib.h" HAVE_STDLIB_H) 14 | check_include_file("string.h" HAVE_STRING_H) 15 | check_include_file("strings.h" HAVE_STRINGS_H) 16 | check_include_file("sys/types.h" HAVE_SYS_TYPES_H) 17 | if(HAVE_INTTYPES_H) 18 | set(INCFILE "#include <inttypes.h>") 19 | elseif(HAVE_STDINT_H) 20 | set(INCFILE "#include <stdint.h>") 21 | else(HAVE_INTTYPES_H) 22 | set(INCFILE "") 23 | endif(HAVE_INTTYPES_H) 24 | 25 | ## create configuration files from .cmake file ## 26 | if(BUILD_EXAMPLES) 27 | ## Checks for WinIO ## 28 | if(WIN32) 29 | check_include_file("io.h" HAVE_IO_H) 30 | check_include_file("fcntl.h" HAVE_FCNTL_H) 31 | check_symbol_exists("_setmode" "io.h;fcntl.h" HAVE__SETMODE) 32 | if(NOT HAVE__SETMODE) 33 | 
check_symbol_exists("setmode" "io.h;fcntl.h" HAVE_SETMODE) 34 | endif(NOT HAVE__SETMODE) 35 | check_symbol_exists("_fileno" "stdio.h" HAVE__FILENO) 36 | check_symbol_exists("fopen_s" "stdio.h" HAVE_FOPEN_S) 37 | check_symbol_exists("_O_BINARY" "fcntl.h" HAVE__O_BINARY) 38 | endif(WIN32) 39 | 40 | ## Checks for large file support ## 41 | check_lfs(WITH_LFS) 42 | configure_file("${CMAKE_CURRENT_SOURCE_DIR}/lfs.h.cmake" "${CMAKE_CURRENT_BINARY_DIR}/lfs.h" @ONLY) 43 | endif(BUILD_EXAMPLES) 44 | 45 | ## generate config.h ## 46 | check_function_keywords("inline;__inline;__inline__;__declspec(dllexport);__declspec(dllimport)") 47 | if(HAVE_INLINE) 48 | set(INLINE "inline") 49 | elseif(HAVE___INLINE) 50 | set(INLINE "__inline") 51 | elseif(HAVE___INLINE__) 52 | set(INLINE "__inline__") 53 | else(HAVE_INLINE) 54 | set(INLINE "") 55 | endif(HAVE_INLINE) 56 | configure_file("${CMAKE_CURRENT_SOURCE_DIR}/config.h.cmake" "${CMAKE_CURRENT_BINARY_DIR}/config.h") 57 | # The checks below pick the C types and printf formats (SAUCHAR_TYPE, SAINT32_*, SAINT64_*) that are set before divsufsort.h.cmake is configured. 58 | ## Checks for types ## 59 | # sauchar_t (8bit) 60 | check_type_size("uint8_t" UINT8_T) 61 | if(HAVE_UINT8_T) 62 | set(SAUCHAR_TYPE "uint8_t") 63 | else(HAVE_UINT8_T) 64 | check_type_size("unsigned char" SIZEOF_UNSIGNED_CHAR) 65 | if("${SIZEOF_UNSIGNED_CHAR}" STREQUAL "1") 66 | set(SAUCHAR_TYPE "unsigned char") 67 | else("${SIZEOF_UNSIGNED_CHAR}" STREQUAL "1") 68 | message(FATAL_ERROR "Cannot find unsigned 8-bit integer type") 69 | endif("${SIZEOF_UNSIGNED_CHAR}" STREQUAL "1") 70 | endif(HAVE_UINT8_T) 71 | # saint_t (32bit) 72 | check_type_size("int32_t" INT32_T) 73 | if(HAVE_INT32_T) 74 | set(SAINT32_TYPE "int32_t") 75 | check_symbol_exists("PRId32" "inttypes.h" HAVE_PRID32) 76 | if(HAVE_PRID32) 77 | set(SAINT32_PRId "PRId32") 78 | else(HAVE_PRID32) 79 | set(SAINT32_PRId "\"d\"") 80 | endif(HAVE_PRID32) 81 | else(HAVE_INT32_T) 82 | check_type_size("int" SIZEOF_INT) 83 | check_type_size("long" SIZEOF_LONG) 84 | check_type_size("short" SIZEOF_SHORT) 85 | check_type_size("__int32" SIZEOF___INT32) 86 | 
if("${SIZEOF_INT}" STREQUAL "4") 87 | set(SAINT32_TYPE "int") 88 | set(SAINT32_PRId "\"d\"") 89 | elseif("${SIZEOF_LONG}" STREQUAL "4") 90 | set(SAINT32_TYPE "long") 91 | set(SAINT32_PRId "\"ld\"") 92 | elseif("${SIZEOF_SHORT}" STREQUAL "4") 93 | set(SAINT32_TYPE "short") 94 | set(SAINT32_PRId "\"d\"") 95 | elseif("${SIZEOF___INT32}" STREQUAL "4") 96 | set(SAINT32_TYPE "__int32") 97 | set(SAINT32_PRId "\"d\"") 98 | else("${SIZEOF_INT}" STREQUAL "4") 99 | message(FATAL_ERROR "Cannot find 32-bit integer type") 100 | endif("${SIZEOF_INT}" STREQUAL "4") 101 | endif(HAVE_INT32_T) 102 | # saint64_t (64bit) 103 | if(BUILD_DIVSUFSORT64) 104 | check_type_size("int64_t" INT64_T) 105 | if(HAVE_INT64_T) 106 | set(SAINT64_TYPE "int64_t") 107 | check_symbol_exists("PRId64" "inttypes.h" HAVE_PRID64) 108 | if(HAVE_PRID64) 109 | set(SAINT64_PRId "PRId64") 110 | else(HAVE_PRID64) 111 | set(SAINT64_PRId "\"lld\"") 112 | endif(HAVE_PRID64) 113 | else(HAVE_INT64_T) 114 | check_type_size("int" SIZEOF_INT) 115 | check_type_size("long" SIZEOF_LONG) 116 | check_type_size("long long" SIZEOF_LONG_LONG) 117 | check_type_size("__int64" SIZEOF___INT64) 118 | if("${SIZEOF_INT}" STREQUAL "8") 119 | set(SAINT64_TYPE "int") 120 | set(SAINT64_PRId "\"d\"") 121 | elseif("${SIZEOF_LONG}" STREQUAL "8") 122 | set(SAINT64_TYPE "long") 123 | set(SAINT64_PRId "\"ld\"") 124 | elseif("${SIZEOF_LONG_LONG}" STREQUAL "8") 125 | set(SAINT64_TYPE "long long") 126 | set(SAINT64_PRId "\"lld\"") 127 | elseif("${SIZEOF___INT64}" STREQUAL "8") 128 | set(SAINT64_TYPE "__int64") 129 | set(SAINT64_PRId "\"I64d\"") 130 | else("${SIZEOF_INT}" STREQUAL "8") 131 | message(SEND_ERROR "Cannot find 64-bit integer type") 132 | set(BUILD_DIVSUFSORT64 OFF) # NOTE(review): plain set(); presumably only this directory scope sees OFF -- confirm 133 | endif("${SIZEOF_INT}" STREQUAL "8") 134 | endif(HAVE_INT64_T) 135 | endif(BUILD_DIVSUFSORT64) 136 | # divsufsort.h.cmake is configured once with W64BIT="" and, when BUILD_DIVSUFSORT64 is on, again with W64BIT="64". 137 | ## generate divsufsort.h ## 138 | set(DIVSUFSORT_IMPORT "") 139 | set(DIVSUFSORT_EXPORT "") 140 | if(BUILD_SHARED_LIBS) 141 | if(HAVE___DECLSPEC_DLLIMPORT_) 142 | 
set(DIVSUFSORT_IMPORT "__declspec(dllimport)") 143 | endif(HAVE___DECLSPEC_DLLIMPORT_) 144 | if(HAVE___DECLSPEC_DLLEXPORT_) 145 | set(DIVSUFSORT_EXPORT "__declspec(dllexport)") 146 | endif(HAVE___DECLSPEC_DLLEXPORT_) 147 | endif(BUILD_SHARED_LIBS) 148 | set(W64BIT "") 149 | set(SAINDEX_TYPE "${SAINT32_TYPE}") 150 | set(SAINDEX_PRId "${SAINT32_PRId}") 151 | set(SAINT_PRId "${SAINT32_PRId}") 152 | configure_file("${CMAKE_CURRENT_SOURCE_DIR}/divsufsort.h.cmake" 153 | "${CMAKE_CURRENT_BINARY_DIR}/divsufsort.h" @ONLY) 154 | install(FILES "${CMAKE_CURRENT_BINARY_DIR}/divsufsort.h" DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) 155 | if(BUILD_DIVSUFSORT64) 156 | set(W64BIT "64") 157 | set(SAINDEX_TYPE "${SAINT64_TYPE}") 158 | set(SAINDEX_PRId "${SAINT64_PRId}") 159 | configure_file("${CMAKE_CURRENT_SOURCE_DIR}/divsufsort.h.cmake" 160 | "${CMAKE_CURRENT_BINARY_DIR}/divsufsort64.h" @ONLY) 161 | install(FILES "${CMAKE_CURRENT_BINARY_DIR}/divsufsort64.h" DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) 162 | endif(BUILD_DIVSUFSORT64) 163 | -------------------------------------------------------------------------------- /examples/unbwt.c: -------------------------------------------------------------------------------- 1 | /* 2 | * unbwt.c for libdivsufsort 3 | * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. 4 | * 5 | * Permission is hereby granted, free of charge, to any person 6 | * obtaining a copy of this software and associated documentation 7 | * files (the "Software"), to deal in the Software without 8 | * restriction, including without limitation the rights to use, 9 | * copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the 11 | * Software is furnished to do so, subject to the following 12 | * conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be 15 | * included in all copies or substantial portions of the Software. 
16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 19 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 20 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 21 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 22 | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 24 | * OTHER DEALINGS IN THE SOFTWARE. 25 | */ 26 | 27 | #if HAVE_CONFIG_H 28 | # include "config.h" 29 | #endif 30 | #include <stdio.h> 31 | #if HAVE_STRING_H 32 | # include <string.h> 33 | #endif 34 | #if HAVE_STDLIB_H 35 | # include <stdlib.h> 36 | #endif 37 | #if HAVE_MEMORY_H 38 | # include <memory.h> 39 | #endif 40 | #if HAVE_STDDEF_H 41 | # include <stddef.h> 42 | #endif 43 | #if HAVE_STRINGS_H 44 | # include <strings.h> 45 | #endif 46 | #if HAVE_SYS_TYPES_H 47 | # include <sys/types.h> 48 | #endif 49 | #if HAVE_IO_H && HAVE_FCNTL_H 50 | # include <io.h> 51 | # include <fcntl.h> 52 | #endif 53 | #include <time.h> 54 | #include <divsufsort.h> 55 | #include "lfs.h" 56 | 57 | /* Read a 4-byte little-endian integer from fp into *n; returns the number of bytes read (4 on success). Bytes are combined as unsigned values so the top byte never shifts into the sign bit of int. */ 58 | static 59 | size_t 60 | read_int(FILE *fp, saidx_t *n) { 61 | unsigned char c[4]; 62 | size_t m = fread(c, sizeof(unsigned char), 4, fp); 63 | if(m == 4) { 64 | *n = (saidx_t)(((unsigned long)c[0] << 0) | ((unsigned long)c[1] << 8) | 65 | ((unsigned long)c[2] << 16) | ((unsigned long)c[3] << 24)); 66 | } 67 | return m; 68 | } 69 | /* Print the program banner and usage line to stderr, then exit with `status`. */ 70 | static 71 | void 72 | print_help(const char *progname, int status) { 73 | fprintf(stderr, 74 | "unbwt, an inverse burrows-wheeler transform program, version %s.\n", 75 | divsufsort_version()); 76 | fprintf(stderr, "usage: %s INFILE OUTFILE\n\n", progname); 77 | exit(status); 78 | } 79 | /* Usage: unbwt INFILE OUTFILE ("-" selects stdin/stdout) -- read the blocksize, then for each (primary index, block) record run inverse_bw_transform() and write the result. */ 80 | int 81 | main(int argc, const char *argv[]) { 82 | FILE *fp, *ofp; 83 | const char *fname, *ofname; 84 | sauchar_t *T; 85 | saidx_t *A; 86 | LFS_OFF_T n; 87 | size_t m; 88 | saidx_t pidx; 89 | clock_t start, finish; 90 | saint_t err, blocksize, needclose = 3; 91 | 92 | /* Check arguments. 
*/ 93 | if((argc == 1) || 94 | (strcmp(argv[1], "-h") == 0) || 95 | (strcmp(argv[1], "--help") == 0)) { print_help(argv[0], EXIT_SUCCESS); } 96 | if(argc != 3) { print_help(argv[0], EXIT_FAILURE); } 97 | 98 | /* Open a file for reading. */ 99 | if(strcmp(argv[1], "-") != 0) { 100 | #if HAVE_FOPEN_S 101 | if(fopen_s(&fp, fname = argv[1], "rb") != 0) { 102 | #else 103 | if((fp = LFS_FOPEN(fname = argv[1], "rb")) == NULL) { 104 | #endif 105 | fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], fname); 106 | perror(NULL); 107 | exit(EXIT_FAILURE); 108 | } 109 | } else { 110 | #if HAVE__SETMODE && HAVE__FILENO 111 | if(_setmode(_fileno(stdin), _O_BINARY) == -1) { 112 | fprintf(stderr, "%s: Cannot set mode: ", argv[0]); 113 | perror(NULL); 114 | exit(EXIT_FAILURE); 115 | } 116 | #endif 117 | fp = stdin; 118 | fname = "stdin"; 119 | needclose ^= 1; 120 | } 121 | 122 | /* Open a file for writing. */ 123 | if(strcmp(argv[2], "-") != 0) { 124 | #if HAVE_FOPEN_S 125 | if(fopen_s(&ofp, ofname = argv[2], "wb") != 0) { 126 | #else 127 | if((ofp = LFS_FOPEN(ofname = argv[2], "wb")) == NULL) { 128 | #endif 129 | fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], ofname); 130 | perror(NULL); 131 | exit(EXIT_FAILURE); 132 | } 133 | } else { 134 | #if HAVE__SETMODE && HAVE__FILENO 135 | if(_setmode(_fileno(stdout), _O_BINARY) == -1) { 136 | fprintf(stderr, "%s: Cannot set mode: ", argv[0]); 137 | perror(NULL); 138 | exit(EXIT_FAILURE); 139 | } 140 | #endif 141 | ofp = stdout; 142 | ofname = "stdout"; 143 | needclose ^= 2; 144 | } 145 | 146 | /* Read the blocksize. */ 147 | if(read_int(fp, &blocksize) != 4) { 148 | fprintf(stderr, "%s: Cannot read from `%s': ", argv[0], fname); 149 | perror(NULL); 150 | exit(EXIT_FAILURE); 151 | } 152 | if((blocksize < 0) || ((((size_t)-1) / sizeof(saidx_t)) < (size_t)blocksize)) { fprintf(stderr, "%s: Invalid blocksize in `%s'.\n", argv[0], fname); exit(EXIT_FAILURE); } /* keeps the size computations below from wrapping */ 153 | /* Allocate 5blocksize bytes of memory. 
*/ 154 | T = (sauchar_t *)malloc(blocksize * sizeof(sauchar_t)); 155 | A = (saidx_t *)malloc(blocksize * sizeof(saidx_t)); 156 | if((T == NULL) || (A == NULL)) { 157 | fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]); 158 | exit(EXIT_FAILURE); 159 | } 160 | 161 | fprintf(stderr, "UnBWT (blocksize %" PRIdSAINT_T ") ... ", blocksize); 162 | start = clock(); 163 | for(n = 0; (m = read_int(fp, &pidx)) != 0; n += m) { 164 | /* Read blocksize bytes of data. */ 165 | if((m != 4) || ((m = fread(T, sizeof(sauchar_t), blocksize, fp)) == 0)) { 166 | fprintf(stderr, "%s: %s `%s': ", 167 | argv[0], 168 | (ferror(fp) || !feof(fp)) ? "Cannot read from" : "Unexpected EOF in", 169 | fname); 170 | perror(NULL); 171 | exit(EXIT_FAILURE); 172 | } 173 | 174 | /* Inverse Burrows-Wheeler Transform. */ 175 | if((err = inverse_bw_transform(T, T, A, m, pidx)) != 0) { 176 | fprintf(stderr, "%s (reverseBWT): %s.\n", 177 | argv[0], 178 | (err == -1) ? "Invalid data" : "Cannot allocate memory"); 179 | exit(EXIT_FAILURE); 180 | } 181 | 182 | /* Write m bytes of data. */ 183 | if(fwrite(T, sizeof(sauchar_t), m, ofp) != m) { 184 | fprintf(stderr, "%s: Cannot write to `%s': ", argv[0], ofname); 185 | perror(NULL); 186 | exit(EXIT_FAILURE); 187 | } 188 | } 189 | if(ferror(fp)) { 190 | fprintf(stderr, "%s: Cannot read from `%s': ", argv[0], fname); 191 | perror(NULL); 192 | exit(EXIT_FAILURE); 193 | } 194 | finish = clock(); 195 | fprintf(stderr, "%" PRIdOFF_T " bytes: %.4f sec\n", 196 | n, (double)(finish - start) / (double)CLOCKS_PER_SEC); 197 | 198 | /* Close files */ 199 | if(needclose & 1) { fclose(fp); } 200 | if(needclose & 2) { fclose(ofp); } 201 | 202 | /* Deallocate memory. 
*/ 203 | free(A); 204 | free(T); 205 | 206 | return 0; 207 | } 208 | -------------------------------------------------------------------------------- /include/divsufsort_private.h: -------------------------------------------------------------------------------- 1 | /* 2 | * divsufsort_private.h for libdivsufsort 3 | * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. 4 | * 5 | * Permission is hereby granted, free of charge, to any person 6 | * obtaining a copy of this software and associated documentation 7 | * files (the "Software"), to deal in the Software without 8 | * restriction, including without limitation the rights to use, 9 | * copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the 11 | * Software is furnished to do so, subject to the following 12 | * conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be 15 | * included in all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 19 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 20 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 21 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 22 | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 24 | * OTHER DEALINGS IN THE SOFTWARE. 
25 | */ 26 | 27 | #ifndef _DIVSUFSORT_PRIVATE_H 28 | #define _DIVSUFSORT_PRIVATE_H 1 29 | 30 | #ifdef __cplusplus 31 | extern "C" { 32 | #endif /* __cplusplus */ 33 | 34 | #if HAVE_CONFIG_H 35 | # include "config.h" 36 | #endif 37 | #include <assert.h> 38 | #include <stdio.h> 39 | #if HAVE_STRING_H 40 | # include <string.h> 41 | #endif 42 | #if HAVE_STDLIB_H 43 | # include <stdlib.h> 44 | #endif 45 | #if HAVE_MEMORY_H 46 | # include <memory.h> 47 | #endif 48 | #if HAVE_STDDEF_H 49 | # include <stddef.h> 50 | #endif 51 | #if HAVE_STRINGS_H 52 | # include <strings.h> 53 | #endif 54 | #if HAVE_INTTYPES_H 55 | # include <inttypes.h> 56 | #else 57 | # if HAVE_STDINT_H 58 | # include <stdint.h> 59 | # endif 60 | #endif 61 | #if defined(BUILD_DIVSUFSORT64) 62 | # include "divsufsort64.h" 63 | # ifndef SAIDX_T 64 | # define SAIDX_T 65 | # define saidx_t saidx64_t 66 | # endif /* SAIDX_T */ 67 | # ifndef PRIdSAIDX_T 68 | # define PRIdSAIDX_T PRIdSAIDX64_T 69 | # endif /* PRIdSAIDX_T */ 70 | # define divsufsort divsufsort64 71 | # define divbwt divbwt64 72 | # define divsufsort_version divsufsort64_version 73 | # define bw_transform bw_transform64 74 | # define inverse_bw_transform inverse_bw_transform64 75 | # define sufcheck sufcheck64 76 | # define sa_search sa_search64 77 | # define sa_simplesearch sa_simplesearch64 78 | # define sssort sssort64 79 | # define trsort trsort64 80 | #else 81 | # include "divsufsort.h" 82 | #endif 83 | 84 | 85 | /*- Constants -*/ 86 | #if !defined(UINT8_MAX) 87 | # define UINT8_MAX (255) 88 | #endif /* UINT8_MAX */ 89 | #if defined(ALPHABET_SIZE) && (ALPHABET_SIZE < 1) 90 | # undef ALPHABET_SIZE 91 | #endif 92 | #if !defined(ALPHABET_SIZE) 93 | # define ALPHABET_SIZE (UINT8_MAX + 1) 94 | #endif 95 | /* for divsufsort.c */ 96 | #define BUCKET_A_SIZE (ALPHABET_SIZE) 97 | #define BUCKET_B_SIZE (ALPHABET_SIZE * ALPHABET_SIZE) 98 | /* for sssort.c */ 99 | #if defined(SS_INSERTIONSORT_THRESHOLD) 100 | # if SS_INSERTIONSORT_THRESHOLD < 1 101 | # undef SS_INSERTIONSORT_THRESHOLD 102 | # define SS_INSERTIONSORT_THRESHOLD (1) 103 | # endif 104 | 
#else 105 | # define SS_INSERTIONSORT_THRESHOLD (8) 106 | #endif 107 | #if defined(SS_BLOCKSIZE) 108 | # if SS_BLOCKSIZE < 0 109 | # undef SS_BLOCKSIZE 110 | # define SS_BLOCKSIZE (0) 111 | # elif 32768 <= SS_BLOCKSIZE 112 | # undef SS_BLOCKSIZE 113 | # define SS_BLOCKSIZE (32767) 114 | # endif 115 | #else 116 | # define SS_BLOCKSIZE (1024) 117 | #endif 118 | /* minstacksize = log(SS_BLOCKSIZE) / log(3) * 2 */ 119 | #if SS_BLOCKSIZE == 0 120 | # if defined(BUILD_DIVSUFSORT64) 121 | # define SS_MISORT_STACKSIZE (96) 122 | # else 123 | # define SS_MISORT_STACKSIZE (64) 124 | # endif 125 | #elif SS_BLOCKSIZE <= 4096 126 | # define SS_MISORT_STACKSIZE (16) 127 | #else 128 | # define SS_MISORT_STACKSIZE (24) 129 | #endif 130 | #if defined(BUILD_DIVSUFSORT64) 131 | # define SS_SMERGE_STACKSIZE (64) 132 | #else 133 | # define SS_SMERGE_STACKSIZE (32) 134 | #endif 135 | /* for trsort.c */ 136 | #define TR_INSERTIONSORT_THRESHOLD (8) 137 | #if defined(BUILD_DIVSUFSORT64) 138 | # define TR_STACKSIZE (96) 139 | #else 140 | # define TR_STACKSIZE (64) 141 | #endif 142 | 143 | /* NOTE: SWAP uses a temporary named `t`, and the STACK_* macros use `stack`, `ssize` and STACK_SIZE, all supplied by the code that expands them; STACK_POP/STACK_POP5 execute `return;` in the enclosing function when the stack is empty. */ 144 | /*- Macros -*/ 145 | #ifndef SWAP 146 | # define SWAP(_a, _b) do { t = (_a); (_a) = (_b); (_b) = t; } while(0) 147 | #endif /* SWAP */ 148 | #ifndef MIN 149 | # define MIN(_a, _b) (((_a) < (_b)) ? (_a) : (_b)) 150 | #endif /* MIN */ 151 | #ifndef MAX 152 | # define MAX(_a, _b) (((_a) > (_b)) ? 
(_a) : (_b)) 153 | #endif /* MAX */ 154 | #define STACK_PUSH(_a, _b, _c, _d)\ 155 | do {\ 156 | assert(ssize < STACK_SIZE);\ 157 | stack[ssize].a = (_a), stack[ssize].b = (_b),\ 158 | stack[ssize].c = (_c), stack[ssize++].d = (_d);\ 159 | } while(0) 160 | #define STACK_PUSH5(_a, _b, _c, _d, _e)\ 161 | do {\ 162 | assert(ssize < STACK_SIZE);\ 163 | stack[ssize].a = (_a), stack[ssize].b = (_b),\ 164 | stack[ssize].c = (_c), stack[ssize].d = (_d), stack[ssize++].e = (_e);\ 165 | } while(0) 166 | #define STACK_POP(_a, _b, _c, _d)\ 167 | do {\ 168 | assert(0 <= ssize);\ 169 | if(ssize == 0) { return; }\ 170 | (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\ 171 | (_c) = stack[ssize].c, (_d) = stack[ssize].d;\ 172 | } while(0) 173 | #define STACK_POP5(_a, _b, _c, _d, _e)\ 174 | do {\ 175 | assert(0 <= ssize);\ 176 | if(ssize == 0) { return; }\ 177 | (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\ 178 | (_c) = stack[ssize].c, (_d) = stack[ssize].d, (_e) = stack[ssize].e;\ 179 | } while(0) 180 | /* for divsufsort.c */ 181 | #define BUCKET_A(_c0) bucket_A[(_c0)] 182 | #if ALPHABET_SIZE == 256 183 | #define BUCKET_B(_c0, _c1) (bucket_B[((_c1) << 8) | (_c0)]) 184 | #define BUCKET_BSTAR(_c0, _c1) (bucket_B[((_c0) << 8) | (_c1)]) 185 | #else 186 | #define BUCKET_B(_c0, _c1) (bucket_B[(_c1) * ALPHABET_SIZE + (_c0)]) 187 | #define BUCKET_BSTAR(_c0, _c1) (bucket_B[(_c0) * ALPHABET_SIZE + (_c1)]) 188 | #endif 189 | 190 | 191 | /*- Private Prototypes -*/ 192 | /* sssort.c */ 193 | void 194 | sssort(const sauchar_t *Td, const saidx_t *PA, 195 | saidx_t *first, saidx_t *last, 196 | saidx_t *buf, saidx_t bufsize, 197 | saidx_t depth, saidx_t n, saint_t lastsuffix); 198 | /* trsort.c */ 199 | void 200 | trsort(saidx_t *ISA, saidx_t *SA, saidx_t n, saidx_t depth); 201 | 202 | 203 | #ifdef __cplusplus 204 | } /* extern "C" */ 205 | #endif /* __cplusplus */ 206 | 207 | #endif /* _DIVSUFSORT_PRIVATE_H */ 208 | -------------------------------------------------------------------------------- 
/include/divsufsort.h.cmake: -------------------------------------------------------------------------------- 1 | /* 2 | * divsufsort@W64BIT@.h for libdivsufsort@W64BIT@ 3 | * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. 4 | * 5 | * Permission is hereby granted, free of charge, to any person 6 | * obtaining a copy of this software and associated documentation 7 | * files (the "Software"), to deal in the Software without 8 | * restriction, including without limitation the rights to use, 9 | * copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the 11 | * Software is furnished to do so, subject to the following 12 | * conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be 15 | * included in all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 19 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 20 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 21 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 22 | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 24 | * OTHER DEALINGS IN THE SOFTWARE. 
25 | */ 26 | 27 | #ifndef _DIVSUFSORT@W64BIT@_H 28 | #define _DIVSUFSORT@W64BIT@_H 1 29 | 30 | #ifdef __cplusplus 31 | extern "C" { 32 | #endif /* __cplusplus */ 33 | 34 | @INCFILE@ 35 | 36 | #ifndef DIVSUFSORT_API 37 | # ifdef DIVSUFSORT_BUILD_DLL 38 | # define DIVSUFSORT_API @DIVSUFSORT_EXPORT@ 39 | # else 40 | # define DIVSUFSORT_API @DIVSUFSORT_IMPORT@ 41 | # endif 42 | #endif 43 | 44 | /*- Datatypes -*/ 45 | #ifndef SAUCHAR_T 46 | #define SAUCHAR_T 47 | typedef @SAUCHAR_TYPE@ sauchar_t; 48 | #endif /* SAUCHAR_T */ 49 | #ifndef SAINT_T 50 | #define SAINT_T 51 | typedef @SAINT32_TYPE@ saint_t; 52 | #endif /* SAINT_T */ 53 | #ifndef SAIDX@W64BIT@_T 54 | #define SAIDX@W64BIT@_T 55 | typedef @SAINDEX_TYPE@ saidx@W64BIT@_t; 56 | #endif /* SAIDX@W64BIT@_T */ 57 | #ifndef PRIdSAINT_T 58 | #define PRIdSAINT_T @SAINT_PRId@ 59 | #endif /* PRIdSAINT_T */ 60 | #ifndef PRIdSAIDX@W64BIT@_T 61 | #define PRIdSAIDX@W64BIT@_T @SAINDEX_PRId@ 62 | #endif /* PRIdSAIDX@W64BIT@_T */ 63 | 64 | 65 | /*- Prototypes -*/ 66 | 67 | /** 68 | * Constructs the suffix array of a given string. 69 | * @param T[0..n-1] The input string. 70 | * @param SA[0..n-1] The output array of suffixes. 71 | * @param n The length of the given string. 72 | * @return 0 if no error occurred, -1 or -2 otherwise. 73 | */ 74 | DIVSUFSORT_API 75 | saint_t 76 | divsufsort@W64BIT@(const sauchar_t *T, saidx@W64BIT@_t *SA, saidx@W64BIT@_t n); 77 | 78 | /** 79 | * Constructs the burrows-wheeler transformed string of a given string. 80 | * @param T[0..n-1] The input string. 81 | * @param U[0..n-1] The output string. (can be T) 82 | * @param A[0..n-1] The temporary array. (can be NULL) 83 | * @param n The length of the given string. 84 | * @return The primary index if no error occurred, -1 or -2 otherwise. 85 | */ 86 | DIVSUFSORT_API 87 | saidx@W64BIT@_t 88 | divbwt@W64BIT@(const sauchar_t *T, sauchar_t *U, saidx@W64BIT@_t *A, saidx@W64BIT@_t n); 89 | 90 | /** 91 | * Returns the version of the divsufsort library. 
92 | * @return The version number string. 93 | */ 94 | DIVSUFSORT_API 95 | const char * 96 | divsufsort@W64BIT@_version(void); 97 | 98 | 99 | /** 100 | * Constructs the burrows-wheeler transformed string of a given string and suffix array. 101 | * @param T[0..n-1] The input string. 102 | * @param U[0..n-1] The output string. (can be T) 103 | * @param SA[0..n-1] The suffix array. (can be NULL) 104 | * @param n The length of the given string. 105 | * @param idx The output primary index. 106 | * @return 0 if no error occurred, -1 or -2 otherwise. 107 | */ 108 | DIVSUFSORT_API 109 | saint_t 110 | bw_transform@W64BIT@(const sauchar_t *T, sauchar_t *U, 111 | saidx@W64BIT@_t *SA /* can be NULL */, 112 | saidx@W64BIT@_t n, saidx@W64BIT@_t *idx); 113 | 114 | /** 115 | * Inverse BW-transforms a given BWTed string. 116 | * @param T[0..n-1] The input string. 117 | * @param U[0..n-1] The output string. (can be T) 118 | * @param A[0..n-1] The temporary array. (can be NULL) 119 | * @param n The length of the given string. 120 | * @param idx The primary index. 121 | * @return 0 if no error occurred, -1 or -2 otherwise. 122 | */ 123 | DIVSUFSORT_API 124 | saint_t 125 | inverse_bw_transform@W64BIT@(const sauchar_t *T, sauchar_t *U, 126 | saidx@W64BIT@_t *A /* can be NULL */, 127 | saidx@W64BIT@_t n, saidx@W64BIT@_t idx); 128 | 129 | /** 130 | * Checks the correctness of a given suffix array. 131 | * @param T[0..n-1] The input string. 132 | * @param SA[0..n-1] The input suffix array. 133 | * @param n The length of the given string. 134 | * @param verbose The verbose mode. 135 | * @return 0 if no error occurred, a negative value otherwise. 136 | */ 137 | DIVSUFSORT_API 138 | saint_t 139 | sufcheck@W64BIT@(const sauchar_t *T, const saidx@W64BIT@_t *SA, saidx@W64BIT@_t n, saint_t verbose); 140 | 141 | /** 142 | * Search for the pattern P in the string T. 143 | * @param T[0..Tsize-1] The input string. 144 | * @param Tsize The length of the given string. 145 | * @param P[0..Psize-1] The input pattern string.
146 | * @param Psize The length of the given pattern string. 147 | * @param SA[0..SAsize-1] The input suffix array. 148 | * @param SAsize The length of the given suffix array. 149 | * @param left The output index: the position in SA of the first match. (can be NULL) 150 | * @return The count of matches if no error occurred, -1 otherwise. 151 | */ 152 | DIVSUFSORT_API 153 | saidx@W64BIT@_t 154 | sa_search@W64BIT@(const sauchar_t *T, saidx@W64BIT@_t Tsize, 155 | const sauchar_t *P, saidx@W64BIT@_t Psize, 156 | const saidx@W64BIT@_t *SA, saidx@W64BIT@_t SAsize, 157 | saidx@W64BIT@_t *left); 158 | 159 | /** 160 | * Search for the character c in the string T. 161 | * @param T[0..Tsize-1] The input string. 162 | * @param Tsize The length of the given string. 163 | * @param SA[0..SAsize-1] The input suffix array. 164 | * @param SAsize The length of the given suffix array. 165 | * @param c The input character. 166 | * @param left The output index: the position in SA of the first match. (can be NULL) 167 | * @return The count of matches if no error occurred, -1 otherwise. 168 | */ 169 | DIVSUFSORT_API 170 | saidx@W64BIT@_t 171 | sa_simplesearch@W64BIT@(const sauchar_t *T, saidx@W64BIT@_t Tsize, 172 | const saidx@W64BIT@_t *SA, saidx@W64BIT@_t SAsize, 173 | saint_t c, saidx@W64BIT@_t *left); 174 | 175 | 176 | #ifdef __cplusplus 177 | } /* extern "C" */ 178 | #endif /* __cplusplus */ 179 | 180 | #endif /* _DIVSUFSORT@W64BIT@_H */ 181 | -------------------------------------------------------------------------------- /examples/bwt.c: -------------------------------------------------------------------------------- 1 | /* 2 | * bwt.c for libdivsufsort 3 | * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
4 | * 5 | * Permission is hereby granted, free of charge, to any person 6 | * obtaining a copy of this software and associated documentation 7 | * files (the "Software"), to deal in the Software without 8 | * restriction, including without limitation the rights to use, 9 | * copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the 11 | * Software is furnished to do so, subject to the following 12 | * conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be 15 | * included in all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 19 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 20 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 21 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 22 | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 24 | * OTHER DEALINGS IN THE SOFTWARE. 
25 | */ 26 | 27 | #if HAVE_CONFIG_H 28 | # include "config.h" 29 | #endif 30 | #include <stdio.h> 31 | #if HAVE_STRING_H 32 | # include <string.h> 33 | #endif 34 | #if HAVE_STDLIB_H 35 | # include <stdlib.h> 36 | #endif 37 | #if HAVE_MEMORY_H 38 | # include <memory.h> 39 | #endif 40 | #if HAVE_STDDEF_H 41 | # include <stddef.h> 42 | #endif 43 | #if HAVE_STRINGS_H 44 | # include <strings.h> 45 | #endif 46 | #if HAVE_SYS_TYPES_H 47 | # include <sys/types.h> 48 | #endif 49 | #if HAVE_IO_H && HAVE_FCNTL_H 50 | # include <io.h> 51 | # include <fcntl.h> 52 | #endif 53 | #include <time.h> 54 | #include <divsufsort.h> 55 | #include "lfs.h" 56 | 57 | /* Writes the low 32 bits of n to fp in little-endian byte order; returns the number of bytes written (4 on success). */ 58 | static 59 | size_t 60 | write_int(FILE *fp, saidx_t n) { 61 | unsigned char c[4]; 62 | c[0] = (unsigned char)((n >> 0) & 0xff), c[1] = (unsigned char)((n >> 8) & 0xff), 63 | c[2] = (unsigned char)((n >> 16) & 0xff), c[3] = (unsigned char)((n >> 24) & 0xff); 64 | return fwrite(c, sizeof(unsigned char), 4, fp); 65 | } 66 | /* Prints the version and usage text to stderr, then terminates the process with the given exit status. */ 67 | static 68 | void 69 | print_help(const char *progname, int status) { 70 | fprintf(stderr, 71 | "bwt, a burrows-wheeler transform program, version %s.\n", 72 | divsufsort_version()); 73 | fprintf(stderr, "usage: %s [-b num] INFILE OUTFILE\n", progname); 74 | fprintf(stderr, " -b num set block size to num MiB [1..512] (default: 32)\n\n"); 75 | exit(status); 76 | } 77 | 78 | int 79 | main(int argc, const char *argv[]) { 80 | FILE *fp, *ofp; 81 | const char *fname, *ofname; 82 | sauchar_t *T; 83 | saidx_t *SA; 84 | LFS_OFF_T n; 85 | size_t m; 86 | saidx_t pidx; 87 | clock_t start,finish; 88 | saint_t i, blocksize = 32, needclose = 3; 89 | 90 | /* Check arguments.
*/ 91 | if((argc == 1) || 92 | (strcmp(argv[1], "-h") == 0) || 93 | (strcmp(argv[1], "--help") == 0)) { print_help(argv[0], EXIT_SUCCESS); } 94 | if((argc != 3) && (argc != 5)) { print_help(argv[0], EXIT_FAILURE); } 95 | i = 1; 96 | if(argc == 5) { 97 | if(strcmp(argv[i], "-b") != 0) { print_help(argv[0], EXIT_FAILURE); } 98 | blocksize = atoi(argv[i + 1]); 99 | if(blocksize < 0) { blocksize = 1; } 100 | else if(512 < blocksize) { blocksize = 512; } 101 | i += 2; 102 | } 103 | blocksize <<= 20; 104 | 105 | /* Open a file for reading. */ 106 | if(strcmp(argv[i], "-") != 0) { 107 | #if HAVE_FOPEN_S 108 | if(fopen_s(&fp, fname = argv[i], "rb") != 0) { 109 | #else 110 | if((fp = LFS_FOPEN(fname = argv[i], "rb")) == NULL) { 111 | #endif 112 | fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], fname); 113 | perror(NULL); 114 | exit(EXIT_FAILURE); 115 | } 116 | } else { 117 | #if HAVE__SETMODE && HAVE__FILENO 118 | if(_setmode(_fileno(stdin), _O_BINARY) == -1) { 119 | fprintf(stderr, "%s: Cannot set mode: ", argv[0]); 120 | perror(NULL); 121 | exit(EXIT_FAILURE); 122 | } 123 | #endif 124 | fp = stdin; 125 | fname = "stdin"; 126 | needclose ^= 1; 127 | } 128 | i += 1; 129 | 130 | /* Open a file for writing. */ 131 | if(strcmp(argv[i], "-") != 0) { 132 | #if HAVE_FOPEN_S 133 | if(fopen_s(&ofp, ofname = argv[i], "wb") != 0) { 134 | #else 135 | if((ofp = LFS_FOPEN(ofname = argv[i], "wb")) == NULL) { 136 | #endif 137 | fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], ofname); 138 | perror(NULL); 139 | exit(EXIT_FAILURE); 140 | } 141 | } else { 142 | #if HAVE__SETMODE && HAVE__FILENO 143 | if(_setmode(_fileno(stdout), _O_BINARY) == -1) { 144 | fprintf(stderr, "%s: Cannot set mode: ", argv[0]); 145 | perror(NULL); 146 | exit(EXIT_FAILURE); 147 | } 148 | #endif 149 | ofp = stdout; 150 | ofname = "stdout"; 151 | needclose ^= 2; 152 | } 153 | 154 | /* Get the file size. 
*/ 155 | if(LFS_FSEEK(fp, 0, SEEK_END) == 0) { 156 | n = LFS_FTELL(fp); 157 | rewind(fp); 158 | if(n < 0) { 159 | fprintf(stderr, "%s: Cannot ftell `%s': ", argv[0], fname); 160 | perror(NULL); 161 | exit(EXIT_FAILURE); 162 | } 163 | if(0x20000000L < n) { n = 0x20000000L; } 164 | if((blocksize == 0) || (n < blocksize)) { blocksize = (saidx_t)n; } 165 | } else if(blocksize == 0) { blocksize = 32 << 20; } 166 | 167 | /* Allocate 5blocksize bytes of memory. */ 168 | T = (sauchar_t *)malloc(blocksize * sizeof(sauchar_t)); 169 | SA = (saidx_t *)malloc(blocksize * sizeof(saidx_t)); 170 | if((T == NULL) || (SA == NULL)) { 171 | fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]); 172 | exit(EXIT_FAILURE); 173 | } 174 | 175 | /* Write the blocksize. */ 176 | if(write_int(ofp, blocksize) != 4) { 177 | fprintf(stderr, "%s: Cannot write to `%s': ", argv[0], ofname); 178 | perror(NULL); 179 | exit(EXIT_FAILURE); 180 | } 181 | 182 | fprintf(stderr, " BWT (blocksize %" PRIdSAINT_T ") ... ", blocksize); 183 | start = clock(); 184 | for(n = 0; 0 < (m = fread(T, sizeof(sauchar_t), blocksize, fp)); n += m) { 185 | /* Burrows-Wheeler Transform. */ 186 | pidx = divbwt(T, T, SA, m); 187 | if(pidx < 0) { 188 | fprintf(stderr, "%s (bw_transform): %s.\n", 189 | argv[0], 190 | (pidx == -1) ? "Invalid arguments" : "Cannot allocate memory"); 191 | exit(EXIT_FAILURE); 192 | } 193 | 194 | /* Write the bwted data. 
*/ 195 | if((write_int(ofp, pidx) != 4) || 196 | (fwrite(T, sizeof(sauchar_t), m, ofp) != m)) { 197 | fprintf(stderr, "%s: Cannot write to `%s': ", argv[0], ofname); 198 | perror(NULL); 199 | exit(EXIT_FAILURE); 200 | } 201 | } 202 | if(ferror(fp)) { 203 | fprintf(stderr, "%s: Cannot read from `%s': ", argv[0], fname); 204 | perror(NULL); 205 | exit(EXIT_FAILURE); 206 | } 207 | finish = clock(); 208 | fprintf(stderr, "%" PRIdOFF_T " bytes: %.4f sec\n", 209 | n, (double)(finish - start) / (double)CLOCKS_PER_SEC); 210 | 211 | /* Close files */ 212 | if(needclose & 1) { fclose(fp); } 213 | if(needclose & 2) { fclose(ofp); } 214 | 215 | /* Deallocate memory. */ 216 | free(SA); 217 | free(T); 218 | 219 | return 0; 220 | } 221 | -------------------------------------------------------------------------------- /lib/utils.c: -------------------------------------------------------------------------------- 1 | /* 2 | * utils.c for libdivsufsort 3 | * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. 4 | * 5 | * Permission is hereby granted, free of charge, to any person 6 | * obtaining a copy of this software and associated documentation 7 | * files (the "Software"), to deal in the Software without 8 | * restriction, including without limitation the rights to use, 9 | * copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the 11 | * Software is furnished to do so, subject to the following 12 | * conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be 15 | * included in all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 19 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 20 | * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 21 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 22 | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 24 | * OTHER DEALINGS IN THE SOFTWARE. 25 | */ 26 | 27 | #include "divsufsort_private.h" 28 | 29 | 30 | /*- Private Function -*/ 31 | 32 | /* Binary search for inverse bwt. */ 33 | static 34 | saidx_t 35 | binarysearch_lower(const saidx_t *A, saidx_t size, saidx_t value) { 36 | saidx_t half, i; 37 | for(i = 0, half = size >> 1; 38 | 0 < size; 39 | size = half, half >>= 1) { 40 | if(A[i + half] < value) { 41 | i += half + 1; 42 | half -= (size & 1) ^ 1; 43 | } 44 | } 45 | return i; 46 | } 47 | 48 | 49 | /*- Functions -*/ 50 | 51 | /* Burrows-Wheeler transform. */ 52 | saint_t 53 | bw_transform(const sauchar_t *T, sauchar_t *U, saidx_t *SA, 54 | saidx_t n, saidx_t *idx) { 55 | saidx_t *A, i, j, p, t; 56 | saint_t c; 57 | 58 | /* Check arguments. */ 59 | if((T == NULL) || (U == NULL) || (n < 0) || (idx == NULL)) { return -1; } 60 | if(n <= 1) { 61 | if(n == 1) { U[0] = T[0]; } 62 | *idx = n; 63 | return 0; 64 | } 65 | 66 | if((A = SA) == NULL) { 67 | i = divbwt(T, U, NULL, n); 68 | if(0 <= i) { *idx = i; i = 0; } 69 | return (saint_t)i; 70 | } 71 | 72 | /* BW transform. */ 73 | if(T == U) { 74 | t = n; 75 | for(i = 0, j = 0; i < n; ++i) { 76 | p = t - 1; 77 | t = A[i]; 78 | if(0 <= p) { 79 | c = T[j]; 80 | U[j] = (j <= p) ? T[p] : (sauchar_t)A[p]; 81 | A[j] = c; 82 | j++; 83 | } else { 84 | *idx = i; 85 | } 86 | } 87 | p = t - 1; 88 | if(0 <= p) { 89 | c = T[j]; 90 | U[j] = (j <= p) ? T[p] : (sauchar_t)A[p]; 91 | A[j] = c; 92 | } else { 93 | *idx = i; 94 | } 95 | } else { 96 | U[0] = T[n - 1]; 97 | for(i = 0; A[i] != 0; ++i) { U[i + 1] = T[A[i] - 1]; } 98 | *idx = i + 1; 99 | for(++i; i < n; ++i) { U[i] = T[A[i] - 1]; } 100 | } 101 | 102 | if(SA == NULL) { 103 | /* Deallocate memory. 
*/ 104 | free(A); 105 | } 106 | 107 | return 0; 108 | } 109 | 110 | /* Inverse Burrows-Wheeler transform. Rebuilds the original string into U from the BWT string T and its primary index idx. A is optional scratch space of n entries (allocated and freed internally when NULL). Returns 0 on success, -1 on invalid arguments, -2 if the scratch allocation fails. */ 111 | saint_t 112 | inverse_bw_transform(const sauchar_t *T, sauchar_t *U, saidx_t *A, 113 | saidx_t n, saidx_t idx) { 114 | saidx_t C[ALPHABET_SIZE]; 115 | sauchar_t D[ALPHABET_SIZE]; 116 | saidx_t *B; 117 | saidx_t i, p; 118 | saint_t c, d; 119 | 120 | /* Check arguments. */ 121 | if((T == NULL) || (U == NULL) || (n < 0) || (idx < 0) || 122 | (n < idx) || ((0 < n) && (idx == 0))) { 123 | return -1; 124 | } 125 | if(n <= 1) { if(n == 1) { U[0] = T[0]; } return 0; } /* trivial input: a 0- or 1-byte string is its own transform, but U must still receive the byte when U != T */ 126 | 127 | if((B = A) == NULL) { 128 | /* Allocate n*sizeof(saidx_t) bytes of memory. */ 129 | if((B = (saidx_t *)malloc((size_t)n * sizeof(saidx_t))) == NULL) { return -2; } 130 | } 131 | 132 | /* Inverse BW transform: count each symbol, turn the counts into bucket start offsets, and record in D the d symbols that actually occur. */ 133 | for(c = 0; c < ALPHABET_SIZE; ++c) { C[c] = 0; } 134 | for(i = 0; i < n; ++i) { ++C[T[i]]; } 135 | for(c = 0, d = 0, i = 0; c < ALPHABET_SIZE; ++c) { 136 | p = C[c]; 137 | if(0 < p) { 138 | C[c] = i; 139 | D[d++] = (sauchar_t)c; 140 | i += p; 141 | } 142 | } 143 | for(i = 0; i < idx; ++i) { B[C[T[i]]++] = i; } 144 | for( ; i < n; ++i) { B[C[T[i]]++] = i + 1; } 145 | for(c = 0; c < d; ++c) { C[c] = C[D[c]]; } /* compact the bucket end offsets so they can be binary-searched */ 146 | for(i = 0, p = idx; i < n; ++i) { 147 | U[i] = D[binarysearch_lower(C, d, p)]; 148 | p = B[p - 1]; 149 | } 150 | 151 | if(A == NULL) { 152 | /* Deallocate memory. */ 153 | free(B); 154 | } 155 | 156 | return 0; 157 | } 158 | 159 | /* Checks the suffix array SA of the string T. */ 160 | saint_t 161 | sufcheck(const sauchar_t *T, const saidx_t *SA, 162 | saidx_t n, saint_t verbose) { 163 | saidx_t C[ALPHABET_SIZE]; 164 | saidx_t i, p, q, t; 165 | saint_t c; 166 | 167 | if(verbose) { fprintf(stderr, "sufcheck: "); } 168 | 169 | /* Check arguments.
*/ 170 | if((T == NULL) || (SA == NULL) || (n < 0)) { 171 | if(verbose) { fprintf(stderr, "Invalid arguments.\n"); } 172 | return -1; 173 | } 174 | if(n == 0) { 175 | if(verbose) { fprintf(stderr, "Done.\n"); } 176 | return 0; 177 | } 178 | 179 | /* check range: [0..n-1] */ 180 | for(i = 0; i < n; ++i) { 181 | if((SA[i] < 0) || (n <= SA[i])) { 182 | if(verbose) { 183 | fprintf(stderr, "Out of the range [0,%" PRIdSAIDX_T "].\n" 184 | " SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "\n", 185 | n - 1, i, SA[i]); 186 | } 187 | return -2; 188 | } 189 | } 190 | 191 | /* check first characters. */ 192 | for(i = 1; i < n; ++i) { 193 | if(T[SA[i - 1]] > T[SA[i]]) { 194 | if(verbose) { 195 | fprintf(stderr, "Suffixes in wrong order.\n" 196 | " T[SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "]=%d" 197 | " > T[SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "]=%d\n", 198 | i - 1, SA[i - 1], T[SA[i - 1]], i, SA[i], T[SA[i]]); 199 | } 200 | return -3; 201 | } 202 | } 203 | 204 | /* check suffixes. */ 205 | for(i = 0; i < ALPHABET_SIZE; ++i) { C[i] = 0; } 206 | for(i = 0; i < n; ++i) { ++C[T[i]]; } 207 | for(i = 0, p = 0; i < ALPHABET_SIZE; ++i) { 208 | t = C[i]; 209 | C[i] = p; 210 | p += t; 211 | } 212 | 213 | q = C[T[n - 1]]; 214 | C[T[n - 1]] += 1; 215 | for(i = 0; i < n; ++i) { 216 | p = SA[i]; 217 | if(0 < p) { 218 | c = T[--p]; 219 | t = C[c]; 220 | } else { 221 | c = T[p = n - 1]; 222 | t = q; 223 | } 224 | if((t < 0) || (p != SA[t])) { 225 | if(verbose) { 226 | fprintf(stderr, "Suffix in wrong position.\n" 227 | " SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T " or\n" 228 | " SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "\n", 229 | t, (0 <= t) ? 
SA[t] : -1, i, SA[i]); 230 | } 231 | return -4; 232 | } 233 | if(t != q) { 234 | ++C[c]; 235 | if((n <= C[c]) || (T[SA[C[c]]] != c)) { C[c] = -1; } 236 | } 237 | } 238 | 239 | if(1 <= verbose) { fprintf(stderr, "Done.\n"); } 240 | return 0; 241 | } 242 | 243 | 244 | static 245 | int 246 | _compare(const sauchar_t *T, saidx_t Tsize, 247 | const sauchar_t *P, saidx_t Psize, 248 | saidx_t suf, saidx_t *match) { 249 | saidx_t i, j; 250 | saint_t r; 251 | for(i = suf + *match, j = *match, r = 0; 252 | (i < Tsize) && (j < Psize) && ((r = T[i] - P[j]) == 0); ++i, ++j) { } 253 | *match = j; 254 | return (r == 0) ? -(j != Psize) : r; 255 | } 256 | 257 | /* Search for the pattern P in the string T. */ 258 | saidx_t 259 | sa_search(const sauchar_t *T, saidx_t Tsize, 260 | const sauchar_t *P, saidx_t Psize, 261 | const saidx_t *SA, saidx_t SAsize, 262 | saidx_t *idx) { 263 | saidx_t size, lsize, rsize, half; 264 | saidx_t match, lmatch, rmatch; 265 | saidx_t llmatch, lrmatch, rlmatch, rrmatch; 266 | saidx_t i, j, k; 267 | saint_t r; 268 | 269 | if(idx != NULL) { *idx = -1; } 270 | if((T == NULL) || (P == NULL) || (SA == NULL) || 271 | (Tsize < 0) || (Psize < 0) || (SAsize < 0)) { return -1; } 272 | if((Tsize == 0) || (SAsize == 0)) { return 0; } 273 | if(Psize == 0) { if(idx != NULL) { *idx = 0; } return SAsize; } 274 | 275 | for(i = j = k = 0, lmatch = rmatch = 0, size = SAsize, half = size >> 1; 276 | 0 < size; 277 | size = half, half >>= 1) { 278 | match = MIN(lmatch, rmatch); 279 | r = _compare(T, Tsize, P, Psize, SA[i + half], &match); 280 | if(r < 0) { 281 | i += half + 1; 282 | half -= (size & 1) ^ 1; 283 | lmatch = match; 284 | } else if(r > 0) { 285 | rmatch = match; 286 | } else { 287 | lsize = half, j = i, rsize = size - half - 1, k = i + half + 1; 288 | 289 | /* left part */ 290 | for(llmatch = lmatch, lrmatch = match, half = lsize >> 1; 291 | 0 < lsize; 292 | lsize = half, half >>= 1) { 293 | lmatch = MIN(llmatch, lrmatch); 294 | r = _compare(T, Tsize, P, Psize, 
SA[j + half], &lmatch); 295 | if(r < 0) { 296 | j += half + 1; 297 | half -= (lsize & 1) ^ 1; 298 | llmatch = lmatch; 299 | } else { 300 | lrmatch = lmatch; 301 | } 302 | } 303 | 304 | /* right part */ 305 | for(rlmatch = match, rrmatch = rmatch, half = rsize >> 1; 306 | 0 < rsize; 307 | rsize = half, half >>= 1) { 308 | rmatch = MIN(rlmatch, rrmatch); 309 | r = _compare(T, Tsize, P, Psize, SA[k + half], &rmatch); 310 | if(r <= 0) { 311 | k += half + 1; 312 | half -= (rsize & 1) ^ 1; 313 | rlmatch = rmatch; 314 | } else { 315 | rrmatch = rmatch; 316 | } 317 | } 318 | 319 | break; 320 | } 321 | } 322 | 323 | if(idx != NULL) { *idx = (0 < (k - j)) ? j : i; } 324 | return k - j; 325 | } 326 | 327 | /* Search for the character c in the string T. */ 328 | saidx_t 329 | sa_simplesearch(const sauchar_t *T, saidx_t Tsize, 330 | const saidx_t *SA, saidx_t SAsize, 331 | saint_t c, saidx_t *idx) { 332 | saidx_t size, lsize, rsize, half; 333 | saidx_t i, j, k, p; 334 | saint_t r; 335 | 336 | if(idx != NULL) { *idx = -1; } 337 | if((T == NULL) || (SA == NULL) || (Tsize < 0) || (SAsize < 0)) { return -1; } 338 | if((Tsize == 0) || (SAsize == 0)) { return 0; } 339 | 340 | for(i = j = k = 0, size = SAsize, half = size >> 1; 341 | 0 < size; 342 | size = half, half >>= 1) { 343 | p = SA[i + half]; 344 | r = (p < Tsize) ? T[p] - c : -1; 345 | if(r < 0) { 346 | i += half + 1; 347 | half -= (size & 1) ^ 1; 348 | } else if(r == 0) { 349 | lsize = half, j = i, rsize = size - half - 1, k = i + half + 1; 350 | 351 | /* left part */ 352 | for(half = lsize >> 1; 353 | 0 < lsize; 354 | lsize = half, half >>= 1) { 355 | p = SA[j + half]; 356 | r = (p < Tsize) ? T[p] - c : -1; 357 | if(r < 0) { 358 | j += half + 1; 359 | half -= (lsize & 1) ^ 1; 360 | } 361 | } 362 | 363 | /* right part */ 364 | for(half = rsize >> 1; 365 | 0 < rsize; 366 | rsize = half, half >>= 1) { 367 | p = SA[k + half]; 368 | r = (p < Tsize) ? 
T[p] - c : -1; 369 | if(r <= 0) { 370 | k += half + 1; 371 | half -= (rsize & 1) ^ 1; 372 | } 373 | } 374 | 375 | break; 376 | } 377 | } 378 | 379 | if(idx != NULL) { *idx = (0 < (k - j)) ? j : i; } 380 | return k - j; 381 | } 382 | -------------------------------------------------------------------------------- /lib/divsufsort.c: -------------------------------------------------------------------------------- 1 | /* 2 | * divsufsort.c for libdivsufsort 3 | * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. 4 | * 5 | * Permission is hereby granted, free of charge, to any person 6 | * obtaining a copy of this software and associated documentation 7 | * files (the "Software"), to deal in the Software without 8 | * restriction, including without limitation the rights to use, 9 | * copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the 11 | * Software is furnished to do so, subject to the following 12 | * conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be 15 | * included in all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 19 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 20 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 21 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 22 | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 24 | * OTHER DEALINGS IN THE SOFTWARE. 25 | */ 26 | 27 | #include "divsufsort_private.h" 28 | #ifdef _OPENMP 29 | # include <omp.h> /* omp_get_max_threads() / omp_get_thread_num() for the parallel sssort loop */ 30 | #endif 31 | 32 | 33 | /*- Private Functions -*/ 34 | 35 | /* Sorts suffixes of type B*.
*/ 36 | static 37 | saidx_t 38 | sort_typeBstar(const sauchar_t *T, saidx_t *SA, 39 | saidx_t *bucket_A, saidx_t *bucket_B, 40 | saidx_t n) { 41 | saidx_t *PAb, *ISAb, *buf; 42 | #ifdef _OPENMP 43 | saidx_t *curbuf; 44 | saidx_t l; 45 | #endif 46 | saidx_t i, j, k, t, m, bufsize; 47 | saint_t c0, c1; 48 | #ifdef _OPENMP 49 | saint_t d0, d1; 50 | int tmp; 51 | #endif 52 | 53 | /* Initialize bucket arrays. */ 54 | for(i = 0; i < BUCKET_A_SIZE; ++i) { bucket_A[i] = 0; } 55 | for(i = 0; i < BUCKET_B_SIZE; ++i) { bucket_B[i] = 0; } 56 | 57 | /* Count the number of occurrences of the first one or two characters of each 58 | type A, B and B* suffix. Moreover, store the beginning position of all 59 | type B* suffixes into the array SA. */ 60 | for(i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) { 61 | /* type A suffix. */ 62 | do { ++BUCKET_A(c1 = c0); } while((0 <= --i) && ((c0 = T[i]) >= c1)); 63 | if(0 <= i) { 64 | /* type B* suffix. */ 65 | ++BUCKET_BSTAR(c0, c1); 66 | SA[--m] = i; 67 | /* type B suffix. */ 68 | for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { 69 | ++BUCKET_B(c0, c1); 70 | } 71 | } 72 | } 73 | m = n - m; 74 | /* 75 | note: 76 | A type B* suffix is lexicographically smaller than a type B suffix that 77 | begins with the same first two characters. 78 | */ 79 | 80 | /* Calculate the index of start/end point of each bucket. */ 81 | for(c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) { 82 | t = i + BUCKET_A(c0); 83 | BUCKET_A(c0) = i + j; /* start point */ 84 | i = t + BUCKET_B(c0, c0); 85 | for(c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) { 86 | j += BUCKET_BSTAR(c0, c1); 87 | BUCKET_BSTAR(c0, c1) = j; /* end point */ 88 | i += BUCKET_B(c0, c1); 89 | } 90 | } 91 | 92 | if(0 < m) { 93 | /* Sort the type B* suffixes by their first two characters. 
*/ 94 | PAb = SA + n - m; ISAb = SA + m; 95 | for(i = m - 2; 0 <= i; --i) { 96 | t = PAb[i], c0 = T[t], c1 = T[t + 1]; 97 | SA[--BUCKET_BSTAR(c0, c1)] = i; 98 | } 99 | t = PAb[m - 1], c0 = T[t], c1 = T[t + 1]; 100 | SA[--BUCKET_BSTAR(c0, c1)] = m - 1; 101 | 102 | /* Sort the type B* substrings using sssort. */ 103 | #ifdef _OPENMP 104 | tmp = omp_get_max_threads(); 105 | buf = SA + m, bufsize = (n - (2 * m)) / tmp; 106 | c0 = ALPHABET_SIZE - 2, c1 = ALPHABET_SIZE - 1, j = m; 107 | #pragma omp parallel default(shared) private(curbuf, k, l, d0, d1, tmp) 108 | { 109 | tmp = omp_get_thread_num(); 110 | curbuf = buf + tmp * bufsize; 111 | k = 0; 112 | for(;;) { 113 | #pragma omp critical(sssort_lock) 114 | { 115 | if(0 < (l = j)) { 116 | d0 = c0, d1 = c1; 117 | do { 118 | k = BUCKET_BSTAR(d0, d1); 119 | if(--d1 <= d0) { 120 | d1 = ALPHABET_SIZE - 1; 121 | if(--d0 < 0) { break; } 122 | } 123 | } while(((l - k) <= 1) && (0 < (l = k))); 124 | c0 = d0, c1 = d1, j = k; 125 | } 126 | } 127 | if(l == 0) { break; } 128 | sssort(T, PAb, SA + k, SA + l, 129 | curbuf, bufsize, 2, n, *(SA + k) == (m - 1)); 130 | } 131 | } 132 | #else 133 | buf = SA + m, bufsize = n - (2 * m); 134 | for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) { 135 | for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) { 136 | i = BUCKET_BSTAR(c0, c1); 137 | if(1 < (j - i)) { 138 | sssort(T, PAb, SA + i, SA + j, 139 | buf, bufsize, 2, n, *(SA + i) == (m - 1)); 140 | } 141 | } 142 | } 143 | #endif 144 | 145 | /* Compute ranks of type B* substrings. */ 146 | for(i = m - 1; 0 <= i; --i) { 147 | if(0 <= SA[i]) { 148 | j = i; 149 | do { ISAb[SA[i]] = i; } while((0 <= --i) && (0 <= SA[i])); 150 | SA[i + 1] = i - j; 151 | if(i <= 0) { break; } 152 | } 153 | j = i; 154 | do { ISAb[SA[i] = ~SA[i]] = j; } while(SA[--i] < 0); 155 | ISAb[SA[i]] = j; 156 | } 157 | 158 | /* Construct the inverse suffix array of type B* suffixes using trsort. 
*/ 159 | trsort(ISAb, SA, m, 1); 160 | 161 | /* Set the sorted order of tyoe B* suffixes. */ 162 | for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) { 163 | for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { } 164 | if(0 <= i) { 165 | t = i; 166 | for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { } 167 | SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t; 168 | } 169 | } 170 | 171 | /* Calculate the index of start/end point of each bucket. */ 172 | BUCKET_B(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n; /* end point */ 173 | for(c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) { 174 | i = BUCKET_A(c0 + 1) - 1; 175 | for(c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) { 176 | t = i - BUCKET_B(c0, c1); 177 | BUCKET_B(c0, c1) = i; /* end point */ 178 | 179 | /* Move all type B* suffixes to the correct position. */ 180 | for(i = t, j = BUCKET_BSTAR(c0, c1); 181 | j <= k; 182 | --i, --k) { SA[i] = SA[k]; } 183 | } 184 | BUCKET_BSTAR(c0, c0 + 1) = i - BUCKET_B(c0, c0) + 1; /* start point */ 185 | BUCKET_B(c0, c0) = i; /* end point */ 186 | } 187 | } 188 | 189 | return m; 190 | } 191 | 192 | /* Constructs the suffix array by using the sorted order of type B* suffixes. */ 193 | static 194 | void 195 | construct_SA(const sauchar_t *T, saidx_t *SA, 196 | saidx_t *bucket_A, saidx_t *bucket_B, 197 | saidx_t n, saidx_t m) { 198 | saidx_t *i, *j, *k; 199 | saidx_t s; 200 | saint_t c0, c1, c2; 201 | 202 | if(0 < m) { 203 | /* Construct the sorted order of type B suffixes by using 204 | the sorted order of type B* suffixes. */ 205 | for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) { 206 | /* Scan the suffix array from right to left. 
*/ 207 | for(i = SA + BUCKET_BSTAR(c1, c1 + 1), 208 | j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; 209 | i <= j; 210 | --j) { 211 | if(0 < (s = *j)) { 212 | assert(T[s] == c1); 213 | assert(((s + 1) < n) && (T[s] <= T[s + 1])); 214 | assert(T[s - 1] <= T[s]); 215 | *j = ~s; 216 | c0 = T[--s]; 217 | if((0 < s) && (T[s - 1] > c0)) { s = ~s; } 218 | if(c0 != c2) { 219 | if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; } 220 | k = SA + BUCKET_B(c2 = c0, c1); 221 | } 222 | assert(k < j); 223 | *k-- = s; 224 | } else { 225 | assert(((s == 0) && (T[s] == c1)) || (s < 0)); 226 | *j = ~s; 227 | } 228 | } 229 | } 230 | } 231 | 232 | /* Construct the suffix array by using 233 | the sorted order of type B suffixes. */ 234 | k = SA + BUCKET_A(c2 = T[n - 1]); 235 | *k++ = (T[n - 2] < c2) ? ~(n - 1) : (n - 1); 236 | /* Scan the suffix array from left to right. */ 237 | for(i = SA, j = SA + n; i < j; ++i) { 238 | if(0 < (s = *i)) { 239 | assert(T[s - 1] >= T[s]); 240 | c0 = T[--s]; 241 | if((s == 0) || (T[s - 1] < c0)) { s = ~s; } 242 | if(c0 != c2) { 243 | BUCKET_A(c2) = k - SA; 244 | k = SA + BUCKET_A(c2 = c0); 245 | } 246 | assert(i < k); 247 | *k++ = s; 248 | } else { 249 | assert(s < 0); 250 | *i = ~s; 251 | } 252 | } 253 | } 254 | 255 | /* Constructs the burrows-wheeler transformed string directly 256 | by using the sorted order of type B* suffixes. */ 257 | static 258 | saidx_t 259 | construct_BWT(const sauchar_t *T, saidx_t *SA, 260 | saidx_t *bucket_A, saidx_t *bucket_B, 261 | saidx_t n, saidx_t m) { 262 | saidx_t *i, *j, *k, *orig; 263 | saidx_t s; 264 | saint_t c0, c1, c2; 265 | 266 | if(0 < m) { 267 | /* Construct the sorted order of type B suffixes by using 268 | the sorted order of type B* suffixes. */ 269 | for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) { 270 | /* Scan the suffix array from right to left. 
*/ 271 | for(i = SA + BUCKET_BSTAR(c1, c1 + 1), 272 | j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; 273 | i <= j; 274 | --j) { 275 | if(0 < (s = *j)) { 276 | assert(T[s] == c1); 277 | assert(((s + 1) < n) && (T[s] <= T[s + 1])); 278 | assert(T[s - 1] <= T[s]); 279 | c0 = T[--s]; 280 | *j = ~((saidx_t)c0); 281 | if((0 < s) && (T[s - 1] > c0)) { s = ~s; } 282 | if(c0 != c2) { 283 | if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; } 284 | k = SA + BUCKET_B(c2 = c0, c1); 285 | } 286 | assert(k < j); 287 | *k-- = s; 288 | } else if(s != 0) { 289 | *j = ~s; 290 | #ifndef NDEBUG 291 | } else { 292 | assert(T[s] == c1); 293 | #endif 294 | } 295 | } 296 | } 297 | } 298 | 299 | /* Construct the BWTed string by using 300 | the sorted order of type B suffixes. */ 301 | k = SA + BUCKET_A(c2 = T[n - 1]); 302 | *k++ = (T[n - 2] < c2) ? ~((saidx_t)T[n - 2]) : (n - 1); 303 | /* Scan the suffix array from left to right. */ 304 | for(i = SA, j = SA + n, orig = SA; i < j; ++i) { 305 | if(0 < (s = *i)) { 306 | assert(T[s - 1] >= T[s]); 307 | c0 = T[--s]; 308 | *i = c0; 309 | if((0 < s) && (T[s - 1] < c0)) { s = ~((saidx_t)T[s - 1]); } 310 | if(c0 != c2) { 311 | BUCKET_A(c2) = k - SA; 312 | k = SA + BUCKET_A(c2 = c0); 313 | } 314 | assert(i < k); 315 | *k++ = s; 316 | } else if(s != 0) { 317 | *i = ~s; 318 | } else { 319 | orig = i; 320 | } 321 | } 322 | 323 | return orig - SA; 324 | } 325 | 326 | 327 | /*---------------------------------------------------------------------------*/ 328 | 329 | /*- Function -*/ 330 | 331 | saint_t 332 | divsufsort(const sauchar_t *T, saidx_t *SA, saidx_t n) { 333 | saidx_t *bucket_A, *bucket_B; 334 | saidx_t m; 335 | saint_t err = 0; 336 | 337 | /* Check arguments. 
*/ 338 | if((T == NULL) || (SA == NULL) || (n < 0)) { return -1; } 339 | else if(n == 0) { return 0; } 340 | else if(n == 1) { SA[0] = 0; return 0; } 341 | else if(n == 2) { m = (T[0] < T[1]); SA[m ^ 1] = 0, SA[m] = 1; return 0; } 342 | 343 | bucket_A = (saidx_t *)malloc(BUCKET_A_SIZE * sizeof(saidx_t)); 344 | bucket_B = (saidx_t *)malloc(BUCKET_B_SIZE * sizeof(saidx_t)); 345 | 346 | /* Suffixsort. */ 347 | if((bucket_A != NULL) && (bucket_B != NULL)) { 348 | m = sort_typeBstar(T, SA, bucket_A, bucket_B, n); 349 | construct_SA(T, SA, bucket_A, bucket_B, n, m); 350 | } else { 351 | err = -2; 352 | } 353 | 354 | free(bucket_B); 355 | free(bucket_A); 356 | 357 | return err; 358 | } 359 | 360 | saidx_t 361 | divbwt(const sauchar_t *T, sauchar_t *U, saidx_t *A, saidx_t n) { 362 | saidx_t *B; 363 | saidx_t *bucket_A, *bucket_B; 364 | saidx_t m, pidx, i; 365 | 366 | /* Check arguments. */ 367 | if((T == NULL) || (U == NULL) || (n < 0)) { return -1; } 368 | else if(n <= 1) { if(n == 1) { U[0] = T[0]; } return n; } 369 | 370 | if((B = A) == NULL) { B = (saidx_t *)malloc((size_t)(n + 1) * sizeof(saidx_t)); } 371 | bucket_A = (saidx_t *)malloc(BUCKET_A_SIZE * sizeof(saidx_t)); 372 | bucket_B = (saidx_t *)malloc(BUCKET_B_SIZE * sizeof(saidx_t)); 373 | 374 | /* Burrows-Wheeler Transform. */ 375 | if((B != NULL) && (bucket_A != NULL) && (bucket_B != NULL)) { 376 | m = sort_typeBstar(T, B, bucket_A, bucket_B, n); 377 | pidx = construct_BWT(T, B, bucket_A, bucket_B, n, m); 378 | 379 | /* Copy to output string. 
*/ 380 | U[0] = T[n - 1]; 381 | for(i = 0; i < pidx; ++i) { U[i + 1] = (sauchar_t)B[i]; } 382 | for(i += 1; i < n; ++i) { U[i] = (sauchar_t)B[i]; } 383 | pidx += 1; 384 | } else { 385 | pidx = -2; 386 | } 387 | 388 | free(bucket_B); 389 | free(bucket_A); 390 | if(A == NULL) { free(B); } 391 | 392 | return pidx; 393 | } 394 | 395 | const char * 396 | divsufsort_version(void) { 397 | return PROJECT_VERSION_FULL; 398 | } 399 | -------------------------------------------------------------------------------- /lib/trsort.c: -------------------------------------------------------------------------------- 1 | /* 2 | * trsort.c for libdivsufsort 3 | * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. 4 | * 5 | * Permission is hereby granted, free of charge, to any person 6 | * obtaining a copy of this software and associated documentation 7 | * files (the "Software"), to deal in the Software without 8 | * restriction, including without limitation the rights to use, 9 | * copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the 11 | * Software is furnished to do so, subject to the following 12 | * conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be 15 | * included in all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 19 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 20 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 21 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 22 | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 24 | * OTHER DEALINGS IN THE SOFTWARE. 
25 | */ 26 | 27 | #include "divsufsort_private.h" 28 | 29 | 30 | /*- Private Functions -*/ 31 | 32 | static const saint_t lg_table[256]= { 33 | -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, 34 | 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, 35 | 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, 36 | 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, 37 | 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 38 | 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 39 | 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 40 | 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 41 | }; 42 | 43 | static INLINE 44 | saint_t 45 | tr_ilg(saidx_t n) { 46 | #if defined(BUILD_DIVSUFSORT64) 47 | return (n >> 32) ? 48 | ((n >> 48) ? 49 | ((n >> 56) ? 50 | 56 + lg_table[(n >> 56) & 0xff] : 51 | 48 + lg_table[(n >> 48) & 0xff]) : 52 | ((n >> 40) ? 53 | 40 + lg_table[(n >> 40) & 0xff] : 54 | 32 + lg_table[(n >> 32) & 0xff])) : 55 | ((n & 0xffff0000) ? 56 | ((n & 0xff000000) ? 57 | 24 + lg_table[(n >> 24) & 0xff] : 58 | 16 + lg_table[(n >> 16) & 0xff]) : 59 | ((n & 0x0000ff00) ? 60 | 8 + lg_table[(n >> 8) & 0xff] : 61 | 0 + lg_table[(n >> 0) & 0xff])); 62 | #else 63 | return (n & 0xffff0000) ? 64 | ((n & 0xff000000) ? 65 | 24 + lg_table[(n >> 24) & 0xff] : 66 | 16 + lg_table[(n >> 16) & 0xff]) : 67 | ((n & 0x0000ff00) ? 68 | 8 + lg_table[(n >> 8) & 0xff] : 69 | 0 + lg_table[(n >> 0) & 0xff]); 70 | #endif 71 | } 72 | 73 | 74 | /*---------------------------------------------------------------------------*/ 75 | 76 | /* Simple insertionsort for small size groups. 
*/ 77 | static 78 | void 79 | tr_insertionsort(const saidx_t *ISAd, saidx_t *first, saidx_t *last) { 80 | saidx_t *a, *b; 81 | saidx_t t, r; 82 | 83 | for(a = first + 1; a < last; ++a) { 84 | for(t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);) { 85 | do { *(b + 1) = *b; } while((first <= --b) && (*b < 0)); 86 | if(b < first) { break; } 87 | } 88 | if(r == 0) { *b = ~*b; } 89 | *(b + 1) = t; 90 | } 91 | } 92 | 93 | 94 | /*---------------------------------------------------------------------------*/ 95 | 96 | static INLINE 97 | void 98 | tr_fixdown(const saidx_t *ISAd, saidx_t *SA, saidx_t i, saidx_t size) { 99 | saidx_t j, k; 100 | saidx_t v; 101 | saidx_t c, d, e; 102 | 103 | for(v = SA[i], c = ISAd[v]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) { 104 | d = ISAd[SA[k = j++]]; 105 | if(d < (e = ISAd[SA[j]])) { k = j; d = e; } 106 | if(d <= c) { break; } 107 | } 108 | SA[i] = v; 109 | } 110 | 111 | /* Simple top-down heapsort. */ 112 | static 113 | void 114 | tr_heapsort(const saidx_t *ISAd, saidx_t *SA, saidx_t size) { 115 | saidx_t i, m; 116 | saidx_t t; 117 | 118 | m = size; 119 | if((size % 2) == 0) { 120 | m--; 121 | if(ISAd[SA[m / 2]] < ISAd[SA[m]]) { SWAP(SA[m], SA[m / 2]); } 122 | } 123 | 124 | for(i = m / 2 - 1; 0 <= i; --i) { tr_fixdown(ISAd, SA, i, m); } 125 | if((size % 2) == 0) { SWAP(SA[0], SA[m]); tr_fixdown(ISAd, SA, 0, m); } 126 | for(i = m - 1; 0 < i; --i) { 127 | t = SA[0], SA[0] = SA[i]; 128 | tr_fixdown(ISAd, SA, 0, i); 129 | SA[i] = t; 130 | } 131 | } 132 | 133 | 134 | /*---------------------------------------------------------------------------*/ 135 | 136 | /* Returns the median of three elements. 
*/ 137 | static INLINE 138 | saidx_t * 139 | tr_median3(const saidx_t *ISAd, saidx_t *v1, saidx_t *v2, saidx_t *v3) { 140 | saidx_t *t; 141 | if(ISAd[*v1] > ISAd[*v2]) { SWAP(v1, v2); } 142 | if(ISAd[*v2] > ISAd[*v3]) { 143 | if(ISAd[*v1] > ISAd[*v3]) { return v1; } 144 | else { return v3; } 145 | } 146 | return v2; 147 | } 148 | 149 | /* Returns the median of five elements. */ 150 | static INLINE 151 | saidx_t * 152 | tr_median5(const saidx_t *ISAd, 153 | saidx_t *v1, saidx_t *v2, saidx_t *v3, saidx_t *v4, saidx_t *v5) { 154 | saidx_t *t; 155 | if(ISAd[*v2] > ISAd[*v3]) { SWAP(v2, v3); } 156 | if(ISAd[*v4] > ISAd[*v5]) { SWAP(v4, v5); } 157 | if(ISAd[*v2] > ISAd[*v4]) { SWAP(v2, v4); SWAP(v3, v5); } 158 | if(ISAd[*v1] > ISAd[*v3]) { SWAP(v1, v3); } 159 | if(ISAd[*v1] > ISAd[*v4]) { SWAP(v1, v4); SWAP(v3, v5); } 160 | if(ISAd[*v3] > ISAd[*v4]) { return v4; } 161 | return v3; 162 | } 163 | 164 | /* Returns the pivot element. */ 165 | static INLINE 166 | saidx_t * 167 | tr_pivot(const saidx_t *ISAd, saidx_t *first, saidx_t *last) { 168 | saidx_t *middle; 169 | saidx_t t; 170 | 171 | t = last - first; 172 | middle = first + t / 2; 173 | 174 | if(t <= 512) { 175 | if(t <= 32) { 176 | return tr_median3(ISAd, first, middle, last - 1); 177 | } else { 178 | t >>= 2; 179 | return tr_median5(ISAd, first, first + t, middle, last - 1 - t, last - 1); 180 | } 181 | } 182 | t >>= 3; 183 | first = tr_median3(ISAd, first, first + t, first + (t << 1)); 184 | middle = tr_median3(ISAd, middle - t, middle, middle + t); 185 | last = tr_median3(ISAd, last - 1 - (t << 1), last - 1 - t, last - 1); 186 | return tr_median3(ISAd, first, middle, last); 187 | } 188 | 189 | 190 | /*---------------------------------------------------------------------------*/ 191 | 192 | typedef struct _trbudget_t trbudget_t; 193 | struct _trbudget_t { 194 | saidx_t chance; 195 | saidx_t remain; 196 | saidx_t incval; 197 | saidx_t count; 198 | }; 199 | 200 | static INLINE 201 | void 202 | trbudget_init(trbudget_t 
*budget, saidx_t chance, saidx_t incval) { 203 | budget->chance = chance; 204 | budget->remain = budget->incval = incval; 205 | } 206 | 207 | static INLINE 208 | saint_t 209 | trbudget_check(trbudget_t *budget, saidx_t size) { 210 | if(size <= budget->remain) { budget->remain -= size; return 1; } 211 | if(budget->chance == 0) { budget->count += size; return 0; } 212 | budget->remain += budget->incval - size; 213 | budget->chance -= 1; 214 | return 1; 215 | } 216 | 217 | 218 | /*---------------------------------------------------------------------------*/ 219 | 220 | static INLINE 221 | void 222 | tr_partition(const saidx_t *ISAd, 223 | saidx_t *first, saidx_t *middle, saidx_t *last, 224 | saidx_t **pa, saidx_t **pb, saidx_t v) { 225 | saidx_t *a, *b, *c, *d, *e, *f; 226 | saidx_t t, s; 227 | saidx_t x = 0; 228 | 229 | for(b = middle - 1; (++b < last) && ((x = ISAd[*b]) == v);) { } 230 | if(((a = b) < last) && (x < v)) { 231 | for(; (++b < last) && ((x = ISAd[*b]) <= v);) { 232 | if(x == v) { SWAP(*b, *a); ++a; } 233 | } 234 | } 235 | for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { } 236 | if((b < (d = c)) && (x > v)) { 237 | for(; (b < --c) && ((x = ISAd[*c]) >= v);) { 238 | if(x == v) { SWAP(*c, *d); --d; } 239 | } 240 | } 241 | for(; b < c;) { 242 | SWAP(*b, *c); 243 | for(; (++b < c) && ((x = ISAd[*b]) <= v);) { 244 | if(x == v) { SWAP(*b, *a); ++a; } 245 | } 246 | for(; (b < --c) && ((x = ISAd[*c]) >= v);) { 247 | if(x == v) { SWAP(*c, *d); --d; } 248 | } 249 | } 250 | 251 | if(a <= d) { 252 | c = b - 1; 253 | if((s = a - first) > (t = b - a)) { s = t; } 254 | for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } 255 | if((s = d - c) > (t = last - d - 1)) { s = t; } 256 | for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } 257 | first += (b - a), last -= (d - c); 258 | } 259 | *pa = first, *pb = last; 260 | } 261 | 262 | static 263 | void 264 | tr_copy(saidx_t *ISA, const saidx_t *SA, 265 | saidx_t *first, saidx_t *a, saidx_t *b, 
saidx_t *last, 266 | saidx_t depth) { 267 | /* sort suffixes of middle partition 268 | by using sorted order of suffixes of left and right partition. */ 269 | saidx_t *c, *d, *e; 270 | saidx_t s, v; 271 | 272 | v = b - SA - 1; 273 | for(c = first, d = a - 1; c <= d; ++c) { 274 | if((0 <= (s = *c - depth)) && (ISA[s] == v)) { 275 | *++d = s; 276 | ISA[s] = d - SA; 277 | } 278 | } 279 | for(c = last - 1, e = d + 1, d = b; e < d; --c) { 280 | if((0 <= (s = *c - depth)) && (ISA[s] == v)) { 281 | *--d = s; 282 | ISA[s] = d - SA; 283 | } 284 | } 285 | } 286 | 287 | static 288 | void 289 | tr_partialcopy(saidx_t *ISA, const saidx_t *SA, 290 | saidx_t *first, saidx_t *a, saidx_t *b, saidx_t *last, 291 | saidx_t depth) { 292 | saidx_t *c, *d, *e; 293 | saidx_t s, v; 294 | saidx_t rank, lastrank, newrank = -1; 295 | 296 | v = b - SA - 1; 297 | lastrank = -1; 298 | for(c = first, d = a - 1; c <= d; ++c) { 299 | if((0 <= (s = *c - depth)) && (ISA[s] == v)) { 300 | *++d = s; 301 | rank = ISA[s + depth]; 302 | if(lastrank != rank) { lastrank = rank; newrank = d - SA; } 303 | ISA[s] = newrank; 304 | } 305 | } 306 | 307 | lastrank = -1; 308 | for(e = d; first <= e; --e) { 309 | rank = ISA[*e]; 310 | if(lastrank != rank) { lastrank = rank; newrank = e - SA; } 311 | if(newrank != rank) { ISA[*e] = newrank; } 312 | } 313 | 314 | lastrank = -1; 315 | for(c = last - 1, e = d + 1, d = b; e < d; --c) { 316 | if((0 <= (s = *c - depth)) && (ISA[s] == v)) { 317 | *--d = s; 318 | rank = ISA[s + depth]; 319 | if(lastrank != rank) { lastrank = rank; newrank = d - SA; } 320 | ISA[s] = newrank; 321 | } 322 | } 323 | } 324 | 325 | static 326 | void 327 | tr_introsort(saidx_t *ISA, const saidx_t *ISAd, 328 | saidx_t *SA, saidx_t *first, saidx_t *last, 329 | trbudget_t *budget) { 330 | #define STACK_SIZE TR_STACKSIZE 331 | struct { const saidx_t *a; saidx_t *b, *c; saint_t d, e; }stack[STACK_SIZE]; 332 | saidx_t *a, *b, *c; 333 | saidx_t t; 334 | saidx_t v, x = 0; 335 | saidx_t incr = ISAd - ISA; 
336 | saint_t limit, next; 337 | saint_t ssize, trlink = -1; 338 | 339 | for(ssize = 0, limit = tr_ilg(last - first);;) { 340 | 341 | if(limit < 0) { 342 | if(limit == -1) { 343 | /* tandem repeat partition */ 344 | tr_partition(ISAd - incr, first, first, last, &a, &b, last - SA - 1); 345 | 346 | /* update ranks */ 347 | if(a < last) { 348 | for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; } 349 | } 350 | if(b < last) { 351 | for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } 352 | } 353 | 354 | /* push */ 355 | if(1 < (b - a)) { 356 | STACK_PUSH5(NULL, a, b, 0, 0); 357 | STACK_PUSH5(ISAd - incr, first, last, -2, trlink); 358 | trlink = ssize - 2; 359 | } 360 | if((a - first) <= (last - b)) { 361 | if(1 < (a - first)) { 362 | STACK_PUSH5(ISAd, b, last, tr_ilg(last - b), trlink); 363 | last = a, limit = tr_ilg(a - first); 364 | } else if(1 < (last - b)) { 365 | first = b, limit = tr_ilg(last - b); 366 | } else { 367 | STACK_POP5(ISAd, first, last, limit, trlink); 368 | } 369 | } else { 370 | if(1 < (last - b)) { 371 | STACK_PUSH5(ISAd, first, a, tr_ilg(a - first), trlink); 372 | first = b, limit = tr_ilg(last - b); 373 | } else if(1 < (a - first)) { 374 | last = a, limit = tr_ilg(a - first); 375 | } else { 376 | STACK_POP5(ISAd, first, last, limit, trlink); 377 | } 378 | } 379 | } else if(limit == -2) { 380 | /* tandem repeat copy */ 381 | a = stack[--ssize].b, b = stack[ssize].c; 382 | if(stack[ssize].d == 0) { 383 | tr_copy(ISA, SA, first, a, b, last, ISAd - ISA); 384 | } else { 385 | if(0 <= trlink) { stack[trlink].d = -1; } 386 | tr_partialcopy(ISA, SA, first, a, b, last, ISAd - ISA); 387 | } 388 | STACK_POP5(ISAd, first, last, limit, trlink); 389 | } else { 390 | /* sorted partition */ 391 | if(0 <= *first) { 392 | a = first; 393 | do { ISA[*a] = a - SA; } while((++a < last) && (0 <= *a)); 394 | first = a; 395 | } 396 | if(first < last) { 397 | a = first; do { *a = ~*a; } while(*++a < 0); 398 | next = (ISA[*a] != ISAd[*a]) ? 
tr_ilg(a - first + 1) : -1; 399 | if(++a < last) { for(b = first, v = a - SA - 1; b < a; ++b) { ISA[*b] = v; } } 400 | 401 | /* push */ 402 | if(trbudget_check(budget, a - first)) { 403 | if((a - first) <= (last - a)) { 404 | STACK_PUSH5(ISAd, a, last, -3, trlink); 405 | ISAd += incr, last = a, limit = next; 406 | } else { 407 | if(1 < (last - a)) { 408 | STACK_PUSH5(ISAd + incr, first, a, next, trlink); 409 | first = a, limit = -3; 410 | } else { 411 | ISAd += incr, last = a, limit = next; 412 | } 413 | } 414 | } else { 415 | if(0 <= trlink) { stack[trlink].d = -1; } 416 | if(1 < (last - a)) { 417 | first = a, limit = -3; 418 | } else { 419 | STACK_POP5(ISAd, first, last, limit, trlink); 420 | } 421 | } 422 | } else { 423 | STACK_POP5(ISAd, first, last, limit, trlink); 424 | } 425 | } 426 | continue; 427 | } 428 | 429 | if((last - first) <= TR_INSERTIONSORT_THRESHOLD) { 430 | tr_insertionsort(ISAd, first, last); 431 | limit = -3; 432 | continue; 433 | } 434 | 435 | if(limit-- == 0) { 436 | tr_heapsort(ISAd, first, last - first); 437 | for(a = last - 1; first < a; a = b) { 438 | for(x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; } 439 | } 440 | limit = -3; 441 | continue; 442 | } 443 | 444 | /* choose pivot */ 445 | a = tr_pivot(ISAd, first, last); 446 | SWAP(*first, *a); 447 | v = ISAd[*first]; 448 | 449 | /* partition */ 450 | tr_partition(ISAd, first, first + 1, last, &a, &b, v); 451 | if((last - first) != (b - a)) { 452 | next = (ISA[*a] != v) ? 
tr_ilg(b - a) : -1; 453 | 454 | /* update ranks */ 455 | for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; } 456 | if(b < last) { for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } } 457 | 458 | /* push */ 459 | if((1 < (b - a)) && (trbudget_check(budget, b - a))) { 460 | if((a - first) <= (last - b)) { 461 | if((last - b) <= (b - a)) { 462 | if(1 < (a - first)) { 463 | STACK_PUSH5(ISAd + incr, a, b, next, trlink); 464 | STACK_PUSH5(ISAd, b, last, limit, trlink); 465 | last = a; 466 | } else if(1 < (last - b)) { 467 | STACK_PUSH5(ISAd + incr, a, b, next, trlink); 468 | first = b; 469 | } else { 470 | ISAd += incr, first = a, last = b, limit = next; 471 | } 472 | } else if((a - first) <= (b - a)) { 473 | if(1 < (a - first)) { 474 | STACK_PUSH5(ISAd, b, last, limit, trlink); 475 | STACK_PUSH5(ISAd + incr, a, b, next, trlink); 476 | last = a; 477 | } else { 478 | STACK_PUSH5(ISAd, b, last, limit, trlink); 479 | ISAd += incr, first = a, last = b, limit = next; 480 | } 481 | } else { 482 | STACK_PUSH5(ISAd, b, last, limit, trlink); 483 | STACK_PUSH5(ISAd, first, a, limit, trlink); 484 | ISAd += incr, first = a, last = b, limit = next; 485 | } 486 | } else { 487 | if((a - first) <= (b - a)) { 488 | if(1 < (last - b)) { 489 | STACK_PUSH5(ISAd + incr, a, b, next, trlink); 490 | STACK_PUSH5(ISAd, first, a, limit, trlink); 491 | first = b; 492 | } else if(1 < (a - first)) { 493 | STACK_PUSH5(ISAd + incr, a, b, next, trlink); 494 | last = a; 495 | } else { 496 | ISAd += incr, first = a, last = b, limit = next; 497 | } 498 | } else if((last - b) <= (b - a)) { 499 | if(1 < (last - b)) { 500 | STACK_PUSH5(ISAd, first, a, limit, trlink); 501 | STACK_PUSH5(ISAd + incr, a, b, next, trlink); 502 | first = b; 503 | } else { 504 | STACK_PUSH5(ISAd, first, a, limit, trlink); 505 | ISAd += incr, first = a, last = b, limit = next; 506 | } 507 | } else { 508 | STACK_PUSH5(ISAd, first, a, limit, trlink); 509 | STACK_PUSH5(ISAd, b, last, limit, trlink); 510 | ISAd += incr, 
first = a, last = b, limit = next; 511 | } 512 | } 513 | } else { 514 | if((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; } 515 | if((a - first) <= (last - b)) { 516 | if(1 < (a - first)) { 517 | STACK_PUSH5(ISAd, b, last, limit, trlink); 518 | last = a; 519 | } else if(1 < (last - b)) { 520 | first = b; 521 | } else { 522 | STACK_POP5(ISAd, first, last, limit, trlink); 523 | } 524 | } else { 525 | if(1 < (last - b)) { 526 | STACK_PUSH5(ISAd, first, a, limit, trlink); 527 | first = b; 528 | } else if(1 < (a - first)) { 529 | last = a; 530 | } else { 531 | STACK_POP5(ISAd, first, last, limit, trlink); 532 | } 533 | } 534 | } 535 | } else { 536 | if(trbudget_check(budget, last - first)) { 537 | limit = tr_ilg(last - first), ISAd += incr; 538 | } else { 539 | if(0 <= trlink) { stack[trlink].d = -1; } 540 | STACK_POP5(ISAd, first, last, limit, trlink); 541 | } 542 | } 543 | } 544 | #undef STACK_SIZE 545 | } 546 | 547 | 548 | 549 | /*---------------------------------------------------------------------------*/ 550 | 551 | /*- Function -*/ 552 | 553 | /* Tandem repeat sort */ 554 | void 555 | trsort(saidx_t *ISA, saidx_t *SA, saidx_t n, saidx_t depth) { 556 | saidx_t *ISAd; 557 | saidx_t *first, *last; 558 | trbudget_t budget; 559 | saidx_t t, skip, unsorted; 560 | 561 | trbudget_init(&budget, tr_ilg(n) * 2 / 3, n); 562 | /* trbudget_init(&budget, tr_ilg(n) * 3 / 4, n); */ 563 | for(ISAd = ISA + depth; -n < *SA; ISAd += ISAd - ISA) { 564 | first = SA; 565 | skip = 0; 566 | unsorted = 0; 567 | do { 568 | if((t = *first) < 0) { first -= t; skip += t; } 569 | else { 570 | if(skip != 0) { *(first + skip) = skip; skip = 0; } 571 | last = SA + ISA[t] + 1; 572 | if(1 < (last - first)) { 573 | budget.count = 0; 574 | tr_introsort(ISA, ISAd, SA, first, last, &budget); 575 | if(budget.count != 0) { unsorted += budget.count; } 576 | else { skip = first - last; } 577 | } else if((last - first) == 1) { 578 | skip = -1; 579 | } 580 | first = last; 581 | } 582 | } while(first 
< (SA + n)); 583 | if(skip != 0) { *(first + skip) = skip; } 584 | if(unsorted == 0) { break; } 585 | } 586 | } 587 | -------------------------------------------------------------------------------- /lib/sssort.c: -------------------------------------------------------------------------------- 1 | /* 2 | * sssort.c for libdivsufsort 3 | * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. 4 | * 5 | * Permission is hereby granted, free of charge, to any person 6 | * obtaining a copy of this software and associated documentation 7 | * files (the "Software"), to deal in the Software without 8 | * restriction, including without limitation the rights to use, 9 | * copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the 11 | * Software is furnished to do so, subject to the following 12 | * conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be 15 | * included in all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 19 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 20 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 21 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 22 | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 24 | * OTHER DEALINGS IN THE SOFTWARE. 
25 | */ 26 | 27 | #include "divsufsort_private.h" 28 | 29 | 30 | /*- Private Functions -*/ 31 | 32 | static const saint_t lg_table[256]= { 33 | -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, 34 | 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, 35 | 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, 36 | 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, 37 | 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 38 | 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 39 | 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 40 | 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 41 | }; 42 | 43 | #if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) 44 | 45 | static INLINE 46 | saint_t 47 | ss_ilg(saidx_t n) { 48 | #if SS_BLOCKSIZE == 0 49 | # if defined(BUILD_DIVSUFSORT64) 50 | return (n >> 32) ? 51 | ((n >> 48) ? 52 | ((n >> 56) ? 53 | 56 + lg_table[(n >> 56) & 0xff] : 54 | 48 + lg_table[(n >> 48) & 0xff]) : 55 | ((n >> 40) ? 56 | 40 + lg_table[(n >> 40) & 0xff] : 57 | 32 + lg_table[(n >> 32) & 0xff])) : 58 | ((n & 0xffff0000) ? 59 | ((n & 0xff000000) ? 60 | 24 + lg_table[(n >> 24) & 0xff] : 61 | 16 + lg_table[(n >> 16) & 0xff]) : 62 | ((n & 0x0000ff00) ? 63 | 8 + lg_table[(n >> 8) & 0xff] : 64 | 0 + lg_table[(n >> 0) & 0xff])); 65 | # else 66 | return (n & 0xffff0000) ? 67 | ((n & 0xff000000) ? 68 | 24 + lg_table[(n >> 24) & 0xff] : 69 | 16 + lg_table[(n >> 16) & 0xff]) : 70 | ((n & 0x0000ff00) ? 71 | 8 + lg_table[(n >> 8) & 0xff] : 72 | 0 + lg_table[(n >> 0) & 0xff]); 73 | # endif 74 | #elif SS_BLOCKSIZE < 256 75 | return lg_table[n]; 76 | #else 77 | return (n & 0xff00) ? 
78 | 8 + lg_table[(n >> 8) & 0xff] : 79 | 0 + lg_table[(n >> 0) & 0xff]; 80 | #endif 81 | } 82 | 83 | #endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */ 84 | 85 | #if SS_BLOCKSIZE != 0 86 | 87 | static const saint_t sqq_table[256] = { 88 | 0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, 59, 61, 89 | 64, 65, 67, 69, 71, 73, 75, 76, 78, 80, 81, 83, 84, 86, 87, 89, 90 | 90, 91, 93, 94, 96, 97, 98, 99, 101, 102, 103, 104, 106, 107, 108, 109, 91 | 110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 92 | 128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 93 | 143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155, 94 | 156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168, 95 | 169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180, 96 | 181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191, 97 | 192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201, 98 | 202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211, 99 | 212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221, 100 | 221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230, 101 | 230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238, 102 | 239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247, 103 | 247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255 104 | }; 105 | 106 | static INLINE 107 | saidx_t 108 | ss_isqrt(saidx_t x) { 109 | saidx_t y, e; 110 | 111 | if(x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; } 112 | e = (x & 0xffff0000) ? 113 | ((x & 0xff000000) ? 114 | 24 + lg_table[(x >> 24) & 0xff] : 115 | 16 + lg_table[(x >> 16) & 0xff]) : 116 | ((x & 0x0000ff00) ? 
117 | 8 + lg_table[(x >> 8) & 0xff] : 118 | 0 + lg_table[(x >> 0) & 0xff]); 119 | 120 | if(e >= 16) { 121 | y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7); 122 | if(e >= 24) { y = (y + 1 + x / y) >> 1; } 123 | y = (y + 1 + x / y) >> 1; 124 | } else if(e >= 8) { 125 | y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1; 126 | } else { 127 | return sqq_table[x] >> 4; 128 | } 129 | 130 | return (x < (y * y)) ? y - 1 : y; 131 | } 132 | 133 | #endif /* SS_BLOCKSIZE != 0 */ 134 | 135 | 136 | /*---------------------------------------------------------------------------*/ 137 | 138 | /* Compares two suffixes. */ 139 | static INLINE 140 | saint_t 141 | ss_compare(const sauchar_t *T, 142 | const saidx_t *p1, const saidx_t *p2, 143 | saidx_t depth) { 144 | const sauchar_t *U1, *U2, *U1n, *U2n; 145 | 146 | for(U1 = T + depth + *p1, 147 | U2 = T + depth + *p2, 148 | U1n = T + *(p1 + 1) + 2, 149 | U2n = T + *(p2 + 1) + 2; 150 | (U1 < U1n) && (U2 < U2n) && (*U1 == *U2); 151 | ++U1, ++U2) { 152 | } 153 | 154 | return U1 < U1n ? 155 | (U2 < U2n ? *U1 - *U2 : 1) : 156 | (U2 < U2n ? 
-1 : 0); 157 | } 158 | 159 | 160 | /*---------------------------------------------------------------------------*/ 161 | 162 | #if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) 163 | 164 | /* Insertionsort for small size groups */ 165 | static 166 | void 167 | ss_insertionsort(const sauchar_t *T, const saidx_t *PA, 168 | saidx_t *first, saidx_t *last, saidx_t depth) { 169 | saidx_t *i, *j; 170 | saidx_t t; 171 | saint_t r; 172 | 173 | for(i = last - 2; first <= i; --i) { 174 | for(t = *i, j = i + 1; 0 < (r = ss_compare(T, PA + t, PA + *j, depth));) { 175 | do { *(j - 1) = *j; } while((++j < last) && (*j < 0)); 176 | if(last <= j) { break; } 177 | } 178 | if(r == 0) { *j = ~*j; } 179 | *(j - 1) = t; 180 | } 181 | } 182 | 183 | #endif /* (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) */ 184 | 185 | 186 | /*---------------------------------------------------------------------------*/ 187 | 188 | #if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) 189 | 190 | static INLINE 191 | void 192 | ss_fixdown(const sauchar_t *Td, const saidx_t *PA, 193 | saidx_t *SA, saidx_t i, saidx_t size) { 194 | saidx_t j, k; 195 | saidx_t v; 196 | saint_t c, d, e; 197 | 198 | for(v = SA[i], c = Td[PA[v]]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) { 199 | d = Td[PA[SA[k = j++]]]; 200 | if(d < (e = Td[PA[SA[j]]])) { k = j; d = e; } 201 | if(d <= c) { break; } 202 | } 203 | SA[i] = v; 204 | } 205 | 206 | /* Simple top-down heapsort. 
*/
      /* ss_heapsort orders SA[0..size) ascending by the key Td[PA[SA[k]]].
       * The heap is always built over an odd number m of entries; for an even
       * size the last entry is first ordered against SA[m / 2], kept out of the
       * heap, and then exchanged with the heap maximum.
       * SWAP is defined outside this chunk; presumably it uses the local t as
       * scratch storage - confirm against its definition. */
207 | static
208 | void
209 | ss_heapsort(const sauchar_t *Td, const saidx_t *PA, saidx_t *SA, saidx_t size) {
210 |   saidx_t i, m;
211 |   saidx_t t;
212 |
213 |   m = size;
214 |   if((size % 2) == 0) {
215 |     m--;
216 |     if(Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) { SWAP(SA[m], SA[m / 2]); }
217 |   }
218 |   /* build the heap bottom-up over the first m entries */
219 |   for(i = m / 2 - 1; 0 <= i; --i) { ss_fixdown(Td, PA, SA, i, m); }
220 |   if((size % 2) == 0) { SWAP(SA[0], SA[m]); ss_fixdown(Td, PA, SA, 0, m); }
221 |   for(i = m - 1; 0 < i; --i) { /* repeatedly move the current maximum to slot i */
222 |     t = SA[0], SA[0] = SA[i];
223 |     ss_fixdown(Td, PA, SA, 0, i);
224 |     SA[i] = t;
225 |   }
226 | }
227 |
228 |
229 | /*---------------------------------------------------------------------------*/
230 |
231 | /* Returns the median of three elements.
       *
       * v1..v3 point at entries; they are ranked by the key Td[PA[*v]]. Only the
       * local pointers are exchanged, the entries themselves are not modified.
       * The local t is not referenced directly; presumably SWAP (defined outside
       * this chunk) uses it as scratch storage. */
232 | static INLINE
233 | saidx_t *
234 | ss_median3(const sauchar_t *Td, const saidx_t *PA,
235 |            saidx_t *v1, saidx_t *v2, saidx_t *v3) {
236 |   saidx_t *t;
237 |   if(Td[PA[*v1]] > Td[PA[*v2]]) { SWAP(v1, v2); } /* now key(v1) <= key(v2) */
238 |   if(Td[PA[*v2]] > Td[PA[*v3]]) {
239 |     if(Td[PA[*v1]] > Td[PA[*v3]]) { return v1; }
240 |     else { return v3; }
241 |   }
242 |   return v2;
243 | }
244 |
245 | /* Returns the median of five elements.
       *
       * Same conventions as ss_median3: pointers are ranked by Td[PA[*v]] and
       * only the local pointer variables are exchanged. */
246 | static INLINE
247 | saidx_t *
248 | ss_median5(const sauchar_t *Td, const saidx_t *PA,
249 |            saidx_t *v1, saidx_t *v2, saidx_t *v3, saidx_t *v4, saidx_t *v5) {
250 |   saidx_t *t;
251 |   if(Td[PA[*v2]] > Td[PA[*v3]]) { SWAP(v2, v3); }
252 |   if(Td[PA[*v4]] > Td[PA[*v5]]) { SWAP(v4, v5); }
253 |   if(Td[PA[*v2]] > Td[PA[*v4]]) { SWAP(v2, v4); SWAP(v3, v5); }
254 |   if(Td[PA[*v1]] > Td[PA[*v3]]) { SWAP(v1, v3); }
255 |   if(Td[PA[*v1]] > Td[PA[*v4]]) { SWAP(v1, v4); SWAP(v3, v5); }
256 |   if(Td[PA[*v3]] > Td[PA[*v4]]) { return v4; }
257 |   return v3;
258 | }
259 |
260 | /* Returns the pivot element.
*/
      /* ss_pivot picks a pivot entry from [first, last) by sampling keys
       * Td[PA[*p]]: up to 32 entries use a median of three, up to 512 entries a
       * median of five, and larger ranges the median of three medians of three. */
261 | static INLINE
262 | saidx_t *
263 | ss_pivot(const sauchar_t *Td, const saidx_t *PA, saidx_t *first, saidx_t *last) {
264 |   saidx_t *middle;
265 |   saidx_t t;
266 |
267 |   t = last - first;
268 |   middle = first + t / 2;
269 |
270 |   if(t <= 512) {
271 |     if(t <= 32) {
272 |       return ss_median3(Td, PA, first, middle, last - 1);
273 |     } else {
274 |       t >>= 2; /* sample at quarter-range steps */
275 |       return ss_median5(Td, PA, first, first + t, middle, last - 1 - t, last - 1);
276 |     }
277 |   }
278 |   t >>= 3; /* sample at eighth-range steps; first/middle/last are reused as the three partial medians */
279 |   first  = ss_median3(Td, PA, first, first + t, first + (t << 1));
280 |   middle = ss_median3(Td, PA, middle - t, middle, middle + t);
281 |   last   = ss_median3(Td, PA, last - 1 - (t << 1), last - 1 - t, last - 1);
282 |   return ss_median3(Td, PA, first, middle, last);
283 | }
284 |
285 |
286 | /*---------------------------------------------------------------------------*/
287 |
288 | /* Binary partition for substrings.
       *
       * Rearranges [first, last) so that every entry x with
       * PA[x] + depth >= PA[x + 1] + 1 comes first, and returns a pointer to the
       * first entry that does not satisfy that test. Entries moved to the front
       * group are stored as their bitwise complement, except that *first is
       * complemented back when the front group is not empty.
       * NOTE(review): the test presumably selects substrings that have no byte
       * left at this depth - confirm against the caller's definition of PA. */
289 | static INLINE
290 | saidx_t *
291 | ss_partition(const saidx_t *PA,
292 |                     saidx_t *first, saidx_t *last, saidx_t depth) {
293 |   saidx_t *a, *b;
294 |   saidx_t t;
295 |   for(a = first - 1, b = last;;) {
296 |     for(; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; }
297 |     for(; (a < --b) && ((PA[*b] + depth) <  (PA[*b + 1] + 1));) { }
298 |     if(b <= a) { break; }
299 |     t = ~*b; /* exchange *a and *b, complementing the one that joins the front group */
300 |     *b = *a;
301 |     *a = t;
302 |   }
303 |   if(first < a) { *first = ~*first; }
304 |   return a;
305 | }
306 |
307 | /* Multikey introsort for medium size groups.
*/
      /* ss_mintrosort sorts [first, last) one key byte at a time (Td = T + depth)
       * using an explicit stack of pending (first, last, depth, limit) ranges
       * instead of recursion.
       *  - Ranges of at most SS_INSERTIONSORT_THRESHOLD entries go to
       *    ss_insertionsort.
       *  - limit starts at ss_ilg(range size) and is decremented per round; when
       *    it reaches zero the range is heapsorted on the current byte, and a
       *    negative limit marks a range already ordered by that byte.
       *  - Otherwise the range is split three ways around a pivot key.
       * STACK_PUSH, STACK_POP, SWAP and ss_ilg are defined outside this chunk;
       * presumably STACK_POP returns from the function once the stack is empty
       * and ssize tracks the stack depth - confirm against their definitions. */
308 | static
309 | void
310 | ss_mintrosort(const sauchar_t *T, const saidx_t *PA,
311 |               saidx_t *first, saidx_t *last,
312 |               saidx_t depth) {
313 | #define STACK_SIZE SS_MISORT_STACKSIZE
314 |   struct { saidx_t *a, *b, c; saint_t d; } stack[STACK_SIZE];
315 |   const sauchar_t *Td;
316 |   saidx_t *a, *b, *c, *d, *e, *f;
317 |   saidx_t s, t;
318 |   saint_t ssize;
319 |   saint_t limit;
320 |   saint_t v, x = 0;
321 |
322 |   for(ssize = 0, limit = ss_ilg(last - first);;) {
323 |     /* small range: finish it directly and fetch the next pending range */
324 |     if((last - first) <= SS_INSERTIONSORT_THRESHOLD) {
325 | #if 1 < SS_INSERTIONSORT_THRESHOLD
326 |       if(1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); }
327 | #endif
328 |       STACK_POP(first, last, depth, limit);
329 |       continue;
330 |     }
331 |
332 |     Td = T + depth;
333 |     if(limit-- == 0) { ss_heapsort(Td, PA, first, last - first); }
334 |     if(limit < 0) { /* range is ordered by the byte at Td: peel off the first run of equal keys */
335 |       for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) {
336 |         if((x = Td[PA[*a]]) != v) {
337 |           if(1 < (a - first)) { break; } /* [first, a) is a run of at least two equal keys */
338 |           v = x;
339 |           first = a; /* a run of length one needs no further work; skip it */
340 |         }
341 |       }
342 |       if(Td[PA[*first] - 1] < v) {
343 |         first = ss_partition(PA, first, a, depth);
344 |       }
345 |       if((a - first) <= (last - a)) { /* continue with the smaller part, defer the larger one */
346 |         if(1 < (a - first)) {
347 |           STACK_PUSH(a, last, depth, -1);
348 |           last = a, depth += 1, limit = ss_ilg(a - first);
349 |         } else {
350 |           first = a, limit = -1;
351 |         }
352 |       } else {
353 |         if(1 < (last - a)) {
354 |           STACK_PUSH(first, a, depth + 1, ss_ilg(a - first));
355 |           first = a, limit = -1;
356 |         } else {
357 |           last = a, depth += 1, limit = ss_ilg(a - first);
358 |         }
359 |       }
360 |       continue;
361 |     }
362 |
363 |     /* choose pivot */
364 |     a = ss_pivot(Td, PA, first, last);
365 |     v = Td[PA[*a]];
366 |     SWAP(*first, *a);
367 |
368 |     /* partition: keys equal to v are collected at both ends, [first, a) and (d, last) */
369 |     for(b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { }
370 |     if(((a = b) < last) && (x < v)) {
371 |       for(; (++b < last) && ((x = Td[PA[*b]]) <= v);) {
372 |         if(x == v) { SWAP(*b, *a); ++a; }
373 |       }
374 |     }
375 |     for(c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { }
376 |     if((b < (d = c)) && (x > v)) {
377 |       for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
378 |         if(x == v) { SWAP(*c, *d); --d; }
379 |       }
380 |     }
381 |     for(; b < c;) {
382 |       SWAP(*b, *c);
383 |       for(; (++b < c) && ((x = Td[PA[*b]]) <= v);) {
384 |         if(x == v) { SWAP(*b, *a); ++a; }
385 |       }
386 |       for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
387 |         if(x == v) { SWAP(*c, *d); --d; }
388 |       }
389 |     }
390 |
391 |     if(a <= d) { /* at least one key differs from v */
392 |       c = b - 1;
393 |       /* move the equal-key blocks from both ends into the middle */
394 |       if((s = a - first) > (t = b - a)) { s = t; }
395 |       for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
396 |       if((s = d - c) > (t = last - d - 1)) { s = t; }
397 |       for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
398 |       /* now [first, a) < v, [a, c) == v, [c, last) > v */
399 |       a = first + (b - a), c = last - (d - c);
400 |       b = (v <= Td[PA[*a] - 1]) ? a : ss_partition(PA, a, c, depth);
401 |       /* [b, c) continues at depth + 1; the order below always continues with the smallest of the three parts */
402 |       if((a - first) <= (last - c)) {
403 |         if((last - c) <= (c - b)) {
404 |           STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
405 |           STACK_PUSH(c, last, depth, limit);
406 |           last = a;
407 |         } else if((a - first) <= (c - b)) {
408 |           STACK_PUSH(c, last, depth, limit);
409 |           STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
410 |           last = a;
411 |         } else {
412 |           STACK_PUSH(c, last, depth, limit);
413 |           STACK_PUSH(first, a, depth, limit);
414 |           first = b, last = c, depth += 1, limit = ss_ilg(c - b);
415 |         }
416 |       } else {
417 |         if((a - first) <= (c - b)) {
418 |           STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
419 |           STACK_PUSH(first, a, depth, limit);
420 |           first = c;
421 |         } else if((last - c) <= (c - b)) {
422 |           STACK_PUSH(first, a, depth, limit);
423 |           STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
424 |           first = c;
425 |         } else {
426 |           STACK_PUSH(first, a, depth, limit);
427 |           STACK_PUSH(c, last, depth, limit);
428 |           first = b, last = c, depth += 1, limit = ss_ilg(c - b);
429 |         }
430 |       }
431 |     } else { /* every key equals v: undo the limit decrement and move to the next byte */
432 |       limit += 1;
433 |       if(Td[PA[*first] - 1] < v) {
434 |         first = ss_partition(PA, first, last, depth);
435 |         limit = ss_ilg(last - first);
436 |       }
437 |       depth += 1;
438 |     }
439 |   }
440 | #undef STACK_SIZE
441 | }
442 |
443 | #endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
444 |
445 |
446 | /*---------------------------------------------------------------------------*/
447 |
448 | #if SS_BLOCKSIZE != 0
449 | /* Exchanges the n entries starting at a with the n entries starting at b. */
450 | static INLINE
451 | void
452 | ss_blockswap(saidx_t *a, saidx_t *b, saidx_t n) {
453 |   saidx_t t;
454 |   for(; 0 < n; --n, ++a, ++b) {
455 |     t = *a, *a = *b, *b = t;
456 |   }
457 | }
458 | /* Rearranges [first, last) in place, where l and r are the lengths of
       * [first, middle) and [middle, last); equal lengths are handled with one
       * ss_blockswap.
       * NOTE(review): the use in ss_inplacemerge indicates this exchanges the two
       * adjacent ranges (a rotation) - the comma expressions below depend on
       * their exact left-to-right evaluation order. */
459 | static INLINE
460 | void
461 | ss_rotate(saidx_t *first, saidx_t *middle, saidx_t *last) {
462 |   saidx_t *a, *b, t;
463 |   saidx_t l, r;
464 |   l = middle - first, r = last - middle;
465 |   for(; (0 < l) && (0 < r);) {
466 |     if(l == r) { ss_blockswap(first, middle, l); break; }
467 |     if(l < r) { /* left range is shorter: work backwards from the end */
468 |       a = last - 1, b = middle - 1;
469 |       t = *a;
470 |       do {
471 |         *a-- = *b, *b-- = *a;
472 |         if(b < first) {
473 |           *a = t;
474 |           last = a;
475 |           if((r -= l + 1) <= l) { break; }
476 |           a -= 1, b = middle - 1;
477 |           t = *a;
478 |         }
479 |       } while(1);
480 |     } else { /* right range is shorter: work forwards from the start */
481 |       a = first, b = middle;
482 |       t = *a;
483 |       do {
484 |         *a++ = *b, *b++ = *a;
485 |         if(last <= b) {
486 |           *a = t;
487 |           first = a + 1;
488 |           if((l -= r + 1) <= r) { break; }
489 |           a += 1, b = middle;
490 |           t = *a;
491 |         }
492 |       } while(1);
493 |     }
494 |   }
495 | }
496 |
497 |
498 | /*---------------------------------------------------------------------------*/
499 | /* Merges the ranges [first, middle) and [middle, last) without a buffer.
       * Each round takes the last entry of the right range (complemented entries
       * are decoded with ~), binary-searches [first, middle) with ss_compare for
       * the first entry not smaller than it, and calls ss_rotate on
       * (a, middle, last) when such an entry exists. If the search saw an equal
       * entry last (r == 0), *a is stored complemented. */
500 | static
501 | void
502 | ss_inplacemerge(const sauchar_t *T, const saidx_t *PA,
503 |                 saidx_t *first, saidx_t *middle, saidx_t *last,
504 |                 saidx_t depth) {
505 |   const saidx_t *p;
506 |   saidx_t *a, *b;
507 |   saidx_t len, half;
508 |   saint_t q, r;
509 |   saint_t x;
510 |
511 |   for(;;) {
512 |     if(*(last - 1) < 0) { x = 1; p = PA + ~*(last - 1); } /* x remembers that the probe entry was complemented */
513 |     else                { x = 0; p = PA +  *(last - 1); }
514 |     for(a = first, len = middle - first, half = len >> 1, r = -1;
515 |         0 < len;
516 |         len = half, half >>= 1) {
517 |       b = a + half;
518 |       q = ss_compare(T, PA + ((0 <= *b) ? *b : ~*b), p, depth);
519 |       if(q < 0) {
520 |         a = b + 1;
521 |         half -= (len & 1) ^ 1;
522 |       } else {
523 |         r = q;
524 |       }
525 |     }
526 |     if(a < middle) {
527 |       if(r == 0) { *a = ~*a; }
528 |       ss_rotate(a, middle, last);
529 |       last -= middle - a;
530 |       middle = a;
531 |       if(first == middle) { break; }
532 |     }
533 |     --last;
534 |     if(x != 0) { while(*--last < 0) { } } /* also step back over the complemented entries before it */
535 |     if(middle == last) { break; }
536 |   }
537 | }
538 |
539 |
540 | /*---------------------------------------------------------------------------*/
541 |
542 | /* Merge-forward with internal buffer.
       *
       * Swaps the left range [first, middle) into buf (buf must hold at least
       * middle - first entries) and merges it with [middle, last) from front to
       * back into [first, last). Entries displaced from the destination are
       * written back into the slots just consumed, so buf ends up holding what
       * it held before, possibly in a different order. When the two heads compare
       * equal, the right-hand entry is stored complemented; complemented
       * (negative) entries are copied along with the entry in front of them. */
543 | static
544 | void
545 | ss_mergeforward(const sauchar_t *T, const saidx_t *PA,
546 |                 saidx_t *first, saidx_t *middle, saidx_t *last,
547 |                 saidx_t *buf, saidx_t depth) {
548 |   saidx_t *a, *b, *c, *bufend;
549 |   saidx_t t;
550 |   saint_t r;
551 |
552 |   bufend = buf + (middle - first) - 1;
553 |   ss_blockswap(buf, first, middle - first);
554 |   /* a: output cursor, b: cursor in buf (left run), c: cursor in the right run, t: entry displaced from *first */
555 |   for(t = *(a = first), b = buf, c = middle;;) {
556 |     r = ss_compare(T, PA + *b, PA + *c, depth);
557 |     if(r < 0) {
558 |       do {
559 |         *a++ = *b;
560 |         if(bufend <= b) { *bufend = t; return; }
561 |         *b++ = *a;
562 |       } while(*b < 0);
563 |     } else if(r > 0) {
564 |       do {
565 |         *a++ = *c, *c++ = *a;
566 |         if(last <= c) { /* right run exhausted: flush the rest of buf */
567 |           while(b < bufend) { *a++ = *b, *b++ = *a; }
568 |           *a = *b, *b = t;
569 |           return;
570 |         }
571 |       } while(*c < 0);
572 |     } else {
573 |       *c = ~*c; /* equal heads: mark the right-hand entry, then emit left before right */
574 |       do {
575 |         *a++ = *b;
576 |         if(bufend <= b) { *bufend = t; return; }
577 |         *b++ = *a;
578 |       } while(*b < 0);
579 |
580 |       do {
581 |         *a++ = *c, *c++ = *a;
582 |         if(last <= c) {
583 |           while(b < bufend) { *a++ = *b, *b++ = *a; }
584 |           *a = *b, *b = t;
585 |           return;
586 |         }
587 |       } while(*c < 0);
588 |     }
589 |   }
590 | }
591 |
592 | /* Merge-backward with internal buffer.
*/
      /* ss_mergebackward swaps the right range [middle, last) into buf (buf must
       * hold at least last - middle entries) and merges it with [first, middle)
       * from back to front into [first, last).
       * p1 / p2 point at the PA entry of the current tail of the buffered run and
       * of the left run, decoded with ~ when the stored entry is negative.
       * Bit 1 of x records that the buffered tail was negative, bit 2 the same
       * for the left tail; such a group of negative entries is copied as a whole
       * before the flag is cleared. When both tails compare equal, the buffered
       * entry is written complemented. */
593 | static
594 | void
595 | ss_mergebackward(const sauchar_t *T, const saidx_t *PA,
596 |                  saidx_t *first, saidx_t *middle, saidx_t *last,
597 |                  saidx_t *buf, saidx_t depth) {
598 |   const saidx_t *p1, *p2;
599 |   saidx_t *a, *b, *c, *bufend;
600 |   saidx_t t;
601 |   saint_t r;
602 |   saint_t x;
603 |
604 |   bufend = buf + (last - middle) - 1;
605 |   ss_blockswap(buf, middle, last - middle);
606 |
607 |   x = 0;
608 |   if(*bufend < 0)       { p1 = PA + ~*bufend; x |= 1; }
609 |   else                  { p1 = PA +  *bufend; }
610 |   if(*(middle - 1) < 0) { p2 = PA + ~*(middle - 1); x |= 2; }
611 |   else                  { p2 = PA +  *(middle - 1); }
612 |   for(t = *(a = last - 1), b = bufend, c = middle - 1;;) { /* a: output cursor, b: buffered run, c: left run, t: entry displaced from last - 1 */
613 |     r = ss_compare(T, p1, p2, depth);
614 |     if(0 < r) { /* buffered tail is larger: emit it */
615 |       if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
616 |       *a-- = *b;
617 |       if(b <= buf) { *buf = t; break; }
618 |       *b-- = *a;
619 |       if(*b < 0) { p1 = PA + ~*b; x |= 1; }
620 |       else       { p1 = PA +  *b; }
621 |     } else if(r < 0) { /* left tail is larger: emit it */
622 |       if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
623 |       *a-- = *c, *c-- = *a;
624 |       if(c < first) { /* left run exhausted: flush the rest of buf */
625 |         while(buf < b) { *a-- = *b, *b-- = *a; }
626 |         *a = *b, *b = t;
627 |         break;
628 |       }
629 |       if(*c < 0) { p2 = PA + ~*c; x |= 2; }
630 |       else       { p2 = PA +  *c; }
631 |     } else { /* equal tails: emit the buffered entry complemented, then the left entry */
632 |       if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
633 |       *a-- = ~*b;
634 |       if(b <= buf) { *buf = t; break; }
635 |       *b-- = *a;
636 |       if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
637 |       *a-- = *c, *c-- = *a;
638 |       if(c < first) {
639 |         while(buf < b) { *a-- = *b, *b-- = *a; }
640 |         *a = *b, *b = t;
641 |         break;
642 |       }
643 |       if(*b < 0) { p1 = PA + ~*b; x |= 1; }
644 |       else       { p1 = PA +  *b; }
645 |       if(*c < 0) { p2 = PA + ~*c; x |= 2; }
646 |       else       { p2 = PA +  *c; }
647 |     }
648 |   }
649 | }
650 |
651 | /* D&C based merge.
*/
      /* ss_swapmerge merges [first, middle) with [middle, last) using a buffer of
       * bufsize entries and an explicit stack of pending (first, middle, last,
       * check) jobs. A job whose right or left part fits into the buffer is
       * handed to ss_mergebackward / ss_mergeforward. Otherwise a binary search
       * finds the number m of entries to exchange across middle, the two blocks
       * of m entries are swapped, and the two halves become separate jobs.
       * GETIDX decodes a possibly complemented entry. MERGE_CHECK(a, b, c)
       * complements *a when bit 1 of c is set, or when bit 2 is set and *a
       * compares equal to its predecessor; it complements *b when bit 4 is set
       * and *b compares equal to its predecessor.
       * STACK_PUSH, STACK_POP and MIN are defined outside this chunk; presumably
       * STACK_POP returns from the function once the stack is empty - confirm. */
652 | static
653 | void
654 | ss_swapmerge(const sauchar_t *T, const saidx_t *PA,
655 |              saidx_t *first, saidx_t *middle, saidx_t *last,
656 |              saidx_t *buf, saidx_t bufsize, saidx_t depth) {
657 | #define STACK_SIZE SS_SMERGE_STACKSIZE
658 | #define GETIDX(a) ((0 <= (a)) ? (a) : (~(a)))
659 | #define MERGE_CHECK(a, b, c)\
660 |   do {\
661 |     if(((c) & 1) ||\
662 |        (((c) & 2) && (ss_compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) == 0))) {\
663 |       *(a) = ~*(a);\
664 |     }\
665 |     if(((c) & 4) && ((ss_compare(T, PA + GETIDX(*((b) - 1)), PA + *(b), depth) == 0))) {\
666 |       *(b) = ~*(b);\
667 |     }\
668 |   } while(0)
669 |   struct { saidx_t *a, *b, *c; saint_t d; } stack[STACK_SIZE];
670 |   saidx_t *l, *r, *lm, *rm;
671 |   saidx_t m, len, half;
672 |   saint_t ssize;
673 |   saint_t check, next;
674 |
675 |   for(check = 0, ssize = 0;;) {
676 |     if((last - middle) <= bufsize) { /* right part fits into the buffer */
677 |       if((first < middle) && (middle < last)) {
678 |         ss_mergebackward(T, PA, first, middle, last, buf, depth);
679 |       }
680 |       MERGE_CHECK(first, last, check);
681 |       STACK_POP(first, middle, last, check);
682 |       continue;
683 |     }
684 |
685 |     if((middle - first) <= bufsize) { /* left part fits into the buffer */
686 |       if(first < middle) {
687 |         ss_mergeforward(T, PA, first, middle, last, buf, depth);
688 |       }
689 |       MERGE_CHECK(first, last, check);
690 |       STACK_POP(first, middle, last, check);
691 |       continue;
692 |     }
693 |     /* binary search for m: how many entries right of middle sort before their mirror entry left of middle */
694 |     for(m = 0, len = MIN(middle - first, last - middle), half = len >> 1;
695 |         0 < len;
696 |         len = half, half >>= 1) {
697 |       if(ss_compare(T, PA + GETIDX(*(middle + m + half)),
698 |                        PA + GETIDX(*(middle - m - half - 1)), depth) < 0) {
699 |         m += half + 1;
700 |         half -= (len & 1) ^ 1;
701 |       }
702 |     }
703 |
704 |     if(0 < m) {
705 |       lm = middle - m, rm = middle + m;
706 |       ss_blockswap(lm, middle, m);
707 |       l = r = middle, next = 0;
708 |       if(rm < last) { /* next collects the MERGE_CHECK bits for the two sub-jobs */
709 |         if(*rm < 0) {
710 |           *rm = ~*rm;
711 |           if(first < lm) { for(; *--l < 0;) { } next |= 4; }
712 |           next |= 1;
713 |         } else if(first < lm) {
714 |           for(; *r < 0; ++r) { }
715 |           next |= 2;
716 |         }
717 |       }
718 |       /* continue with the smaller sub-job and push the larger one */
719 |       if((l - first) <= (last - r)) {
720 |         STACK_PUSH(r, rm, last, (next & 3) | (check & 4));
721 |         middle = lm, last = l, check = (check & 3) | (next & 4);
722 |       } else {
723 |         if((next & 2) && (r == middle)) { next ^= 6; }
724 |         STACK_PUSH(first, lm, l, (check & 3) | (next & 4));
725 |         first = r, middle = rm, check = (next & 3) | (check & 4);
726 |       }
727 |     } else { /* nothing to exchange: the two parts are already in order */
728 |       if(ss_compare(T, PA + GETIDX(*(middle - 1)), PA + *middle, depth) == 0) {
729 |         *middle = ~*middle;
730 |       }
731 |       MERGE_CHECK(first, last, check);
732 |       STACK_POP(first, middle, last, check);
733 |     }
734 |   }
735 | #undef STACK_SIZE
736 | }
737 |
738 | #endif /* SS_BLOCKSIZE != 0 */
739 |
740 |
741 | /*---------------------------------------------------------------------------*/
742 |
743 | /*- Function -*/
744 |
745 | /* Substring sort.
       *
       * Sorts the entries of [first, last) (indices into PA) by the substrings of
       * T they denote, starting at offset depth.
       *  - lastsuffix != 0: *first is held out of the sort and inserted at the
       *    end by comparing it through a temporary two-entry PA whose second
       *    entry is n - 2.
       *  - SS_BLOCKSIZE == 0: the whole range is sorted by ss_mintrosort.
       *  - Otherwise the range is sorted in blocks of SS_BLOCKSIZE entries that
       *    are merged pairwise with ss_swapmerge. If the supplied buffer is
       *    smaller than SS_BLOCKSIZE, the range and ss_isqrt(range size), the
       *    last `limit` entries of the range serve as merge buffer; they are
       *    sorted afterwards and merged in with ss_inplacemerge. */
746 | void
747 | sssort(const sauchar_t *T, const saidx_t *PA,
748 |        saidx_t *first, saidx_t *last,
749 |        saidx_t *buf, saidx_t bufsize,
750 |        saidx_t depth, saidx_t n, saint_t lastsuffix) {
751 |   saidx_t *a;
752 | #if SS_BLOCKSIZE != 0
753 |   saidx_t *b, *middle, *curbuf;
754 |   saidx_t j, k, curbufsize, limit;
755 | #endif
756 |   saidx_t i;
757 |
758 |   if(lastsuffix != 0) { ++first; }
759 |
760 | #if SS_BLOCKSIZE == 0
761 |   ss_mintrosort(T, PA, first, last, depth);
762 | #else
763 |   if((bufsize < SS_BLOCKSIZE) &&
764 |       (bufsize < (last - first)) &&
765 |       (bufsize < (limit = ss_isqrt(last - first)))) {
766 |     if(SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; }
767 |     buf = middle = last - limit, bufsize = limit; /* use the tail of the range as buffer */
768 |   } else {
769 |     middle = last, limit = 0;
770 |   }
771 |   for(a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) {
772 | #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
773 |     ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth);
774 | #elif 1 < SS_BLOCKSIZE
775 |     ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth);
776 | #endif
777 |     curbufsize = last - (a + SS_BLOCKSIZE); /* the not yet sorted space after this block can serve as buffer */
778 |     curbuf = a + SS_BLOCKSIZE;
779 |     if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; }
780 |     for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1) { /* one merge per trailing 1 bit of the block counter i */
781 |       ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth);
782 |     }
783 |   }
784 | #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
785 |   ss_mintrosort(T, PA, a, middle, depth);
786 | #elif 1 < SS_BLOCKSIZE
787 |   ss_insertionsort(T, PA, a, middle, depth);
788 | #endif
789 |   for(k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) { /* merge the remaining sorted runs, smallest first */
790 |     if(i & 1) {
791 |       ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth);
792 |       a -= k;
793 |     }
794 |   }
795 |   if(limit != 0) { /* the tail that served as buffer still has to be sorted and merged */
796 | #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
797 |     ss_mintrosort(T, PA, middle, last, depth);
798 | #elif 1 < SS_BLOCKSIZE
799 |     ss_insertionsort(T, PA, middle, last, depth);
800 | #endif
801 |     ss_inplacemerge(T, PA, first, middle, last, depth);
802 |   }
803 | #endif
804 |
805 |   if(lastsuffix != 0) {
806 |     /* Insert last type B* suffix. */
807 |     saidx_t PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2;
808 |     for(a = first, i = *(first - 1);
809 |         (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth)));
810 |         ++a) {
811 |       *(a - 1) = *a; /* shift smaller entries (and negative entries) one slot to the left */
812 |     }
813 |     *(a - 1) = i;
814 |   }
815 | }
816 | --------------------------------------------------------------------------------