├── .gitignore ├── BPR2 ├── .gitignore ├── AUTHORS ├── COPYING ├── ChangeLog ├── INSTALL ├── Makefile.am ├── Makefile.in ├── Makefile.init ├── NEWS ├── README ├── aclocal.m4 ├── autom4te.cache │ ├── output.0 │ ├── output.1 │ ├── requests │ ├── traces.0 │ └── traces.1 ├── bootstrap.sh ├── config.log ├── config.status ├── config │ ├── compile │ ├── config.guess │ ├── config.sub │ ├── depcomp │ ├── install-sh │ ├── kbs.m4 │ ├── libtool.m4 │ ├── ltmain.sh │ ├── missing │ └── mkinstalldirs ├── configure ├── configure.in ├── doxyBpr.cfg ├── libtool └── src │ ├── .deps │ └── fibstring-fib.main.Po │ ├── Extern │ ├── .deps │ │ └── libextern_la-randomlib.Plo │ ├── Makefile.am │ ├── Makefile.in │ ├── randomlib.c │ └── randomlib.h │ ├── Main │ ├── .deps │ │ ├── bprtime-bprtime.main.Po │ │ ├── fibstring-fib.main.Po │ │ └── periodicstr-periodicStrGen.main.Po │ ├── .kdbgrc.bktref │ ├── .kdbgrc.bprtime │ ├── .kdbgrc.makeclusteredsft │ ├── .kdbgrc.periodicstr │ ├── Makefile.am │ ├── Makefile.in │ ├── bprtime.main.c │ ├── fib.main.c │ └── periodicStrGen.main.c │ ├── Makefile.am │ ├── Makefile.in │ ├── Standard │ ├── .deps │ │ ├── libstandard_la-kbs_Alphabet.Plo │ │ ├── libstandard_la-kbs_Error.Plo │ │ ├── libstandard_la-kbs_Math.Plo │ │ ├── libstandard_la-kbs_String.Plo │ │ └── libstandard_la-kbs_Time.Plo │ ├── Makefile.am │ ├── Makefile.in │ ├── kbs_Alphabet.c │ ├── kbs_Alphabet.h │ ├── kbs_Error.c │ ├── kbs_Error.h │ ├── kbs_Limits.h │ ├── kbs_Math.c │ ├── kbs_Math.h │ ├── kbs_String.c │ ├── kbs_String.h │ ├── kbs_Time.c │ ├── kbs_Time.h │ └── kbs_Types.h │ ├── SuffixArray │ ├── .deps │ │ ├── libsuffixarray_la-kbs_SuffixArray.Plo │ │ ├── libsuffixarray_la-kbs_SuffixArrayAnnotated.Plo │ │ ├── libsuffixarray_la-kbs_SuffixArrayChecker.Plo │ │ └── libsuffixarray_la-kbs_SuffixArrayConstDStepAndPre.Plo │ ├── Makefile.am │ ├── Makefile.in │ ├── kbs_SuffixArray.c │ ├── kbs_SuffixArray.h │ ├── kbs_SuffixArrayAnnotated.c │ ├── kbs_SuffixArrayAnnotated.h │ ├── kbs_SuffixArrayChecker.c │ ├── 
kbs_SuffixArrayChecker.h │ ├── kbs_SuffixArrayConstDStepAndPre.c │ └── kbs_SuffixArrayConstDStepAndPre.h │ ├── Util │ ├── .deps │ │ ├── libutil_la-kbs_FibonacciString.Plo │ │ ├── libutil_la-kbs_RandomString.Plo │ │ └── libutil_la-kbs_RandomStringFile.Plo │ ├── Makefile.am │ ├── Makefile.in │ ├── kbs_FibonacciString.c │ ├── kbs_FibonacciString.h │ ├── kbs_RandomString.c │ ├── kbs_RandomString.h │ ├── kbs_RandomStringFile.c │ └── kbs_RandomStringFile.h │ ├── config.h │ ├── config.h.in │ └── stamp-h1 ├── DC3 ├── drittel.C ├── tryall.C └── util.h ├── README.md ├── deep-shallow ├── .gitignore ├── COPYRIGHT.GPL.txt ├── COPYRIGHT.MPL.txt ├── Makefile ├── README ├── blind2.c ├── bwt.c ├── bwt_aux.c ├── bwt_aux.h ├── common.h ├── deep2.c ├── ds.c ├── ds_ssort.h ├── globals.c ├── helped.c ├── lcp_aux.c ├── lcp_aux.h ├── shallow.c ├── suftest2.c ├── testlcp.c └── unbwt.c ├── gsa-is ├── .gitignore ├── LICENSE ├── README.md ├── experiments │ ├── Makefile │ ├── README.md │ ├── dataset │ │ ├── input-10000.txt │ │ ├── input-10000.txt.bwt │ │ ├── proteins-10000.fasta │ │ └── reads-10000.fastq │ ├── external │ │ ├── bitvector │ │ │ ├── Makefile │ │ │ ├── lib │ │ │ │ ├── document_array.cpp │ │ │ │ ├── document_array.hpp │ │ │ │ ├── utils.cpp │ │ │ │ ├── utils.hpp │ │ │ │ ├── utils_32.hpp │ │ │ │ └── utils_64.hpp │ │ │ └── main.cpp │ │ ├── malloc_count │ │ │ ├── README.md │ │ │ ├── malloc_count.c │ │ │ ├── malloc_count.h │ │ │ ├── memprofile.h │ │ │ ├── stack_count.c │ │ │ ├── stack_count.h │ │ │ ├── test-malloc_count │ │ │ │ ├── Makefile │ │ │ │ └── test.c │ │ │ └── test-memprofile │ │ │ │ ├── Makefile │ │ │ │ ├── memprofile.gnuplot │ │ │ │ ├── memprofile.pdf │ │ │ │ └── test.cc │ │ ├── sa-is │ │ │ ├── main.cpp │ │ │ ├── makefile │ │ │ ├── readme.txt │ │ │ ├── rel │ │ │ ├── sais │ │ │ └── sais.cpp │ │ └── saca-k │ │ │ ├── input.txt │ │ │ ├── main.cpp │ │ │ ├── makefile │ │ │ ├── readme.txt │ │ │ ├── saca-k │ │ │ └── saca-k.cpp │ ├── lib │ │ ├── document_array.c │ │ ├── 
document_array.h │ │ ├── file.c │ │ ├── file.h │ │ ├── lcp_array.c │ │ ├── lcp_array.h │ │ ├── suffix_array.c │ │ ├── suffix_array.h │ │ ├── utils.c │ │ └── utils.h │ ├── main.c │ └── scripts │ │ └── downloads.txt ├── gsacak.c ├── gsacak.h ├── gsais.c ├── gsais.h └── test.c ├── gsaca ├── .gitignore ├── LICENSE ├── README.md ├── benchmark │ ├── Makefile │ ├── README │ ├── data │ │ ├── README │ │ ├── largecorpus │ │ │ └── Makefile │ │ ├── pizzachilicorpus │ │ │ └── Makefile │ │ ├── repcorpus │ │ │ └── Makefile │ │ └── silesiacorpus │ │ │ └── Makefile │ ├── dc3 │ │ ├── dc3.C │ │ └── dc3.h │ ├── divsufsort │ │ ├── LICENSE │ │ ├── config.h │ │ ├── divsufsort.c │ │ ├── divsufsort.h │ │ ├── divsufsort_private.h │ │ ├── sssort.c │ │ ├── trsort.c │ │ └── utils.c │ ├── gsaca │ │ ├── gsaca.c │ │ └── gsaca.h │ ├── ka │ │ ├── BooleanString.cpp │ │ ├── BooleanString.h │ │ ├── LinearSuffixSort.cpp │ │ ├── LinearSuffixSort.h │ │ ├── Makefile │ │ ├── orig │ │ │ └── LinearSA.tar │ │ └── suftest.cpp │ ├── programlib │ │ ├── benchmark.cpp │ │ ├── constructsa.cpp │ │ └── corpusbenchmark.sh │ └── sais-lite-2.4.1 │ │ ├── COPYING │ │ ├── Makefile │ │ ├── README │ │ ├── is_orig.c │ │ ├── sais.c │ │ ├── sais.h │ │ ├── sais.hxx │ │ ├── suftest.c │ │ └── test.c ├── gsaca.c └── gsaca.h ├── libdivsufsort ├── .gitignore ├── CHANGELOG.md ├── CMakeLists.txt ├── CMakeModules │ ├── AppendCompilerFlags.cmake │ ├── CheckFunctionKeywords.cmake │ ├── CheckLFS.cmake │ ├── ProjectCPack.cmake │ └── cmake_uninstall.cmake.in ├── LICENSE ├── README.md ├── VERSION.cmake ├── examples │ ├── CMakeLists.txt │ ├── bwt.c │ ├── mksary.c │ ├── sasearch.c │ ├── suftest.c │ └── unbwt.c ├── include │ ├── CMakeLists.txt │ ├── config.h.cmake │ ├── divsufsort.h.cmake │ ├── divsufsort_private.h │ └── lfs.h.cmake ├── lib │ ├── CMakeLists.txt │ ├── divsufsort.c │ ├── sssort.c │ ├── trsort.c │ └── utils.c └── pkgconfig │ ├── CMakeLists.txt │ └── libdivsufsort.pc.cmake ├── msufsort ├── LICENSE ├── SConstruct └── src │ ├── 
SConscript │ ├── executable │ ├── SConscript │ └── msufsort │ │ ├── SConscript │ │ └── main.cpp │ ├── include │ ├── endian.h │ ├── endian │ │ ├── byte_swap.h │ │ ├── endian.h │ │ ├── endian_swap.h │ │ └── endian_type.h │ └── type_traits │ │ ├── enable_if_integral.h │ │ ├── enable_if_integral_or_enum.h │ │ ├── is_endian.h │ │ ├── opposite_endian.h │ │ └── remove_endian.h │ └── library │ ├── SConscript │ ├── msufsort.h │ └── msufsort │ ├── SConscript │ ├── msufsort.cpp │ └── msufsort.h ├── qsufsort ├── .gitignore ├── Makefile ├── qsufsort.c └── suftest.c ├── sa-ds ├── .gitignore ├── core.cpp ├── is.cpp ├── is.sln ├── is.suo ├── is.vcproj ├── makefile └── readme.txt ├── sa-is ├── .gitignore ├── main.cpp ├── makefile ├── readme.txt └── sais.cpp ├── saca-k ├── .gitignore ├── main.cpp ├── makefile ├── readme.txt └── saca-k.cpp └── sais-lite ├── .gitignore ├── COPYING ├── Makefile ├── README ├── is_orig.c ├── sais.c ├── sais.h ├── sais.hxx ├── suftest.c └── test.c /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | -------------------------------------------------------------------------------- /BPR2/.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | Makefile 3 | *.*~ 4 | *~ 5 | *.lo 6 | *.la 7 | *.libs 8 | src/Main/bprtime 9 | src/Main/fibstring 10 | src/Main/periodicstr 11 | -------------------------------------------------------------------------------- /BPR2/AUTHORS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kurpicz/saca-bench/dfbaa2e727d274c9c0265451dae97d7c737aba2b/BPR2/AUTHORS -------------------------------------------------------------------------------- /BPR2/ChangeLog: -------------------------------------------------------------------------------- 1 | Mapping of sequences without using modulo operation. 
2 | 3 | 10/15/2006: Suffixes later determined by copy not written to suffix array during bucketing. 4 | 5 | 12/20/2006: New version with update of bucket for pivots in partitioning scheme. 6 | 7 | 11/01/2007: Release bpr2 with new partitioning scheme (see my PhD thesis). 8 | -------------------------------------------------------------------------------- /BPR2/Makefile.am: -------------------------------------------------------------------------------- 1 | SUBDIRS = src 2 | 3 | -------------------------------------------------------------------------------- /BPR2/Makefile.init: -------------------------------------------------------------------------------- 1 | config_dir = config 2 | acinclude_files = kbs.m4 libtool.m4 3 | 4 | all: acinclude autoconfig 5 | 6 | acinclude: 7 | @echo "Generating acinclude.m4" 8 | cd $(config_dir) ; \ 9 | cat $(acinclude_files) > ../acinclude.m4 10 | 11 | 12 | autoconfig: 13 | libtoolize --force --copy 14 | aclocal 15 | autoheader 16 | automake --add-missing --copy --include-deps --gnu 17 | autoconf 18 | @echo "Package ready for compilation. Now run './configure; make' to build." 
19 | -------------------------------------------------------------------------------- /BPR2/NEWS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kurpicz/saca-bench/dfbaa2e727d274c9c0265451dae97d7c737aba2b/BPR2/NEWS -------------------------------------------------------------------------------- /BPR2/README: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kurpicz/saca-bench/dfbaa2e727d274c9c0265451dae97d7c737aba2b/BPR2/README -------------------------------------------------------------------------------- /BPR2/bootstrap.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | rm autom4te.cache/* 4 | make -f Makefile.init 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /BPR2/configure.in: -------------------------------------------------------------------------------- 1 | # -------------- 2 | # Initialization 3 | # -------------- 4 | 5 | AC_INIT(KBS bucket-pointer refinement, 2.0.0, Klaus-Bernd.Schuermann@CeBiTec.Uni-Bielefeld.DE, kbs-bpr) 6 | AC_CONFIG_SRCDIR(src/Standard/kbs_String.h) 7 | AM_CONFIG_HEADER(src/config.h) 8 | AC_CONFIG_AUX_DIR(config) 9 | 10 | 11 | 12 | AM_INIT_AUTOMAKE 13 | 14 | # ------- 15 | # Options 16 | # ------- 17 | 18 | 19 | DATE=`date +%Y-%m-%d` 20 | AC_SUBST(DATE) 21 | 22 | # ------ 23 | # Checks 24 | # ------ 25 | AC_ISC_POSIX 26 | AC_PROG_CC 27 | AC_EXEEXT 28 | 29 | AC_LANG_C 30 | 31 | AC_PROG_INSTALL 32 | #AC_PROG_RANLIB 33 | 34 | AC_DISABLE_SHARED 35 | #AC_DISABLE_STATIC 36 | AC_PROG_LIBTOOL 37 | #AM_PROG_LIBTOOL 38 | 39 | CC=gcc 40 | 41 | #for optimal running time 42 | CFLAGS="-O3 -fomit-frame-pointer -funroll-loops -fprefetch-loop-arrays -W -Winline -Wall" 43 | 44 | #for optimal running time on 64-bit machine 45 | #CFLAGS="-mptr64 -m64 -O3 -fomit-frame-pointer -funroll-loops -fprefetch-loop-arrays -W 
-Wall -Winline" 46 | 47 | #for debugging 48 | #CFLAGS="-g" 49 | 50 | # -------------------- 51 | # Verbose version info 52 | # -------------------- 53 | VERSION_INFO=`cat < 2 | #include 3 | #include 4 | 5 | #include "kbs_Error.h" 6 | #include "kbs_FibonacciString.h" 7 | #include "kbs_String.h" 8 | #include "kbs_Types.h" 9 | 10 | int main(int argc, char *argv[]) { 11 | if (argc != 3) { 12 | printf("%d\n", (int) argc); 13 | printf("usage: fibstring \n"); 14 | exit(0); 15 | } 16 | Kbs_Ulong length = atoi(argv[2]); 17 | if (chdir(argv[1]) == -1) { 18 | printf("unable to change directory %s\n", argv[1]); 19 | KBS_ERROR(KBS_ERROR_FILE); 20 | exit(KBS_ERROR_FILE); 21 | } 22 | Kbs_Ustring *string = kbs_genFibonacciLen_Ustring(length); 23 | if (string == NULL || string->str == NULL || string->strLength == 0) { 24 | printf("failed to generate periodic string - failed\n"); 25 | KBS_ERROR(KBS_ERROR_NULLPOINTER); 26 | exit(KBS_ERROR_NULLPOINTER); 27 | } 28 | Kbs_Char *fibStrFilename = kbs_genFibonacciFileName(length); 29 | FILE *file = fopen(fibStrFilename, "wb+"); 30 | if (file == NULL) { 31 | printf("failed to open file - %s failed\n", fibStrFilename); 32 | free(fibStrFilename); 33 | KBS_ERROR(KBS_ERROR_FILEOPEN); 34 | exit(KBS_ERROR_FILEOPEN); 35 | } 36 | if ( fwrite(string->str, sizeof(Kbs_Uchar), length, file) != length) { 37 | printf("File %s not written completely\n", fibStrFilename); 38 | } 39 | fclose(file); 40 | printf("File %s successfully generated\n", fibStrFilename); 41 | free(fibStrFilename); 42 | return 0; 43 | 44 | } /* main() */ 45 | -------------------------------------------------------------------------------- /BPR2/src/Main/periodicStrGen.main.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "randomlib.h" 11 | #include "kbs_Error.h" 12 | #include "kbs_RandomString.h" 13 | #include 
"kbs_RandomStringFile.h" 14 | #include "kbs_Types.h" 15 | 16 | /*--------------------------------------------------------------------------*/ 17 | int main(int argc, char *argv[]) { 18 | if (argc != 5) { 19 | printf("%d\n", (int) argc); 20 | printf("usage: periodicstr \n"); 21 | exit(0); 22 | } 23 | Kbs_Uint alphabetSize = atoi(argv[2]); 24 | Kbs_Ulong lengthPeriod = atoi(argv[3]); 25 | Kbs_Ulong strLength = atoi(argv[4]); 26 | if (chdir(argv[1]) == -1) { 27 | printf("\nVerzeichnis %s nicht gefunden\n", argv[1]); 28 | KBS_ERROR(KBS_ERROR_FILE); 29 | exit(KBS_ERROR_FILE); 30 | } 31 | Kbs_Ustring *string = kbs_genRandPeriodic_UString(alphabetSize, lengthPeriod, strLength); 32 | if (string == NULL || string->str == NULL || string->strLength == 0) { 33 | printf("failed to generate periodic string - failed\n"); 34 | KBS_ERROR(KBS_ERROR_NULLPOINTER); 35 | exit(KBS_ERROR_NULLPOINTER); 36 | } 37 | Kbs_Char *periodicStrFilename = kbs_generate_PeriodicStrFileName(alphabetSize, lengthPeriod, strLength); 38 | FILE *file = fopen(periodicStrFilename, "wb+"); 39 | if (file == NULL) { 40 | printf("failed to open file - %s failed\n", periodicStrFilename); 41 | free(periodicStrFilename); 42 | KBS_ERROR(KBS_ERROR_FILEOPEN); 43 | exit(KBS_ERROR_FILEOPEN); 44 | } 45 | if ( fwrite(string->str, sizeof(Kbs_Uchar), strLength, file) != strLength) { 46 | printf("File %s not written completely\n", periodicStrFilename); 47 | } 48 | fclose(file); 49 | printf("File %s successfully generated\n", periodicStrFilename); 50 | free(periodicStrFilename); 51 | return 0; 52 | } 53 | -------------------------------------------------------------------------------- /BPR2/src/Makefile.am: -------------------------------------------------------------------------------- 1 | SUBDIRS = Extern Standard Util SuffixArray Main 2 | -------------------------------------------------------------------------------- /BPR2/src/Standard/.deps/libstandard_la-kbs_Math.Plo: 
-------------------------------------------------------------------------------- 1 | libstandard_la-kbs_Math.lo: kbs_Math.c /usr/include/stdc-predef.h \ 2 | kbs_Math.h kbs_Error.h kbs_Types.h 3 | 4 | /usr/include/stdc-predef.h: 5 | 6 | kbs_Math.h: 7 | 8 | kbs_Error.h: 9 | 10 | kbs_Types.h: 11 | -------------------------------------------------------------------------------- /BPR2/src/Standard/Makefile.am: -------------------------------------------------------------------------------- 1 | 2 | standarddir = $(includedir)/kbs_include 3 | 4 | standard_headers = \ 5 | kbs_Alphabet.h \ 6 | kbs_Error.h \ 7 | kbs_Limits.h \ 8 | kbs_Math.h \ 9 | kbs_String.h \ 10 | kbs_Time.h \ 11 | kbs_Types.h 12 | 13 | standard_modules = \ 14 | kbs_Alphabet.c \ 15 | kbs_Error.c \ 16 | kbs_Math.c \ 17 | kbs_String.c \ 18 | kbs_Time.c 19 | 20 | noinst_LTLIBRARIES = libstandard.la 21 | 22 | libstandard_la_SOURCES = \ 23 | $(standard_headers) \ 24 | $(standard_modules) 25 | 26 | libstandard_la_LIBADD = \ 27 | @top_srcdir@/src/Extern/libextern.la 28 | 29 | libstandard_la_CFLAGS = \ 30 | -I@top_srcdir@/src/Extern 31 | -------------------------------------------------------------------------------- /BPR2/src/Standard/kbs_Alphabet.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "kbs_Types.h" 5 | #include "kbs_Error.h" 6 | #include "kbs_String.h" 7 | #include "kbs_Alphabet.h" 8 | 9 | 10 | /*--------------------------------------------------------------------------*/ 11 | void kbs_sort_Alphabet(Kbs_Alphabet *const alphabet){ 12 | Kbs_Ulong i, k; 13 | if (alphabet == NULL) { 14 | fprintf(stdout, "Try to sort Null-alphabet\n"); 15 | KBS_ERROR(KBS_ERROR_NULLPOINTER); 16 | return; 17 | } 18 | if (alphabet->charArray == NULL) { 19 | fprintf(stdout, "Alphabet is not complete\n"); 20 | KBS_ERROR(KBS_ERROR_NULLPOINTER); 21 | return; 22 | } 23 | memset(alphabet->charArray, '\0', sizeof(Kbs_Uchar) * (alphabet->alphaSize 
+ 1)); 24 | k = 0; 25 | for (i=0; ialphaMapping[i] >= 0) { 27 | alphabet->charArray[k] = i; 28 | alphabet->alphaMapping[i] = k; 29 | k++; 30 | } 31 | } 32 | if (k != alphabet->alphaSize) { 33 | printf("k ist ungleich alphabet size"); 34 | } 35 | } 36 | 37 | 38 | /*--------------------------------------------------------------------------*/ 39 | void kbs_delete_Alphabet(Kbs_Alphabet* alphabet) { 40 | if (alphabet == NULL) { 41 | return; 42 | } 43 | if (alphabet->charArray != NULL) { 44 | free(alphabet->charArray); 45 | } 46 | free(alphabet); 47 | } 48 | 49 | -------------------------------------------------------------------------------- /BPR2/src/Standard/kbs_Alphabet.h: -------------------------------------------------------------------------------- 1 | #ifndef KBS_ALPHABET_H 2 | #define KBS_ALPHABET_H 3 | 4 | #include "kbs_Types.h" 5 | 6 | #define KBS_MAX_ALPHABET_SIZE 256 7 | 8 | 9 | typedef struct kbs_alphabet{ 10 | Kbs_Uint alphaSize; /** size of the alphabet */ 11 | Kbs_Uchar *charArray; /** collection of characters in mapping order */ 12 | Kbs_Int alphaMapping[KBS_MAX_ALPHABET_SIZE]; /** map of characters to alphabet 0..alphabetSize-1 */ 13 | Kbs_Ulong charFreq[KBS_MAX_ALPHABET_SIZE]; /** frequency of each character in the resp. 
string */ 14 | Kbs_Uchar numberSpecialChar; /** just used for special purposes */ 15 | Kbs_Uchar *specialChar; /** array of special characters */ 16 | }Kbs_Alphabet; 17 | 18 | 19 | 20 | /** 21 | * The alphabet mapping is sorted, 22 | * such that the smallest ascii-value is mapped to 0 and so on 23 | * @param alphabet to be mapped in sorted order 24 | */ 25 | void kbs_sort_Alphabet(Kbs_Alphabet *const alphabet); 26 | 27 | 28 | 29 | /** 30 | * frees the alphabet 31 | * @param alphabet to be deleted 32 | */ 33 | void kbs_delete_Alphabet(Kbs_Alphabet *alphabet); 34 | 35 | 36 | #endif 37 | -------------------------------------------------------------------------------- /BPR2/src/Standard/kbs_Error.c: -------------------------------------------------------------------------------- 1 | 2 | #include "stdlib.h" 3 | #include "stdio.h" 4 | #include "kbs_Types.h" 5 | #include "kbs_Error.h" 6 | 7 | void kbs_Error(Kbs_Uchar *filename, Kbs_Uint line, Kbs_Int errorCode) { 8 | fprintf(stderr, "Error in file %s, line %u:\n ", filename, line); 9 | if (errorCode == KBS_ERROR_MALLOC) { 10 | fprintf(stderr, "Allocation Error, not enough space\n"); 11 | exit(KBS_ERROR_MALLOC); 12 | } 13 | if (errorCode == KBS_ERROR_FILESTAT) { 14 | fprintf(stderr, "Unable to get stat of file\n"); 15 | } 16 | if (errorCode == KBS_ERROR_FILEMODE) { 17 | fprintf(stderr, "File is not in appropriate mode\n"); 18 | } 19 | if (errorCode == KBS_ERROR_FILEOPEN) { 20 | fprintf(stderr, "Unable to open file\n"); 21 | } 22 | if (errorCode == KBS_ERROR_MMAP) { 23 | fprintf(stderr, "Unable to compute memory mapping\n"); 24 | } 25 | if (errorCode == KBS_ERROR_FILEFORMAT) { 26 | fprintf(stderr, "The file has not the appropriate format\n"); 27 | } 28 | if (errorCode == KBS_ERROR_VALUEOVERFLOW) { 29 | fprintf(stderr, "Overflow of the given type\n"); 30 | } 31 | if (errorCode == KBS_ERROR_NULLPOINTER) { 32 | fprintf(stderr, "Try to access a NULL reference\n"); 33 | } 34 | if (errorCode == KBS_ERROR_TIME) { 35 | 
fprintf(stderr, "Unable to process the time\n"); 36 | } 37 | if (errorCode == KBS_ERROR_VALUEOUTOFBOUNDS) { 38 | fprintf(stderr, "The value of a variable is out of a given range\n"); 39 | } 40 | if (errorCode == KBS_ERROR_EMPTYCONTAINER) { 41 | fprintf(stderr, "Try to access elements of an empty set/container\n"); 42 | } 43 | 44 | } 45 | 46 | -------------------------------------------------------------------------------- /BPR2/src/Standard/kbs_Error.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef KBS_ERROR_H 3 | #define KBS_ERROR_H 4 | 5 | #include "kbs_Types.h" 6 | 7 | #define KBS_ERROR_MALLOC 1 /** identifies allocation error*/ 8 | #define KBS_ERROR_FILESTAT 2 /** identifies error to get stat of file */ 9 | #define KBS_ERROR_FILEMODE 3 /** identifies error in file permission */ 10 | #define KBS_ERROR_FILEOPEN 4 /** identifies error in opening a file */ 11 | #define KBS_ERROR_MMAP 5 /** identifies mmap error*/ 12 | #define KBS_ERROR_FILEFORMAT 6 /** */ 13 | #define KBS_ERROR_VALUEOVERFLOW 7 /** */ 14 | #define KBS_ERROR_NULLPOINTER 8 /** */ 15 | #define KBS_ERROR_TIME 9 /** */ 16 | #define KBS_ERROR_VALUEOUTOFBOUNDS 10 /** */ 17 | #define KBS_ERROR_EMPTYCONTAINER 11 /** */ 18 | #define KBS_ERROR_FILE 12 /** identifies error in file handling */ 19 | #define KBS_ERROR_OTHER 13 /** */ 20 | 21 | #define KBS_ERROR(CODE) kbs_Error((Kbs_Uchar *)__FILE__, __LINE__, CODE); 22 | 23 | void kbs_Error(Kbs_Uchar *filename, Kbs_Uint line, Kbs_Int errorCode); 24 | 25 | #endif 26 | -------------------------------------------------------------------------------- /BPR2/src/Standard/kbs_Limits.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef KBS_LIMITS_H 3 | #define KBS_LIMITS_H 4 | 5 | #include 6 | 7 | #define KBS_ULONG_MAX (ULONG_MAX) /* Has to be consistent with type Kbs_Ulong */ 8 | #define KBS_LONG_MAX (LONG_MAX) /* Has to be consistent with type Kbs_Long */ 9 | 10 | 11 | 12 | 
#endif 13 | -------------------------------------------------------------------------------- /BPR2/src/Standard/kbs_Math.c: -------------------------------------------------------------------------------- 1 | 2 | #include "kbs_Math.h" 3 | #include "kbs_Error.h" 4 | 5 | 6 | /*--------------------------------------------------------*/ 7 | Kbs_Ulong kbs_power_Ulong(Kbs_Ulong base, Kbs_Ulong exp) { 8 | long p; 9 | if (exp == 0) { 10 | return 1; 11 | } 12 | if (exp == 1) { 13 | return base; 14 | } 15 | if (base == 4) { 16 | if (exp > 15) { 17 | KBS_ERROR(KBS_ERROR_VALUEOVERFLOW); 18 | } 19 | return 4<<(2*(exp-1)); 20 | } 21 | p=1; 22 | for (; exp>0; --exp) { 23 | p = p*base; 24 | } 25 | return p; 26 | } 27 | 28 | /*--------------------------------------------------------*/ 29 | Kbs_Long kbs_getExp_Ulong(const Kbs_Ulong base, const Kbs_Ulong value){ 30 | Kbs_Long exp = 0; 31 | Kbs_Ulong tmpValue = 1; 32 | while (tmpValue < value) { 33 | tmpValue *= base; 34 | exp++; 35 | } 36 | if (tmpValue == value) { 37 | return exp; 38 | } 39 | else { 40 | return -1; 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /BPR2/src/Standard/kbs_Math.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef KBS_MATH_H 3 | #define KBS_MATH_H 4 | 5 | #include "kbs_Error.h" 6 | #include "kbs_Types.h" 7 | 8 | #define MAX(a,b) (((a) > (b)) ? (a) : (b)) 9 | #define MIN(a,b) (((a) < (b)) ? (a) : (b)) 10 | 11 | #define KBS_ABS(a) (((a) < 0) ? 
(-a) : (a)) 12 | 13 | 14 | /** 15 | * Computes base^exp 16 | * @param base - base 17 | * @param exp - exponent 18 | * @return base^exp 19 | */ 20 | Kbs_Ulong kbs_power_Ulong(Kbs_Ulong base, Kbs_Ulong exp); 21 | 22 | /** 23 | * Computes exp of base^exp = value 24 | * @param base - base 25 | * @param value - value of base^exp 26 | * @return exp of base^exp = value, if not integer -1 27 | */ 28 | Kbs_Long kbs_getExp_Ulong(const Kbs_Ulong base, const Kbs_Ulong value); 29 | 30 | 31 | #endif 32 | -------------------------------------------------------------------------------- /BPR2/src/Standard/kbs_String.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef KBS_STRING_H 3 | #define KBS_STRING_H 4 | 5 | #include "kbs_Alphabet.h" 6 | #include "kbs_Types.h" 7 | 8 | 9 | typedef struct kbs_ustring{ 10 | Kbs_Ulong strLength; /** length of the string */ 11 | Kbs_Uchar *str; /** character array of length strLength plus terminating 0 */ 12 | Kbs_Alphabet *alphabet; /** the alphabet of the string, if it is determined previously */ 13 | }Kbs_Ustring; 14 | 15 | 16 | #define KBS_STRING_EXTENSION_SIZE 32 17 | 18 | 19 | 20 | /*----------------------------------------------------------------------------*/ 21 | /** 22 | * Gets the Kbs_Ustring from a given file without the alphabet 23 | * @param filename - file containing the string. 24 | * @return Kbs_Ustring located in filename 25 | * @see kbs_getUstringWithAlphabet_FromFile 26 | */ 27 | Kbs_Ustring* kbs_getUstring_FromFile(const Kbs_Char *const filename); 28 | 29 | 30 | /*----------------------------------------------------------------------------*/ 31 | /** 32 | * Gets the Kbs_Ustring with its alphabet from a given file 33 | * @param filename - file containing the string. 
34 | * @return Kbs_Ustring located in filename 35 | * @see kbs_getUstring_FromFile 36 | */ 37 | Kbs_Ustring* kbs_getUstringWithAlphabet_FromFile(Kbs_Char *filename); 38 | 39 | /*----------------------------------------------------------------------------*/ 40 | /** 41 | * frees a Kbs_Ustring 42 | * @param oldStr - Kbs_Ustring string to free 43 | */ 44 | void kbs_delete_Ustring(Kbs_Ustring* oldStr); 45 | 46 | 47 | /** 48 | * Shows the contents of Kbs_Ustring on standard out 49 | * @param thisString string to be shown 50 | * @see Kbs_Ustring 51 | */ 52 | void kbs_get_AlphabetForUstring(Kbs_Ustring *thisString); 53 | 54 | 55 | 56 | 57 | #endif 58 | -------------------------------------------------------------------------------- /BPR2/src/Standard/kbs_Time.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef KBS_TIME_H 3 | #define KBS_TIME_H 4 | 5 | 6 | #include /* struct tms,times() */ 7 | #include "kbs_Error.h" 8 | #include "kbs_Types.h" 9 | 10 | 11 | 12 | typedef struct kbs_timestamp{ 13 | struct timeval realtime; /** real time in micro seconds */ 14 | struct tms systemTime; /** system time w.r.t. 
clock cycles */ 15 | }Kbs_TimeStamp; 16 | 17 | 18 | 19 | 20 | 21 | /** 22 | * @return current time stamp 23 | */ 24 | Kbs_TimeStamp *kbs_get_TimeStamp(); 25 | 26 | /** 27 | * Print time stamp to stdout 28 | * @param tStamp time stamp to show on stdout 29 | */ 30 | void kbs_show_TimeStamp(Kbs_TimeStamp *tStamp); 31 | 32 | /** 33 | * Computes difference in time values 34 | * @param first relative realtime value 35 | * @param second relative realtime value 36 | * @return real time between first and second 37 | */ 38 | struct timeval *kbs_get_DifferenceTimeval(struct timeval *first, struct timeval *second); 39 | 40 | /** 41 | * Computes difference in time values 42 | * @param first relative system time value 43 | * @param second relative system time value 44 | * @return system time between first and second 45 | */ 46 | struct tms *kbs_get_DifferenceTms(struct tms *first, struct tms *second); 47 | 48 | /** 49 | * Computes difference of time stamps 50 | * @param first relative system time value 51 | * @param second relative system time value 52 | * @return time stamp containing real and system time between first and second 53 | */ 54 | Kbs_TimeStamp *kbs_get_DifferenceTimeStamp(Kbs_TimeStamp *first, Kbs_TimeStamp *second); 55 | 56 | #endif 57 | -------------------------------------------------------------------------------- /BPR2/src/Standard/kbs_Types.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef KBS_TYPES_H 3 | #define KBS_TYPES_H 4 | 5 | 6 | /*if tpedefs are changed, also change according values in kbs_Limits.h */ 7 | 8 | typedef unsigned char Kbs_Uchar; 9 | typedef char Kbs_Char; 10 | 11 | typedef unsigned int Kbs_Uint; 12 | typedef int Kbs_Int; 13 | 14 | typedef unsigned long Kbs_Ulong; 15 | typedef long Kbs_Long; 16 | 17 | typedef double Kbs_Double; 18 | 19 | typedef int Kbs_FileDesc; 20 | 21 | typedef int Kbs_Bool; 22 | 23 | #define TRUE 1 24 | #define FALSE 0 25 | 26 | 27 | 28 | #endif 29 | 
-------------------------------------------------------------------------------- /BPR2/src/SuffixArray/Makefile.am: -------------------------------------------------------------------------------- 1 | SUBDIRS = . 2 | 3 | suffixarrayconstdir = $(includedir)/kbs_include 4 | 5 | suffixarrayconst_headers = \ 6 | kbs_SuffixArray.h \ 7 | kbs_SuffixArrayAnnotated.h \ 8 | kbs_SuffixArrayChecker.h \ 9 | kbs_SuffixArrayConstDStepAndPre.h 10 | 11 | 12 | 13 | suffixarrayconst_modules = \ 14 | kbs_SuffixArray.c \ 15 | kbs_SuffixArrayAnnotated.c \ 16 | kbs_SuffixArrayChecker.c \ 17 | kbs_SuffixArrayConstDStepAndPre.c 18 | 19 | noinst_LTLIBRARIES = libsuffixarray.la 20 | 21 | libsuffixarray_la_SOURCES = \ 22 | $(suffixarrayconst_headers) \ 23 | $(suffixarrayconst_modules) 24 | 25 | 26 | libsuffixarray_la_LIBADD = \ 27 | @top_srcdir@/src/Standard/libstandard.la \ 28 | @top_srcdir@/src/Util/libutil.la 29 | 30 | libsuffixarray_la_CFLAGS = \ 31 | -I@top_srcdir@/src/Standard \ 32 | -I@top_srcdir@/src/Util 33 | 34 | libsuffixarray_la_LDFLAGS = -lm 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /BPR2/src/SuffixArray/kbs_SuffixArray.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "kbs_Alphabet.h" 4 | #include "kbs_Error.h" 5 | #include "kbs_Limits.h" 6 | #include "kbs_Math.h" 7 | #include "kbs_String.h" 8 | #include "kbs_SuffixArray.h" 9 | #include "kbs_SuffixArrayConstDStepAndPre.h" 10 | #include "kbs_Time.h" 11 | #include "kbs_Types.h" 12 | 13 | 14 | 15 | 16 | 17 | 18 | /*---------------------------------------------------------------------------------------*/ 19 | void kbs_delete_SA_WithoutString(Kbs_SuffixArray *oldSA) { 20 | if (oldSA == NULL) { 21 | return; 22 | } 23 | if (oldSA->posArray != NULL) { 24 | free(oldSA->posArray); 25 | } 26 | if (oldSA->invPosArray != NULL) { 27 | free(oldSA->invPosArray); 28 | } 29 | if (oldSA->buckets != NULL) { 30 | 
free(oldSA->buckets); 31 | } 32 | if (oldSA->lcpArray != NULL) { 33 | free(oldSA->lcpArray); 34 | } 35 | free(oldSA); 36 | } 37 | 38 | /*---------------------------------------------------------------------------------------*/ 39 | void kbs_delete_SA_IncludingString(Kbs_SuffixArray *oldSA) { 40 | if (oldSA == NULL) { 41 | return; 42 | } 43 | kbs_delete_Ustring(oldSA->str); 44 | kbs_delete_SA_WithoutString(oldSA); 45 | } 46 | 47 | 48 | /*---------------------------------------------------------------------------------------*/ 49 | void kbs_show_SA(const Kbs_SuffixArray *const sa){ 50 | Kbs_Ulong i; 51 | Kbs_Char *tempCharArray; 52 | if (sa == NULL) { 53 | return; 54 | } 55 | if (sa->str->alphabet->alphaSize < ('Z'-'A')*2) 56 | for(i=0; istr->strLength; i++) { 57 | sa->str->str[i] += 'A'; 58 | } 59 | for(i=0; istr->strLength; i++) { 60 | tempCharArray = sa->str->str + sa->posArray[i]; 61 | printf("i:%lu - \t%lu\t%s\n", i, sa->posArray[i], tempCharArray); 62 | } 63 | } 64 | 65 | 66 | /*---------------------------------------------------------------------------------------*/ 67 | Kbs_Bool kbs_isEqual_SA(const Kbs_SuffixArray *const sa1, const Kbs_SuffixArray *const sa2) { 68 | Kbs_Ulong i; 69 | Kbs_Ulong size; 70 | if (sa1->str->strLength != sa2->str->strLength) { 71 | printf("size of SA1 %lu != size of SA2 %lu\n", sa1->str->strLength, sa2->str->strLength); 72 | fflush(stdout); 73 | return FALSE; 74 | } 75 | size = sa1->str->strLength; 76 | for (i=0; iposArray[i] != sa2->posArray[i]) { 78 | printf("Mismatch at position %lu: %lu != %lu\n", i, sa1->posArray[i], sa2->posArray[i]); 79 | fflush(stdout); 80 | } 81 | } 82 | fflush(stdout); 83 | return TRUE; 84 | } 85 | 86 | 87 | 88 | /*---------------------------------------------------------------------------------------*/ 89 | void kbs_show_LongArray(Kbs_Long *longArray, Kbs_Ulong length) { 90 | Kbs_Ulong i; 91 | if(longArray == NULL) { 92 | return; 93 | } 94 | printf("Long Array:\n"); 95 | for(i=0; i 6 | #include 
"kbs_Error.h" 7 | #include "kbs_SuffixArray.h" 8 | #include "kbs_SuffixArrayChecker.h" 9 | #include "kbs_Types.h" 10 | 11 | 12 | 13 | 14 | /** 15 | * straight forward checking of condition 1 (all suffix numbers are in range up to string length) 16 | * @param sa suffix array to be checked for correctness 17 | * @return suffix array is correct 18 | */ 19 | static Kbs_Bool check_Condition1(const Kbs_SuffixArray *const sa) { 20 | Kbs_Ulong i; 21 | for(i=0; i < sa->str->strLength; i++) { 22 | if (sa->posArray[i] >= sa->str->strLength) { 23 | printf("Condition 1 not satisfied: posArray[%d]= %d >= %d = string length\n", (int)(i), (int)sa->posArray[i], (int)sa->str->strLength); 24 | return FALSE; 25 | } 26 | } 27 | return TRUE; 28 | } 29 | 30 | /** 31 | * straight forward checking of condition 2 () 32 | * @param sa suffix array to be checked for correctness 33 | * @return suffix array is correct 34 | */ 35 | static Kbs_Bool check_Condition2(const Kbs_SuffixArray *const sa) { 36 | Kbs_Ulong i; 37 | Kbs_Ulong *pos = sa->posArray; 38 | Kbs_Uchar *s = sa->str->str; 39 | for(i=1; i < sa->str->strLength; i++) { 40 | if (s[pos[i-1]] > s[pos[i]]) { 41 | printf("Condition 2 not satisfied: s[pos[%d]]=%d > s[pos[%d]]=%d\n", (int)(i-1), (int)s[pos[i-1]], (int)i, (int)s[pos[i]]); 42 | printf("Condition 2 not satisfied: %d%d%d > %d%d%d \n", (int)s[pos[i-1]], (int)s[pos[i-1]+1], (int)s[pos[i-1]+2], (int)s[pos[i]],(int)s[pos[i]+1],(int)s[pos[i]+2]); 43 | printf("Condition 2 not satisfied: pos[%d]=%d > pos[%d]]=%d\n", (int)(i-1), (int)pos[i-1], (int)i, (int)pos[i]); 44 | return FALSE; 45 | } 46 | } 47 | return TRUE; 48 | } 49 | 50 | /** 51 | * straight forward checking of condition 3 52 | * @param sa suffix array to be checked for correctness 53 | * @return suffix array is correct 54 | */ 55 | static Kbs_Bool check_Condition3(const Kbs_SuffixArray *const sa) { 56 | Kbs_Ulong i; 57 | Kbs_Ulong *pos = sa->posArray; 58 | Kbs_Long* invPos = (Kbs_Long*) malloc(sizeof(Kbs_Long) * 
(sa->str->strLength + 1)); 59 | Kbs_Uchar *s = sa->str->str; 60 | for(i=0; i < sa->str->strLength; i++) { 61 | invPos[pos[i]] = i; 62 | } 63 | invPos[sa->str->strLength] = -1; 64 | Kbs_Long j, k; 65 | for(i=1; i < sa->str->strLength; i++) { 66 | if (s[pos[i-1]] == s[pos[i]]) { // && pos[i-1] != sa->str->strLength-1 67 | j = (Kbs_Long)invPos[pos[i-1] + 1]; 68 | k = (Kbs_Long)invPos[pos[i] + 1]; 69 | if (j>=k) { 70 | printf("Condition 3 not satisfied: invPos[pos[%d]+1]=%d >= invPos[pos[%d]+1]=%d\n", (int)(i-1), (int)j, (int)i, (int)k); 71 | free(invPos); 72 | return FALSE; 73 | } 74 | } 75 | } 76 | free(invPos); 77 | return TRUE; 78 | } 79 | 80 | 81 | /*---------------------------------------------------------------------*/ 82 | Kbs_Bool kbs_check_SuffixArray(const Kbs_SuffixArray *const sa) { 83 | if (sa == NULL || sa->str == NULL || sa->str->str == NULL || sa->posArray == NULL ) { 84 | KBS_ERROR(KBS_ERROR_NULLPOINTER); 85 | return FALSE; 86 | } 87 | return (check_Condition1(sa) && check_Condition2(sa) && check_Condition3(sa)); 88 | } 89 | 90 | 91 | #endif 92 | -------------------------------------------------------------------------------- /BPR2/src/SuffixArray/kbs_SuffixArrayChecker.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef KBS_SUFFIXARRAYCHECKER_H 3 | #define KBS_SUFFIXARRAYCHECKER_H 4 | 5 | #include "kbs_SuffixArray.h" 6 | #include "kbs_Types.h" 7 | 8 | 9 | 10 | 11 | /** 12 | An array sa[0,n-1] of length n is a suffix array of 13 | a string s[0,n-1] if and only if the following conditions hold
14 | 1. For all i between 0 and n-1 => sa[i] is between 0 and n-1
15 | 2. For all i between 1 and n-1 => s[sa[i-1]]<=s[sa[i]]
16 | 3. For all i between 1 and n-1:
17 | if s[sa[i-1]]=s[sa[i]] and sa[i-1] != n-1
18 | => there exist j,k between 0 and n-1 19 | such that sa[j]=sa[i-1]+1 and sa[k]=sa[i]+1 and j < k 20 | 21 | */ 22 | 23 | /** 24 | * straight forward checking of above conditions 25 | * @param sa suffix array to be checked for correctness 26 | * @return suffix array is correct 27 | */ 28 | Kbs_Bool kbs_check_SuffixArray(const Kbs_SuffixArray *const sa); 29 | 30 | 31 | #endif 32 | -------------------------------------------------------------------------------- /BPR2/src/SuffixArray/kbs_SuffixArrayConstDStepAndPre.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef KBS_SUFFIXARRAYCONSTDSTEPANDPRE_H 3 | #define KBS_SUFFIXARRAYCONSTDSTEPANDPRE_H 4 | 5 | #include "kbs_String.h" 6 | #include "kbs_SuffixArray.h" 7 | #include "kbs_Time.h" 8 | #include "kbs_Types.h" 9 | 10 | 11 | 12 | /** 13 | * Build the suffix array with the bpr algorithm. 14 | * The bucket references are realized via pointers referring into the suffix array. 15 | * For each suffix its final position or the last position in its bucket is 16 | * held in a separate array. 17 | * First the smallest level-1 buckets are determined 18 | * For the previously sorted level-1 bucket 'a', the 2-level buckets 'ya' are determined 19 | * by a left-to-right scan of bucket 'a'. 20 | * The space demands are up to 10*(seq->strLength) 21 | * @param seq - string to build suffix array for. 22 | * @param q - prefix length for which the initial bucket sort is performed. 23 | * @return - the suffix array for the string seq, where invPosArray = bucketIndices = NULL.
24 | */ 25 | Kbs_SuffixArray *kbs_buildDstepUsePrePlusCopyFreqOrder_SuffixArray(Kbs_Ustring *const seq, register const Kbs_Ulong q); 26 | 27 | 28 | 29 | #endif 30 | -------------------------------------------------------------------------------- /BPR2/src/Util/.deps/libutil_la-kbs_FibonacciString.Plo: -------------------------------------------------------------------------------- 1 | libutil_la-kbs_FibonacciString.lo: kbs_FibonacciString.c \ 2 | /usr/include/stdc-predef.h /usr/include/stdio.h \ 3 | /usr/include/bits/libc-header-start.h /usr/include/features.h \ 4 | /usr/include/sys/cdefs.h /usr/include/bits/wordsize.h \ 5 | /usr/include/bits/long-double.h /usr/include/gnu/stubs.h \ 6 | /usr/include/gnu/stubs-64.h \ 7 | /usr/lib/gcc/x86_64-pc-linux-gnu/7.1.1/include/stddef.h \ 8 | /usr/include/bits/types.h /usr/include/bits/typesizes.h \ 9 | /usr/include/bits/types/__FILE.h /usr/include/bits/types/FILE.h \ 10 | /usr/include/libio.h /usr/include/_G_config.h \ 11 | /usr/include/bits/types/__mbstate_t.h \ 12 | /usr/lib/gcc/x86_64-pc-linux-gnu/7.1.1/include/stdarg.h \ 13 | /usr/include/bits/stdio_lim.h /usr/include/bits/sys_errlist.h \ 14 | /usr/include/bits/stdio.h /usr/include/string.h \ 15 | /usr/include/bits/types/locale_t.h /usr/include/bits/types/__locale_t.h \ 16 | /usr/include/strings.h /usr/include/math.h \ 17 | /usr/include/bits/math-vector.h /usr/include/bits/libm-simd-decl-stubs.h \ 18 | /usr/include/bits/floatn.h /usr/include/bits/huge_val.h \ 19 | /usr/include/bits/huge_valf.h /usr/include/bits/huge_vall.h \ 20 | /usr/include/bits/inf.h /usr/include/bits/nan.h \ 21 | /usr/include/bits/flt-eval-method.h /usr/include/bits/fp-logb.h \ 22 | /usr/include/bits/fp-fast.h \ 23 | /usr/include/bits/mathcalls-helper-functions.h \ 24 | /usr/include/bits/mathcalls.h /usr/include/bits/mathinline.h \ 25 | ../../src/Standard/kbs_Error.h ../../src/Standard/kbs_Types.h \ 26 | kbs_FibonacciString.h ../../src/Standard/kbs_String.h \ 27 | ../../src/Standard/kbs_Alphabet.h 
../../src/Standard/kbs_Types.h 28 | 29 | /usr/include/stdc-predef.h: 30 | 31 | /usr/include/stdio.h: 32 | 33 | /usr/include/bits/libc-header-start.h: 34 | 35 | /usr/include/features.h: 36 | 37 | /usr/include/sys/cdefs.h: 38 | 39 | /usr/include/bits/wordsize.h: 40 | 41 | /usr/include/bits/long-double.h: 42 | 43 | /usr/include/gnu/stubs.h: 44 | 45 | /usr/include/gnu/stubs-64.h: 46 | 47 | /usr/lib/gcc/x86_64-pc-linux-gnu/7.1.1/include/stddef.h: 48 | 49 | /usr/include/bits/types.h: 50 | 51 | /usr/include/bits/typesizes.h: 52 | 53 | /usr/include/bits/types/__FILE.h: 54 | 55 | /usr/include/bits/types/FILE.h: 56 | 57 | /usr/include/libio.h: 58 | 59 | /usr/include/_G_config.h: 60 | 61 | /usr/include/bits/types/__mbstate_t.h: 62 | 63 | /usr/lib/gcc/x86_64-pc-linux-gnu/7.1.1/include/stdarg.h: 64 | 65 | /usr/include/bits/stdio_lim.h: 66 | 67 | /usr/include/bits/sys_errlist.h: 68 | 69 | /usr/include/bits/stdio.h: 70 | 71 | /usr/include/string.h: 72 | 73 | /usr/include/bits/types/locale_t.h: 74 | 75 | /usr/include/bits/types/__locale_t.h: 76 | 77 | /usr/include/strings.h: 78 | 79 | /usr/include/math.h: 80 | 81 | /usr/include/bits/math-vector.h: 82 | 83 | /usr/include/bits/libm-simd-decl-stubs.h: 84 | 85 | /usr/include/bits/floatn.h: 86 | 87 | /usr/include/bits/huge_val.h: 88 | 89 | /usr/include/bits/huge_valf.h: 90 | 91 | /usr/include/bits/huge_vall.h: 92 | 93 | /usr/include/bits/inf.h: 94 | 95 | /usr/include/bits/nan.h: 96 | 97 | /usr/include/bits/flt-eval-method.h: 98 | 99 | /usr/include/bits/fp-logb.h: 100 | 101 | /usr/include/bits/fp-fast.h: 102 | 103 | /usr/include/bits/mathcalls-helper-functions.h: 104 | 105 | /usr/include/bits/mathcalls.h: 106 | 107 | /usr/include/bits/mathinline.h: 108 | 109 | ../../src/Standard/kbs_Error.h: 110 | 111 | ../../src/Standard/kbs_Types.h: 112 | 113 | kbs_FibonacciString.h: 114 | 115 | ../../src/Standard/kbs_String.h: 116 | 117 | ../../src/Standard/kbs_Alphabet.h: 118 | 119 | ../../src/Standard/kbs_Types.h: 120 | 
-------------------------------------------------------------------------------- /BPR2/src/Util/Makefile.am: -------------------------------------------------------------------------------- 1 | 2 | utildir = $(includedir)/kbs_include 3 | 4 | util_headers = \ 5 | kbs_FibonacciString.h \ 6 | kbs_RandomString.h \ 7 | kbs_RandomStringFile.h 8 | 9 | util_modules = \ 10 | kbs_FibonacciString.c \ 11 | kbs_RandomString.c \ 12 | kbs_RandomStringFile.c 13 | 14 | noinst_LTLIBRARIES = \ 15 | libutil.la 16 | 17 | libutil_la_SOURCES = \ 18 | $(util_headers) \ 19 | $(util_modules) 20 | 21 | libutil_la_LIBADD = \ 22 | @top_srcdir@/src/Extern/libextern.la \ 23 | @top_srcdir@/src/Standard/libstandard.la 24 | 25 | libutil_la_CFLAGS = \ 26 | -I@top_srcdir@/src/Extern \ 27 | -I@top_srcdir@/src/Standard 28 | -------------------------------------------------------------------------------- /BPR2/src/Util/kbs_FibonacciString.c: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * FILE : words.c 3 | * AUTHOR : Jens Stoye 4 | * DATE : 12/1997 5 | *****************************************************************************/ 6 | 7 | /***************************************************************************** 8 | * INCLUDES 9 | *****************************************************************************/ 10 | #include 11 | #include 12 | #include // sqrt(),pow() 13 | 14 | #include "kbs_Error.h" 15 | #include "kbs_FibonacciString.h" 16 | #include "kbs_String.h" 17 | #include "kbs_Types.h" 18 | 19 | 20 | 21 | /*---------------------------------------------------------------------------*/ 22 | /* Write Fibonacci string of length (see Crochemore/Rytter, 1995) to fptr. 
23 | * 24 | */ 25 | Kbs_Ustring *kbs_genFibonacciLen_Ustring(const Kbs_Ulong length) { 26 | 27 | if (length == 0) { 28 | return NULL; 29 | } 30 | Kbs_Ustring *fibString = (Kbs_Ustring *)malloc(sizeof(Kbs_Ustring *)); 31 | if (fibString == NULL) { 32 | KBS_ERROR(KBS_ERROR_MALLOC); 33 | } 34 | fibString->strLength = length; 35 | fibString->str = (Kbs_Uchar *)malloc(sizeof(Kbs_Uchar) * (length+1)); 36 | if (fibString->str == NULL) { 37 | KBS_ERROR(KBS_ERROR_MALLOC); 38 | } 39 | if(length == 1) fibString->str[0] = 'b'; 40 | else { 41 | fibString->str[0] = 'a'; 42 | fibString->str[1] = 'b'; 43 | Kbs_Ulong l1 = 1; 44 | Kbs_Ulong l2 = 2; 45 | while (l2 < length) { 46 | Kbs_Ulong k; 47 | for(k=0; k= length) { 49 | break; 50 | } 51 | fibString->str[l2+k] = fibString->str[k]; 52 | } 53 | Kbs_Ulong tmp = l1; 54 | l1 = l2; 55 | l2 += tmp; 56 | } 57 | } 58 | fibString->str[length] = '\0'; 59 | return fibString; 60 | } /* fib() */ 61 | 62 | 63 | /*--------------------------------------------------------------------------*/ 64 | Kbs_Char *kbs_genFibonacciFileName(Kbs_Ulong fileSize) { 65 | Kbs_Char *filename; 66 | filename = (Kbs_Char *) malloc(sizeof(Kbs_Char) * 100); 67 | if (filename == NULL) { 68 | KBS_ERROR(KBS_ERROR_MALLOC); 69 | } 70 | memset(filename, '\0', 100); 71 | sprintf(filename,"fib_s%d", (int)fileSize); 72 | return filename; 73 | } 74 | -------------------------------------------------------------------------------- /BPR2/src/Util/kbs_FibonacciString.h: -------------------------------------------------------------------------------- 1 | #ifndef KBS_FIBONACCISTRING_H 2 | #define KBS_FIBONACCISTRING_H 3 | 4 | #include 5 | #include 6 | 7 | #include "kbs_Error.h" 8 | #include "kbs_String.h" 9 | #include "kbs_Types.h" 10 | 11 | 12 | 13 | /*---------------------------------------------------------------------------*/ 14 | /* Write Fibonacci string of length (see Crochemore/Rytter, 1995) to fptr. 
15 | * @param length - length of string to be generated 16 | * @return Fibonacci string of size length 17 | */ 18 | Kbs_Ustring *kbs_genFibonacciLen_Ustring(const Kbs_Ulong length); 19 | 20 | /*---------------------------------------------------------------------------*/ 21 | /* Generates filename for Fibonacci string of length fileSize 22 | * @param fileSize - length of string to be generated 23 | * @return Fibonacci string of length fileSize 24 | */ 25 | Kbs_Char *kbs_genFibonacciFileName(Kbs_Ulong fileSize); 26 | 27 | #endif 28 | -------------------------------------------------------------------------------- /BPR2/src/Util/kbs_RandomString.c: -------------------------------------------------------------------------------- 1 | /* Creating of Test Data Version 0.1 from 12.11.01 12:15 Uhr */ 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include "kbs_RandomStringFile.h" 13 | #include "randomlib.h" 14 | #include "kbs_Types.h" 15 | #include "kbs_Error.h" 16 | #include "kbs_String.h" 17 | 18 | 19 | 20 | 21 | 22 | /*--------------------------------------------------------------------------*/ 23 | Kbs_Uchar kbs_genRand_UChar(Kbs_Uint alphabetSize) { 24 | Kbs_Uchar letter; 25 | Kbs_Int lower, upper; 26 | if (alphabetSize>250) { 27 | printf("alphabetSize %du exceed maximum\n", alphabetSize); 28 | } 29 | lower = 1; 30 | upper = lower + alphabetSize - 1; 31 | letter = RandomInt(lower,upper); 32 | if (alphabetSize < 60) { 33 | letter += 'A'-1; 34 | } 35 | return letter; 36 | } 37 | 38 | /*--------------------------------------------------------------------------*/ 39 | Kbs_Ustring *kbs_genRand_UString(Kbs_Uint alphabetSize, Kbs_Ulong seqSize) { 40 | if (seqSize == 0) { 41 | return NULL; 42 | } 43 | Kbs_Ustring *string; 44 | string = (Kbs_Ustring *) malloc(sizeof(Kbs_Ustring)); 45 | if (string == NULL) { 46 | KBS_ERROR(KBS_ERROR_MALLOC); 47 | } 48 | string->strLength = seqSize; 49 | string->str 
= (Kbs_Uchar *) malloc(seqSize+1 * sizeof(Kbs_Uchar)); 50 | if (string->str == NULL) { 51 | KBS_ERROR(KBS_ERROR_MALLOC); 52 | } 53 | kbsRandomize(); 54 | Kbs_Ulong i; 55 | for (i=0; istr[i] = kbs_genRand_UChar(alphabetSize); 57 | } 58 | string->str[seqSize] = 0; 59 | return string; 60 | } 61 | 62 | /*--------------------------------------------------------------------------*/ 63 | Kbs_Ustring *kbs_genRandPeriodic_UString(Kbs_Uint alphabetSize, Kbs_Ulong lengthPeriod, Kbs_Ulong strLength) { 64 | if (strLength == 0) { 65 | return NULL; 66 | } 67 | Kbs_Ustring *string; 68 | string = (Kbs_Ustring *) malloc(sizeof(Kbs_Ustring)); 69 | if (string == NULL) { 70 | KBS_ERROR(KBS_ERROR_MALLOC); 71 | } 72 | string->strLength = strLength; 73 | string->str = (Kbs_Uchar *) malloc((strLength + 1) * sizeof(Kbs_Uchar)); 74 | if (string->str == NULL) { 75 | KBS_ERROR(KBS_ERROR_MALLOC); 76 | } 77 | kbsRandomize(); 78 | if (lengthPeriod > strLength || lengthPeriod == 0) { 79 | return kbs_genRand_UString(alphabetSize, strLength); 80 | } 81 | Kbs_Ulong i; 82 | for (i=0; istr[i] = kbs_genRand_UChar(alphabetSize); 84 | } 85 | for (i=lengthPeriod; istr[i] = string->str[i - lengthPeriod]; 87 | } 88 | string->str[strLength] = 0; 89 | return string; 90 | } 91 | 92 | 93 | 94 | 95 | -------------------------------------------------------------------------------- /BPR2/src/Util/kbs_RandomString.h: -------------------------------------------------------------------------------- 1 | #ifndef KBS_RANDOMSTRING_H 2 | #define KBS_RANDOMSTRING_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include "kbs_Types.h" 14 | #include "kbs_String.h" 15 | 16 | 17 | /* Prototypes */ 18 | 19 | 20 | Kbs_Uchar kbs_genRand_UChar(Kbs_Uint alphabetSize); 21 | Kbs_Ustring *kbs_genRand_UString(Kbs_Uint alphabetSize, Kbs_Ulong seqSize); 22 | Kbs_Ustring *kbs_genRandPeriodic_UString(Kbs_Uint alphabetSize, Kbs_Ulong lengthPeriod, Kbs_Ulong 
strLength); 23 | 24 | 25 | #endif 26 | -------------------------------------------------------------------------------- /BPR2/src/Util/kbs_RandomStringFile.c: -------------------------------------------------------------------------------- 1 | /* Creating of Test Data Version 0.1 from 12.11.01 12:15 Uhr */ 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include "kbs_RandomStringFile.h" 13 | #include "kbs_RandomString.h" 14 | #include "randomlib.h" 15 | #include "kbs_Types.h" 16 | #include "kbs_Error.h" 17 | 18 | 19 | /*--------------------------------------------------------------------------*/ 20 | void kbs_make_RandomStringFile(Kbs_Char *testdatafile) { 21 | filesize = 0; 22 | file_ptr = fopen(testdatafile, "wb+"); 23 | if (file_ptr == NULL) { 24 | printf("kbs_make_RandomSeqFile - %s failed\n", testdatafile); 25 | exit(EXIT_FAILURE); 26 | } 27 | } 28 | 29 | /*--------------------------------------------------------------------------*/ 30 | void kbs_open_RandomStringFile(Kbs_Char *testdatafile) { 31 | file_ptr = fopen(testdatafile, "rb"); 32 | if (file_ptr == NULL) { 33 | printf("kbs_open_RandomSeqFile - %s failed\n", testdatafile); 34 | exit(EXIT_FAILURE); 35 | } 36 | } 37 | 38 | /*--------------------------------------------------------------------------*/ 39 | void kbs_close_RandomStringFile(void){ 40 | fclose(file_ptr); 41 | } 42 | 43 | 44 | /*--------------------------------------------------------------------------*/ 45 | void kbs_generate_RandomUStringFile(Kbs_Uint alphabetSize, Kbs_Char *filename, Kbs_Ulong seqSize) { 46 | Kbs_Ulong i; 47 | Kbs_Uchar *seq; 48 | kbs_make_RandomStringFile(filename); 49 | seq = (Kbs_Uchar *) malloc(seqSize * sizeof(Kbs_Uchar)); 50 | kbsRandomize(); 51 | for (i=0; i 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include "kbs_Types.h" 14 | 15 | /* Constant Definitions */ 16 | 17 | 
#define RAND_ALPHABET_SIZE 4 18 | #define RAND_NUMBERFILES 10 19 | #define RAND_ATOMAR_SIZE 10000 20 | 21 | 22 | /* directories */ 23 | #define RANDOM_SEQ_DIR "./RandomStrings\0" 24 | 25 | 26 | 27 | 28 | 29 | /*--------------------------------------------------------------------------*/ 30 | /* Definition of the Testdatafilename */ 31 | #define bool int /* Definition of the datatype boolean */ 32 | #define true 1 /* Definition of true value */ 33 | #define false 0 /* Definition of false value */ 34 | 35 | 36 | 37 | 38 | /* Global Variables */ 39 | 40 | FILE *file_ptr; 41 | unsigned long filesize; 42 | 43 | /* Prototypes */ 44 | 45 | void kbs_make_RandomStringFile(Kbs_Char *testdatafile); 46 | void kbs_open_RandomStringFile(Kbs_Char *testdatafile); 47 | void kbs_close_RandomStringFile(void); 48 | /*static void kbs_appendUChar_RandomStringFile(Kbs_Uchar letter, Kbs_Int address );*/ 49 | 50 | void kbs_generate_RandomUStringFile(Kbs_Uint alphabetSize, Kbs_Char *filename, Kbs_Ulong seqSize); 51 | 52 | Kbs_Char *kbs_generate_RandomStringFileName(Kbs_Uint alphabetSize, Kbs_Ulong fileSize); 53 | 54 | 55 | Kbs_Char *kbs_generate_PeriodicStrFileName(const Kbs_Uint alphaSize, const Kbs_Ulong periodLen, const Kbs_Ulong strLen); 56 | 57 | /*--------------------------------------------------------------------------*/ 58 | void kbs_generate_NURandomFiles(Kbs_Uint n, Kbs_Ulong atomarSize, Kbs_Uint alphabetSize); 59 | 60 | #endif 61 | -------------------------------------------------------------------------------- /BPR2/src/config.h: -------------------------------------------------------------------------------- 1 | /* src/config.h. Generated from config.h.in by configure. */ 2 | /* src/config.h.in. Generated from configure.in by autoheader. */ 3 | 4 | /* Define to 1 if you have the header file. */ 5 | #define HAVE_DLFCN_H 1 6 | 7 | /* Define to 1 if you have the header file. */ 8 | #define HAVE_INTTYPES_H 1 9 | 10 | /* Define to 1 if you have the header file. 
*/ 11 | #define HAVE_MEMORY_H 1 12 | 13 | /* Define to 1 if you have the header file. */ 14 | #define HAVE_STDINT_H 1 15 | 16 | /* Define to 1 if you have the header file. */ 17 | #define HAVE_STDLIB_H 1 18 | 19 | /* Define to 1 if you have the header file. */ 20 | #define HAVE_STRINGS_H 1 21 | 22 | /* Define to 1 if you have the header file. */ 23 | #define HAVE_STRING_H 1 24 | 25 | /* Define to 1 if you have the header file. */ 26 | #define HAVE_SYS_STAT_H 1 27 | 28 | /* Define to 1 if you have the header file. */ 29 | #define HAVE_SYS_TYPES_H 1 30 | 31 | /* Define to 1 if you have the header file. */ 32 | #define HAVE_UNISTD_H 1 33 | 34 | /* Define to the sub-directory where libtool stores uninstalled libraries. */ 35 | #define LT_OBJDIR ".libs/" 36 | 37 | /* Name of package */ 38 | #define PACKAGE "kbs-bpr" 39 | 40 | /* Define to the address where bug reports for this package should be sent. */ 41 | #define PACKAGE_BUGREPORT "Klaus-Bernd.Schuermann@CeBiTec.Uni-Bielefeld.DE" 42 | 43 | /* Define to the full name of this package. */ 44 | #define PACKAGE_NAME "KBS bucket-pointer refinement" 45 | 46 | /* Define to the full name and version of this package. */ 47 | #define PACKAGE_STRING "KBS bucket-pointer refinement 2.0.0" 48 | 49 | /* Define to the one symbol short name of this package. */ 50 | #define PACKAGE_TARNAME "kbs-bpr" 51 | 52 | /* Define to the home page for this package. */ 53 | #define PACKAGE_URL "" 54 | 55 | /* Define to the version of this package. */ 56 | #define PACKAGE_VERSION "2.0.0" 57 | 58 | /* Define to 1 if you have the ANSI C header files. */ 59 | #define STDC_HEADERS 1 60 | 61 | /* Version number of package */ 62 | #define VERSION "2.0.0" 63 | -------------------------------------------------------------------------------- /BPR2/src/config.h.in: -------------------------------------------------------------------------------- 1 | /* src/config.h.in. Generated from configure.in by autoheader. 
*/ 2 | 3 | /* Define to 1 if you have the header file. */ 4 | #undef HAVE_DLFCN_H 5 | 6 | /* Define to 1 if you have the header file. */ 7 | #undef HAVE_INTTYPES_H 8 | 9 | /* Define to 1 if you have the header file. */ 10 | #undef HAVE_MEMORY_H 11 | 12 | /* Define to 1 if you have the header file. */ 13 | #undef HAVE_STDINT_H 14 | 15 | /* Define to 1 if you have the header file. */ 16 | #undef HAVE_STDLIB_H 17 | 18 | /* Define to 1 if you have the header file. */ 19 | #undef HAVE_STRINGS_H 20 | 21 | /* Define to 1 if you have the header file. */ 22 | #undef HAVE_STRING_H 23 | 24 | /* Define to 1 if you have the header file. */ 25 | #undef HAVE_SYS_STAT_H 26 | 27 | /* Define to 1 if you have the header file. */ 28 | #undef HAVE_SYS_TYPES_H 29 | 30 | /* Define to 1 if you have the header file. */ 31 | #undef HAVE_UNISTD_H 32 | 33 | /* Define to the sub-directory where libtool stores uninstalled libraries. */ 34 | #undef LT_OBJDIR 35 | 36 | /* Name of package */ 37 | #undef PACKAGE 38 | 39 | /* Define to the address where bug reports for this package should be sent. */ 40 | #undef PACKAGE_BUGREPORT 41 | 42 | /* Define to the full name of this package. */ 43 | #undef PACKAGE_NAME 44 | 45 | /* Define to the full name and version of this package. */ 46 | #undef PACKAGE_STRING 47 | 48 | /* Define to the one symbol short name of this package. */ 49 | #undef PACKAGE_TARNAME 50 | 51 | /* Define to the home page for this package. */ 52 | #undef PACKAGE_URL 53 | 54 | /* Define to the version of this package. */ 55 | #undef PACKAGE_VERSION 56 | 57 | /* Define to 1 if you have the ANSI C header files. 
*/ 58 | #undef STDC_HEADERS 59 | 60 | /* Version number of package */ 61 | #undef VERSION 62 | -------------------------------------------------------------------------------- /BPR2/src/stamp-h1: -------------------------------------------------------------------------------- 1 | timestamp for src/config.h 2 | -------------------------------------------------------------------------------- /DC3/drittel.C: -------------------------------------------------------------------------------- 1 | inline bool leq(int a1, int a2, int b1, int b2) { // lexic. order for pairs 2 | return(a1 < b1 || a1 == b1 && a2 <= b2); 3 | } // and triples 4 | inline bool leq(int a1, int a2, int a3, int b1, int b2, int b3) { 5 | return(a1 < b1 || a1 == b1 && leq(a2,a3, b2,b3)); 6 | } 7 | // stably sort a[0..n-1] to b[0..n-1] with keys in 0..K from r 8 | static void radixPass(int* a, int* b, int* r, int n, int K) 9 | { // count occurrences 10 | int* c = new int[K + 1]; // counter array 11 | for (int i = 0; i <= K; i++) c[i] = 0; // reset counters 12 | for (int i = 0; i < n; i++) c[r[a[i]]]++; // count occurences 13 | for (int i = 0, sum = 0; i <= K; i++) { // exclusive prefix sums 14 | int t = c[i]; c[i] = sum; sum += t; 15 | } 16 | for (int i = 0; i < n; i++) b[c[r[a[i]]]++] = a[i]; // sort 17 | delete [] c; 18 | } 19 | 20 | // find the suffix array SA of s[0..n-1] in {1..K}^n 21 | // require s[n]=s[n+1]=s[n+2]=0, n>=2 22 | void suffixArray(int* s, int* SA, int n, int K) { 23 | int n0=(n+2)/3, n1=(n+1)/3, n2=n/3, n02=n0+n2; 24 | int* s12 = new int[n02 + 3]; s12[n02]= s12[n02+1]= s12[n02+2]=0; 25 | int* SA12 = new int[n02 + 3]; SA12[n02]=SA12[n02+1]=SA12[n02+2]=0; 26 | int* s0 = new int[n0]; 27 | int* SA0 = new int[n0]; 28 | 29 | // generate positions of mod 1 and mod 2 suffixes 30 | // the "+(n0-n1)" adds a dummy mod 1 suffix if n%3 == 1 31 | for (int i=0, j=0; i < n+(n0-n1); i++) if (i%3 != 0) s12[j++] = i; 32 | 33 | // lsb radix sort the mod 1 and mod 2 triples 34 | radixPass(s12 , SA12, 
s+2, n02, K); 35 | radixPass(SA12, s12 , s+1, n02, K); 36 | radixPass(s12 , SA12, s , n02, K); 37 | 38 | // find lexicographic names of triples 39 | int name = 0, c0 = -1, c1 = -1, c2 = -1; 40 | for (int i = 0; i < n02; i++) { 41 | if (s[SA12[i]] != c0 || s[SA12[i]+1] != c1 || s[SA12[i]+2] != c2) { 42 | name++; c0 = s[SA12[i]]; c1 = s[SA12[i]+1]; c2 = s[SA12[i]+2]; 43 | } 44 | if (SA12[i] % 3 == 1) { s12[SA12[i]/3] = name; } // left half 45 | else { s12[SA12[i]/3 + n0] = name; } // right half 46 | } 47 | 48 | // recurse if names are not yet unique 49 | if (name < n02) { 50 | suffixArray(s12, SA12, n02, name); 51 | // store unique names in s12 using the suffix array 52 | for (int i = 0; i < n02; i++) s12[SA12[i]] = i + 1; 53 | } else // generate the suffix array of s12 directly 54 | for (int i = 0; i < n02; i++) SA12[s12[i] - 1] = i; 55 | 56 | // stably sort the mod 0 suffixes from SA12 by their first character 57 | for (int i=0, j=0; i < n02; i++) if (SA12[i] < n0) s0[j++] = 3*SA12[i]; 58 | radixPass(s0, SA0, s, n0, K); 59 | 60 | // merge sorted SA0 suffixes and sorted SA12 suffixes 61 | for (int p=0, t=n0-n1, k=0; k < n; k++) { 62 | #define GetI() (SA12[t] < n0 ? SA12[t] * 3 + 1 : (SA12[t] - n0) * 3 + 2) 63 | int i = GetI(); // pos of current offset 12 suffix 64 | int j = SA0[p]; // pos of current offset 0 suffix 65 | if (SA12[t] < n0 ? 
66 | leq(s[i], s12[SA12[t] + n0], s[j], s12[j/3]) : 67 | leq(s[i],s[i+1],s12[SA12[t]-n0+1], s[j],s[j+1],s12[j/3+n0])) 68 | { // suffix from SA12 is smaller 69 | SA[k] = i; t++; 70 | if (t == n02) { // done --- only SA0 suffixes left 71 | for (k++; p < n0; p++, k++) SA[k] = SA0[p]; 72 | } 73 | } else { 74 | SA[k] = j; p++; 75 | if (p == n0) { // done --- only SA12 suffixes left 76 | for (k++; t < n02; t++, k++) SA[k] = GetI(); 77 | } 78 | } 79 | } 80 | delete [] s12; delete [] SA12; delete [] SA0; delete [] s0; 81 | } 82 | -------------------------------------------------------------------------------- /DC3/tryall.C: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | using namespace std; 5 | #define DEBUGLEVEL 1 6 | #include "util.h" 7 | 8 | void suffixArray(int* s, int* SA, int n, int K); 9 | 10 | void printV(int* a, int n, char *comment) { 11 | cout << comment << ":"; 12 | for (int i = 0; i < n; i++) { 13 | cout << a[i] << " " ; 14 | } 15 | cout << endl; 16 | } 17 | 18 | bool isPermutation(int *SA, int n) { 19 | bool *seen = new bool[n]; 20 | for (int i = 0; i < n; i++) seen[i] = 0; 21 | for (int i = 0; i < n; i++) seen[SA[i]] = 1; 22 | for (int i = 0; i < n; i++) if (!seen[i]) return 0; 23 | return 1; 24 | } 25 | 26 | bool sleq(int *s1, int *s2) { 27 | if (s1[0] < s2[0]) return 1; 28 | if (s1[0] > s2[0]) return 0; 29 | return sleq(s1+1, s2+1); 30 | } 31 | 32 | // is SA a sorted suffix array for s? 
33 | bool isSorted(int *SA, int *s, int n) { 34 | for (int i = 0; i < n-1; i++) { 35 | if (!sleq(s+SA[i], s+SA[i+1])) return 0; 36 | } 37 | return 1; 38 | } 39 | 40 | // try all inbuts from {1,..,b}^n for 1 <= n <= nmax 41 | int main(int argc, char **argv) { 42 | //int n = 13; 43 | //int s1[] = {2,1,4,4,1,4,4,1,3,3,1,0,0,0}; // mississippi 44 | //int s2[] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0}; 45 | //int n = 8; 46 | //int s1[] = {2,1,3,1,3,1,0,0,0}; // banana 47 | //int s2[] = {0,0,0,0,0,0,0,0,0}; 48 | int nmax = atoi(argv[1]); 49 | int b = atoi(argv[2]); 50 | // try all strings from (1..b)^n 51 | for (int n = 2; n <= nmax; n++) { 52 | cout << n << endl; 53 | int N = int(pow(double(b),n) + 0.5); 54 | int* s = new int[n+3]; 55 | int* SA = new int[n+3]; 56 | for (int i = 0; i < n; i++) s[i] = SA[i] = 1; 57 | s[n] = s[n+1] = s[n+2] = SA[n] = SA[n+1] = SA[n+2] = 0; 58 | for (int j =0; j < N; j++) { 59 | Debug1(printV(s, n, "s")); 60 | suffixArray(s, SA, n, b); 61 | Assert0(s[n] == 0); 62 | Assert0(s[n+1] == 0); 63 | Assert0(SA[n] == 0); 64 | Assert0(SA[n+1] == 0); 65 | Assert0(isPermutation(SA, n)); 66 | Assert0(isSorted(SA, s, n)); 67 | Debug1(printV(SA, n, "SA")); 68 | 69 | // generate next s 70 | int i; 71 | for (i = 0; s[i] == b; i++) s[i] = 1; 72 | s[i]++; 73 | } 74 | delete [] s; 75 | delete [] SA; 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /DC3/util.h: -------------------------------------------------------------------------------- 1 | // this files contains all the application independent little 2 | // functions and macros used for the optimizer. 3 | // In particular Peters debug macros and Dags stuff 4 | // from dbasic.h cdefs, random,... 
5 | 6 | //////////////// stuff originally from debug.h /////////////////////////////// 7 | // (c) 1997 Peter Sanders 8 | // some little utilities for debugging adapted 9 | // to the paros conventions 10 | 11 | 12 | #ifndef UTIL 13 | #define UTIL 14 | 15 | // default debug level. will be overidden e.g. if debug.h is included 16 | #ifndef DEBUGLEVEL 17 | #define DEBUGLEVEL 3 18 | #endif 19 | 20 | #if DEBUGLEVEL >= 0 21 | #define Debug0(A) A 22 | #else 23 | #define Debug0(A) 24 | #endif 25 | #if DEBUGLEVEL >= 1 26 | #define Debug1(A) A 27 | #else 28 | #define Debug1(A) 29 | #endif 30 | #if DEBUGLEVEL >= 2 31 | #define Debug2(A) A 32 | #else 33 | #define Debug2(A) 34 | #endif 35 | #if DEBUGLEVEL >= 3 36 | #define Debug3(A) A 37 | #else 38 | #define Debug3(A) 39 | #endif 40 | #if DEBUGLEVEL >= 4 41 | #define Debug4(A) A 42 | #else 43 | #define Debug4(A) 44 | #endif 45 | #if DEBUGLEVEL >= 5 46 | #define Debug5(A) A 47 | #else 48 | #define Debug5(A) 49 | #endif 50 | #if DEBUGLEVEL >= 6 51 | #define Debug6(A) A 52 | #else 53 | #define Debug6(A) 54 | #endif 55 | 56 | #define Assert(c) if(!(c))\ 57 | {cout << "\nAssertion violation " << __FILE__ << ":" << __LINE__ << endl;} 58 | #define Assert0(C) Debug0(Assert(C)) 59 | #define Assert1(C) Debug1(Assert(C)) 60 | #define Assert2(C) Debug2(Assert(C)) 61 | #define Assert3(C) Debug3(Assert(C)) 62 | #define Assert4(C) Debug4(Assert(C)) 63 | #define Assert5(C) Debug5(Assert(C)) 64 | 65 | #define Error(s) {cout << "\nError:" << s << " " << __FILE__ << ":" << __LINE__ << endl;} 66 | 67 | ////////////// min, max etc. ////////////////////////////////////// 68 | 69 | #ifndef Max 70 | #define Max(x,y) ((x)>=(y)?(x):(y)) 71 | #endif 72 | 73 | #ifndef Min 74 | #define Min(x,y) ((x)<=(y)?(x):(y)) 75 | #endif 76 | 77 | #ifndef Abs 78 | #define Abs(x) ((x) < 0 ? -(x) : (x)) 79 | #endif 80 | 81 | #ifndef PI 82 | #define PI 3.1415927 83 | #endif 84 | 85 | // is this the right definition of limit? 
86 | inline double limit(double x, double bound) 87 | { 88 | if (x > bound) { return bound; } 89 | else if (x < -bound) { return -bound; } 90 | else return x; 91 | } 92 | 93 | /////////////////////// timing ///////////////////// 94 | #include <time.h> 95 | 96 | 97 | // elapsed CPU time in seconds; see also /usr/include/sys/time.h 98 | inline double cpuTime() 99 | { //struct timespec tp; 100 | // clock() counts ticks; CLOCKS_PER_SEC converts them to seconds on every platform 101 | return clock() / (double) CLOCKS_PER_SEC; 102 | // clock_gettime(CLOCK_VIRTUAL, &tp); 103 | // return tp.tv_sec + tp.tv_nsec * 1e-9; 104 | } 105 | 106 | #endif 107 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SACA Benchmark 2 | This repository contains the following C/C++ implementations of suffix array construction algorithms (SACAs). 3 | 4 | **There now exists the project [*SACABench*](https://github.com/sacabench/sacabench) that contains most of these algorithms and allows you to easily *build*, *run*, and *benchmark* them.** 5 | 6 | | Name | Publication | Author(s) | Year | License | 7 | | ------------- | ------------- | --------- | :--: | :-----: | 8 | | [BPR2](BPR2) |[An Incomplex Algorithm for Fast Suffix Array Construction](https://www.techfak.uni-bielefeld.de/~stoye/dropbox/alenex2005final.pdf) | K.-B. Schürmann and J. Stoye | 2005 | GPL v2 | 9 | | [DC3](DC3) | [Simple Linear Work Suffix Array Construction](https://www.cs.helsinki.fi/u/tpkarkka/publications/icalp03.pdf) |J. Kärkkäinen and P. Sanders | 2003| n/a | 10 | | [deep-shallow](deep-shallow) | [Engineering a Lightweight Suffix Array Construction Algorithm](https://link.springer.com/article/10.1007%2Fs00453-004-1094-1?LI=true)| G. Manzini and P. Ferragina | 2002 | GPL v2 | 11 | | [gsaca](gsaca) |[Linear-time Suffix Sorting – A New Approach for Suffix Array Construction](http://drops.dagstuhl.de/opus/volltexte/2016/6069/pdf/LIPIcs-CPM-2016-23.pdf) | U. 
Baier| 2016 | MIT | 12 | | [libdivsufsort](libdivsufsort) | - | Y. Mori | 2011 | MIT | 13 | | [qsufsort](qsufsort) | [Faster Suffix Sorting](https://pdfs.semanticscholar.org/7a84/e0577e51c42aabbf572b7a344f64738a6ea9.pdf) | N. J. Larsson and K. Sadakane | 1999 | own | 14 | | [sa-ds](sa-ds) | [Two Efficient Algorithms for Linear Time Suffix Array Construction](https://storage.googleapis.com/google-code-archive-downloads/v2/code.google.com/ge-nong/Two%20Efficient%20Algorithms%20for%20Linear%20Time%20Suffix%20Array%20Construction.pdf) | G. Nong | 2012 | n/a | 15 | | [sa-is](sa-is) | [Two Efficient Algorithms for Linear Time Suffix Array Construction](https://storage.googleapis.com/google-code-archive-downloads/v2/code.google.com/ge-nong/Two%20Efficient%20Algorithms%20for%20Linear%20Time%20Suffix%20Array%20Construction.pdf) | G. Nong | 2011 | n/a | 16 | | [SACA-K](saca-k) |[Practical Linear-Time O(1)-Workspace Suffix Sorting for Constant Alphabets](https://storage.googleapis.com/google-code-archive-downloads/v2/code.google.com/ge-nong/saca-k-tois.pdf)|G. Nong|2011/13| n/a | 17 | | [sais-lite](sais-lite) | - | Y. Mori | 2010 | MIT | 18 | | [gsa-is](gsa-is) | [Inducing Enhanced Suffix Arrays for String Collections](http://www.sciencedirect.com/science/article/pii/S0304397517302621) | F. A. Louza, S. Gog and G. P. Telles | 2017 | MIT | 19 | | [msufsort](msufsort) | [(version 4 is currently in development)](https://github.com/michaelmaniscalco/msufsort) | M. Maniscalco | 2017 | MIT | 20 | 21 | 22 | Please note that all those files are licensed under their respective license (if available). 23 | I merely collected them to create a benchmark. 24 | 25 | If you know of any other *publicly* available SACA please send me a [mail](mailto:florian.kurpicz@tu-dortmund.de). 
26 | -------------------------------------------------------------------------------- /deep-shallow/.gitignore: -------------------------------------------------------------------------------- 1 | bwt 2 | bwtlcp.a 3 | ds 4 | ds_ssort.a 5 | testlcp 6 | unbwt 7 | -------------------------------------------------------------------------------- /deep-shallow/Makefile: -------------------------------------------------------------------------------- 1 | SHELL=/bin/sh 2 | 3 | CC=gcc 4 | 5 | #these are for testing 6 | #CFLAGS = -g -W -Wall -Winline -O2 7 | 8 | #these are for maximum speed 9 | CFLAGS=-g -O3 -fomit-frame-pointer -W -Wall -Winline -m32 \ 10 | -DDEBUG=0 -DNDEBUG=1 11 | 12 | 13 | .PHONY: all 14 | all : ds unbwt bwt testlcp 15 | 16 | 17 | # deep-shallow suffix sort algorithm 18 | ds: suftest2.o ds_ssort.a 19 | $(CC) $(CFLAGS) -o ds suftest2.o ds_ssort.a 20 | 21 | # archive containing the ds sort algorithm 22 | ds_ssort.a: globals.o ds.o shallow.o deep2.o helped.o blind2.o 23 | ar rc ds_ssort.a globals.o ds.o shallow.o deep2.o helped.o blind2.o 24 | 25 | # archive containing the bwt and lcp auxiliary routines 26 | bwtlcp.a: bwt_aux.o lcp_aux.o 27 | ar rc bwtlcp.a bwt_aux.o lcp_aux.o 28 | 29 | # compare several linear time lcp algorithms 30 | testlcp: testlcp.c bwtlcp.a ds_ssort.a 31 | $(CC) $(CFLAGS) -o testlcp testlcp.c bwtlcp.a ds_ssort.a 32 | 33 | # inverse bwt 34 | unbwt: unbwt.c 35 | $(CC) $(CFLAGS) -o unbwt unbwt.c 36 | 37 | # bwt using ds_ssort 38 | bwt: bwt.c ds_ssort.a 39 | $(CC) $(CFLAGS) -o bwt bwt.c ds_ssort.a 40 | 41 | # pattern rule for all objects files 42 | %.o: %.c *.h 43 | $(CC) -c $(CFLAGS) $< -o $@ 44 | 45 | clean: 46 | rm -f *.o *.a 47 | 48 | 49 | 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /deep-shallow/bwt_aux.h: -------------------------------------------------------------------------------- 1 | #define _BW_ALPHA_SIZE 256 2 | 3 | typedef unsigned char uchar; 4 | typedef unsigned char 
uint8; 5 | typedef int int32; 6 | 7 | 8 | // ---- struct containing the (uncompressed) bwt 9 | typedef struct { 10 | uchar *bwt; 11 | int size; 12 | int eof_pos; 13 | } bwt_data; 14 | 15 | 16 | // prototypes of bwt procedures defined in bwtlcp.a 17 | void _bw_sa2bwt(uchar *t, int32 n, int32 *sa, bwt_data *b); 18 | 19 | int32 _bw_bwt2ranknext(bwt_data *b, int32* occ, int32 *rank_next); 20 | int32 _bw_sa2ranknext(uchar *t,int32 n,int32 *sa,int32 *occ,int32 *rank_next); 21 | void _bw_ranknext2t(int32 *rank_next, int32 r0, bwt_data *b, uchar *t); 22 | void _bw_ranknext2sa(int32 *rank_next, int32 r0, int32 *sa); 23 | 24 | int32 _bw_bwt2rankprev(bwt_data *b, int32* occ, int32 *rank_prev); 25 | int32 _bw_sa2rankprev(uchar *t,int32 n,int32 *sa,int32 *occ,int32 *rank_prev); 26 | void _bw_rankprev2t(int32 *rank_prev, int32 rn1, bwt_data *b, uchar *t); 27 | void _bw_rankprev2sa(int32 *rank_prev, int32 n, int32 rn1, int32 *sa); 28 | -------------------------------------------------------------------------------- /deep-shallow/common.h: -------------------------------------------------------------------------------- 1 | /* >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 2 | global definitions for the ds suffix-sort algorithm 3 | Giovanni Manzini 4 | 2-apr 2001 5 | >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> */ 6 | /* ----------- use assertion if DEBUG!=0 ------------- */ 7 | #ifndef DEBUG 8 | #define DEBUG 1 /* set DEBUG to 0 to remove assertions and extra checks */ 9 | #endif 10 | #if !DEBUG 11 | #define NDEBUG 1 /* do not compile assertions */ 12 | #endif 13 | #include 14 | #include 15 | 16 | /* ---------- types and constants ----------- */ 17 | typedef int32_t Int32; 18 | typedef uint32_t UInt32; 19 | typedef uint16_t UInt16; 20 | typedef char Char; 21 | typedef unsigned char UChar; 22 | typedef unsigned char Bool; 23 | typedef uint64_t UInt64; 24 | #define True ((Bool)1) 25 | #define False ((Bool)0) 26 | #define Cmp_overshoot 16 27 | #define Max_thresh 30 28 | 29 | 
30 | #ifndef min 31 | #define min(a, b) ((a)<=(b) ? (a) : (b)) 32 | #endif 33 | 34 | #ifndef max 35 | #define max(a, b) ((a)>=(b) ? (a) : (b)) 36 | #endif 37 | 38 | 39 | #define MIN(a, b) ((a)<=(b) ? (a) : (b)) 40 | #define MAX(a, b) ((a)>=(b) ? (a) : (b)) 41 | 42 | 43 | // constant and macro for marking groups 44 | #define SETMASK (1 << 30) 45 | #define CLEARMASK (~(SETMASK)) 46 | #define IS_SORTED_BUCKET(sb) (ftab[sb] & SETMASK) 47 | #define BUCKET_FIRST(sb) (ftab[sb]&CLEARMASK) 48 | #define BUCKET_LAST(sb) ((ftab[sb+1]&CLEARMASK)-1) 49 | #define BUCKET_SIZE(sb) ((ftab[sb+1]&CLEARMASK)-(ftab[sb]&CLEARMASK)) 50 | 51 | int scmp3(unsigned char *p, unsigned char *q, int *l, int maxl); 52 | void pretty_putchar(int c); 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | -------------------------------------------------------------------------------- /deep-shallow/ds_ssort.h: -------------------------------------------------------------------------------- 1 | /* >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 2 | Prototypes for the Deep Shallow Suffix Sort routines 3 | >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> */ 4 | void ds_ssort(unsigned char *t, int *sa, int n); 5 | int init_ds_ssort(int adist, int bs_ratio); 6 | -------------------------------------------------------------------------------- /deep-shallow/lcp_aux.h: -------------------------------------------------------------------------------- 1 | 2 | // prototypes of the lcp construction procedures defined in 3 | // bwtlcp.a (source files lcp_aux.c lcp5_aux.c) 4 | // before including this file you must first include bwt_aux.h 5 | 6 | int *_lcp_sa2lcp_13n(uint8 *t, int n, int *sa); 7 | int *_lcp_sa2lcp_9125n(uint8 *t, int n, int *sa); 8 | int *_lcp_sa2lcp_9n(uint8 *t, int n, int *sa, int *occ); 9 | int _lcp_sa2lcp_6n(uint8 *t, bwt_data *b, int *sa, int *occ); 10 | int _lcp_sa2lcp_5125n(uint8 *t, int n, int *sa, int *occ); 11 | int _lcp_sa2lcp_5125nme(uint8 *t, int n, int *sa, int *occ); 12 
| 13 | 14 | -------------------------------------------------------------------------------- /gsa-is/.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | experiments/gsais 3 | -------------------------------------------------------------------------------- /gsa-is/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Felipe A. Louza, Simon Gog, Guilherme P. Telles 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /gsa-is/experiments/Makefile: -------------------------------------------------------------------------------- 1 | CC = gcc 2 | CFLAGS += -Wall 3 | #CFLAGS += -g -O0 4 | CFLAGS += -D_FILE_OFFSET_BITS=64 -m64 -O3 -fomit-frame-pointer -Wno-char-subscripts 5 | 6 | LFLAGS = -lm -lrt -ldl 7 | 8 | DIR = dataset/ 9 | INPUT = input-10000.txt 10 | 11 | ## 12 | 13 | K = 10000 14 | MODE = 6 15 | LCP_COMPUTE = 0 16 | DA_COMPUTE = 0 17 | BWT_COMPUTE = 0 18 | CHECK = 0 19 | OUTPUT = 0 20 | 21 | ## 22 | 23 | LIBOBJ = \ 24 | lib/utils.o\ 25 | lib/file.o\ 26 | lib/suffix_array.o\ 27 | lib/lcp_array.o\ 28 | lib/document_array.o\ 29 | ../gsais.o\ 30 | ../gsacak.o 31 | # external/malloc_count/malloc_count.o 32 | 33 | ## 34 | DEBUG = 0 35 | STDERR = 0 36 | M64 = 0 37 | ## 38 | 39 | DEFINES = -DDEBUG=$(DEBUG) -DSTDERR=$(STDERR) -DM64=$(M64) 40 | 41 | CFLAGS += $(DEFINES) 42 | 43 | 44 | all: compile 45 | 46 | clean: 47 | \rm lib/*.o ../*.o external/*.o gsais -f 48 | 49 | remove: 50 | \rm $(DIR)*.str $(DIR)*.sa $(DIR)*.lcp $(DIR)*.da $(DIR)*.da_bit -f 51 | 52 | #lib: lib/file.o lib/utils.o 53 | # $(CC) $(DEFINES) -c lib/utils.c -o lib/utils.o 54 | 55 | compile: main.c ${LIBOBJ} 56 | $(CC) $(CFLAGS) $(LFLAGS) -o gsais main.c ${LIBOBJ} 57 | 58 | run: 59 | ./gsais $(DIR) $(INPUT) $(K) $(MODE) $(LCP_COMPUTE) $(DA_COMPUTE) $(BWT_COMPUTE) $(CHECK) $(OUTPUT) 60 | 61 | valgrind: 62 | valgrind --tool=memcheck --leak-check=full --track-origins=yes ./gsais $(DIR) $(INPUT) $(K) $(MODE) $(LCP_COMPUTE) $(DA_COMPUTE) $(BWT_COMPUTE) $(CHECK) $(OUTPUT) 63 | -------------------------------------------------------------------------------- /gsa-is/experiments/dataset/input-10000.txt.bwt: -------------------------------------------------------------------------------- 1 | $aannbnnn$$ba#aaaaa -------------------------------------------------------------------------------- /gsa-is/experiments/external/bitvector/Makefile: 
-------------------------------------------------------------------------------- 1 | CCLIB=-lsdsl -ldivsufsort -ldivsufsort64 -Wno-comment -Wunused-variable 2 | VLIB= -g -O0 3 | 4 | LIB_DIR = ${HOME}/lib 5 | INC_DIR = ${HOME}/include 6 | MY_CXX_FLAGS= -std=c++11 -Wall -Wextra -DNDEBUG $(CODE_COVER) 7 | MY_CXX_OPT_FLAGS= -O3 -ffast-math -funroll-loops -m64 -fomit-frame-pointer -D_FILE_OFFSET_BITS=64 8 | MY_CXX=/usr/bin/c++ 9 | 10 | LFLAGS = -lm -lrt -ldl 11 | 12 | LIBOBJ = ../malloc_count/malloc_count.o\ 13 | lib/utils.o\ 14 | lib/document_array.o 15 | 16 | CXX_FLAGS=$(MY_CXX_FLAGS) $(MY_CXX_OPT_FLAGS) -I$(INC_DIR) -L$(LIB_DIR) $(LFLAGS) 17 | 18 | CLAGS= -DSYMBOLBYTES=1 19 | 20 | #### 21 | DIR = ../../dataset/ 22 | INPUT = input-10000.txt 23 | 24 | K = 10000 25 | MODE = 6 26 | CHECK = 1 27 | OUTPUT = 0 28 | 29 | DEBUG = 0 30 | SDV = 0 31 | DEFINES = -DDEBUG=$(DEBUG) -DSDV=$(SDV) 32 | 33 | #### 34 | 35 | all: compile 36 | 37 | utils: 38 | $(MY_CXX) $(CXX_FLAGS) $(DEFINES) -c lib/utils.cpp $(CCLIB) -o lib/utils.o 39 | 40 | document_array: 41 | $(MY_CXX) $(CXX_FLAGS) $(DEFINES) -c lib/document_array.cpp $(CCLIB) -o lib/document_array.o 42 | 43 | compile: document_array utils main.cpp ${LIBOBJ} 44 | $(MY_CXX) $(CXX_FLAGS) $(LIBOBJ) $(DEFINES) main.cpp $(CCLIB) -o main 45 | 46 | run: 47 | ../../gsais $(DIR) $(INPUT) $(K) 6 0 0 $(CHECK) 1 48 | ./main $(DIR) $(INPUT) $(K) $(CHECK) $(OUTPUT) 49 | 50 | clean: 51 | \rm lib/*.o src/*.o external/*.o main -f 52 | 53 | -------------------------------------------------------------------------------- /gsa-is/experiments/external/bitvector/lib/document_array.hpp: -------------------------------------------------------------------------------- 1 | #ifndef DAH 2 | #define DAH 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "utils.hpp" 9 | 10 | 11 | int document_array_LF(unsigned char* T, int_t* SA, int_t* DA, uint_t n, unsigned int SIGMA, int cs, unsigned char separator, int_t k); 12 | int document_array_LF_int(int_t* T, 
int_t* SA, int_t* DA, uint_t n, unsigned int SIGMA, int cs, unsigned char separator, int_t k); 13 | 14 | /**/ 15 | 16 | int document_array_check(unsigned char *T, int_t *SA, int_t *DA, uint_t n, int cs, unsigned char separator, int_t k); 17 | 18 | int document_array_check_int(int_t *T, int_t *SA, int_t *DA, uint_t n, int cs, int_t k); 19 | 20 | int document_array_print(unsigned char *T, int_t *SA, int_t *DA, size_t n, int cs); 21 | int document_array_write(int_t *DA, int_t n, char* c_file, const char* ext); 22 | int_t document_array_read(int_t** DA, char* c_file, const char* ext); 23 | 24 | /**/ 25 | 26 | #endif 27 | -------------------------------------------------------------------------------- /gsa-is/experiments/external/bitvector/lib/utils.hpp: -------------------------------------------------------------------------------- 1 | #ifndef UTILS_H 2 | #define UTILS_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #ifndef M64 17 | #define M64 0 18 | #endif 19 | 20 | #if M64 21 | typedef int64_t int_t; 22 | typedef uint64_t uint_t; 23 | #define PRIdN PRId64 24 | #define U_MAX UINT64_MAX 25 | #define I_MAX INT64_MAX 26 | #define I_MIN INT64_MIN 27 | #else 28 | typedef int32_t int_t; 29 | typedef uint32_t uint_t; 30 | #define PRIdN PRId32 31 | #define U_MAX UINT32_MAX 32 | #define I_MAX INT32_MAX 33 | #define I_MIN INT32_MIN 34 | #endif 35 | 36 | typedef uint32_t int_text; 37 | 38 | 39 | /**********************************************************************/ 40 | 41 | #define swap(a,b) do { typeof(a) aux_a_b = (a); (a) = (b); (b) = aux_a_b; } while (0) 42 | 43 | #define min(a,b) ((a) < (b) ? (a) : (b)) 44 | #define max(a,b) ((a) > (b) ? 
(a) : (b)) 45 | 46 | void time_start(time_t *t_time, clock_t *c_clock); 47 | double time_stop(time_t t_time, clock_t c_clock); 48 | 49 | void die(const char* where); 50 | void dies(const char* where, char* format, ...); 51 | 52 | /**********************************************************************/ 53 | 54 | #endif 55 | -------------------------------------------------------------------------------- /gsa-is/experiments/external/bitvector/lib/utils_32.hpp: -------------------------------------------------------------------------------- 1 | #ifndef UTILS_H 2 | #define UTILS_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #ifndef M64 17 | #define M64 0 18 | #endif 19 | 20 | #if M64 21 | typedef int64_t int_t; 22 | typedef uint64_t uint_t; 23 | #define PRIdN PRId64 24 | #define U_MAX UINT64_MAX 25 | #define I_MAX INT64_MAX 26 | #define I_MIN INT64_MIN 27 | #else 28 | typedef int32_t int_t; 29 | typedef uint32_t uint_t; 30 | #define PRIdN PRId32 31 | #define U_MAX UINT32_MAX 32 | #define I_MAX INT32_MAX 33 | #define I_MIN INT32_MIN 34 | #endif 35 | 36 | typedef uint32_t int_text; 37 | 38 | 39 | /**********************************************************************/ 40 | 41 | #define swap(a,b) do { typeof(a) aux_a_b = (a); (a) = (b); (b) = aux_a_b; } while (0) 42 | 43 | #define min(a,b) ((a) < (b) ? (a) : (b)) 44 | #define max(a,b) ((a) > (b) ? 
(a) : (b)) 45 | 46 | void time_start(time_t *t_time, clock_t *c_clock); 47 | double time_stop(time_t t_time, clock_t c_clock); 48 | 49 | void die(const char* where); 50 | void dies(const char* where, char* format, ...); 51 | 52 | /**********************************************************************/ 53 | 54 | #endif 55 | -------------------------------------------------------------------------------- /gsa-is/experiments/external/bitvector/lib/utils_64.hpp: -------------------------------------------------------------------------------- 1 | #ifndef UTILS_H 2 | #define UTILS_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #ifndef M64 17 | #define M64 1 18 | #endif 19 | 20 | #if M64 21 | typedef int64_t int_t; 22 | typedef uint64_t uint_t; 23 | #define PRIdN PRId64 24 | #define U_MAX UINT64_MAX 25 | #define I_MAX INT64_MAX 26 | #define I_MIN INT64_MIN 27 | #else 28 | typedef int32_t int_t; 29 | typedef uint32_t uint_t; 30 | #define PRIdN PRId32 31 | #define U_MAX UINT32_MAX 32 | #define I_MAX INT32_MAX 33 | #define I_MIN INT32_MIN 34 | #endif 35 | 36 | typedef uint32_t int_text; 37 | 38 | 39 | /**********************************************************************/ 40 | 41 | #define swap(a,b) do { typeof(a) aux_a_b = (a); (a) = (b); (b) = aux_a_b; } while (0) 42 | 43 | #define min(a,b) ((a) < (b) ? (a) : (b)) 44 | #define max(a,b) ((a) > (b) ? 
(a) : (b)) 45 | 46 | void time_start(time_t *t_time, clock_t *c_clock); 47 | double time_stop(time_t t_time, clock_t c_clock); 48 | 49 | void die(const char* where); 50 | void dies(const char* where, char* format, ...); 51 | 52 | /**********************************************************************/ 53 | 54 | #endif 55 | -------------------------------------------------------------------------------- /gsa-is/experiments/external/malloc_count/malloc_count.h: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * malloc_count.h 3 | * 4 | * Header containing prototypes of user-callable functions to retrieve run-time 5 | * information about malloc()/free() allocation. 6 | * 7 | ****************************************************************************** 8 | * Copyright (C) 2013 Timo Bingmann 9 | * 10 | * Permission is hereby granted, free of charge, to any person obtaining a copy 11 | * of this software and associated documentation files (the "Software"), to 12 | * deal in the Software without restriction, including without limitation the 13 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 14 | * sell copies of the Software, and to permit persons to whom the Software is 15 | * furnished to do so, subject to the following conditions: 16 | * 17 | * The above copyright notice and this permission notice shall be included in 18 | * all copies or substantial portions of the Software. 19 | * 20 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 21 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 22 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 23 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 24 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 25 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 26 | * IN THE SOFTWARE. 27 | *****************************************************************************/ 28 | 29 | #ifndef _MALLOC_COUNT_H_ 30 | #define _MALLOC_COUNT_H_ 31 | 32 | #include 33 | 34 | #ifdef __cplusplus 35 | extern "C" { /* for inclusion from C++ */ 36 | #endif 37 | 38 | /* returns the currently allocated amount of memory */ 39 | extern size_t malloc_count_current(void); 40 | 41 | /* returns the current peak memory allocation */ 42 | extern size_t malloc_count_peak(void); 43 | 44 | /* resets the peak memory allocation to current */ 45 | extern void malloc_count_reset_peak(void); 46 | 47 | /* typedef of callback function */ 48 | typedef void (*malloc_count_callback_type)(void* cookie, size_t current); 49 | 50 | /* supply malloc_count with a callback function that is invoked on each change 51 | * of the current allocation. The callback function must not use 52 | * malloc()/realloc()/free() or it will go into an endless recursive loop! 
*/ 53 | extern void malloc_count_set_callback(malloc_count_callback_type cb, 54 | void* cookie); 55 | 56 | /* user function which prints current and peak allocation to stderr */ 57 | extern void malloc_count_print_status(void); 58 | 59 | #ifdef __cplusplus 60 | } /* extern "C" */ 61 | #endif 62 | 63 | #endif /* _MALLOC_COUNT_H_ */ 64 | 65 | /*****************************************************************************/ 66 | -------------------------------------------------------------------------------- /gsa-is/experiments/external/malloc_count/stack_count.c: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * stack_count.c 3 | * 4 | * Header containing two functions to monitor stack usage of a program. 5 | * 6 | ****************************************************************************** 7 | * Copyright (C) 2013 Timo Bingmann 8 | * 9 | * Permission is hereby granted, free of charge, to any person obtaining a copy 10 | * of this software and associated documentation files (the "Software"), to 11 | * deal in the Software without restriction, including without limitation the 12 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 13 | * sell copies of the Software, and to permit persons to whom the Software is 14 | * furnished to do so, subject to the following conditions: 15 | * 16 | * The above copyright notice and this permission notice shall be included in 17 | * all copies or substantial portions of the Software. 18 | * 19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 22 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 24 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 25 | * IN THE SOFTWARE. 26 | *****************************************************************************/ 27 | 28 | #include "stack_count.h" 29 | 30 | #include 31 | 32 | /* default stack size on Linux is 8 MiB, so fill 75% of it. */ 33 | static const size_t stacksize = 6*1024*1024; 34 | 35 | /* "clear" the stack by writing a sentinel value into it. */ 36 | void* stack_count_clear(void) 37 | { 38 | const size_t asize = stacksize / sizeof(uint32_t); 39 | uint32_t stack[asize]; /* allocated on stack */ 40 | uint32_t* p = stack; 41 | while ( p < stack + asize ) *p++ = 0xDEADC0DEu; 42 | return p; 43 | } 44 | 45 | /* checks the maximum usage of the stack since the last clear call. */ 46 | size_t stack_count_usage(void* lastbase) 47 | { 48 | const size_t asize = stacksize / sizeof(uint32_t); 49 | uint32_t* p = (uint32_t*)lastbase - asize; /* calculate top of last clear */ 50 | while ( *p == 0xDEADC0DEu ) ++p; 51 | return ((uint32_t*)lastbase - p) * sizeof(uint32_t); 52 | } 53 | 54 | /*****************************************************************************/ 55 | -------------------------------------------------------------------------------- /gsa-is/experiments/external/malloc_count/stack_count.h: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * stack_count.h 3 | * 4 | * Header containing two functions to monitor stack usage of a program. 
5 | * 6 | ****************************************************************************** 7 | * Copyright (C) 2013 Timo Bingmann 8 | * 9 | * Permission is hereby granted, free of charge, to any person obtaining a copy 10 | * of this software and associated documentation files (the "Software"), to 11 | * deal in the Software without restriction, including without limitation the 12 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 13 | * sell copies of the Software, and to permit persons to whom the Software is 14 | * furnished to do so, subject to the following conditions: 15 | * 16 | * The above copyright notice and this permission notice shall be included in 17 | * all copies or substantial portions of the Software. 18 | * 19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 24 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 25 | * IN THE SOFTWARE. 26 | *****************************************************************************/ 27 | 28 | #ifndef _STACK_COUNT_H_ 29 | #define _STACK_COUNT_H_ 30 | 31 | #include 32 | 33 | #ifdef __cplusplus 34 | extern "C" { /* for inclusion from C++ */ 35 | #endif 36 | 37 | /* "clear" the stack by writing a sentinel value into it. */ 38 | extern void* stack_count_clear(void); 39 | 40 | /* checks the maximum usage of the stack since the last clear call. 
*/ 41 | extern size_t stack_count_usage(void* lastbase); 42 | 43 | #ifdef __cplusplus 44 | } /* extern "C" */ 45 | #endif 46 | 47 | #endif /* _STACK_COUNT_H_ */ 48 | 49 | /*****************************************************************************/ 50 | -------------------------------------------------------------------------------- /gsa-is/experiments/external/malloc_count/test-malloc_count/Makefile: -------------------------------------------------------------------------------- 1 | # Simplistic Makefile for malloc_count example 2 | 3 | CC = gcc 4 | CFLAGS = -g -W -Wall -ansi -I.. 5 | LDFLAGS = 6 | LIBS = -ldl 7 | OBJS = test.o ../malloc_count.o ../stack_count.o 8 | 9 | all: test 10 | 11 | %.o: %.c 12 | $(CC) $(CFLAGS) -c -o $@ $< 13 | 14 | test: $(OBJS) 15 | $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJS) $(LIBS) 16 | 17 | clean: 18 | rm -f *.o test 19 | -------------------------------------------------------------------------------- /gsa-is/experiments/external/malloc_count/test-malloc_count/test.c: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * test-malloc_count/test.c 3 | * 4 | * Small program to test malloc_count hooks and user functions. 
5 | * 6 | ****************************************************************************** 7 | * Copyright (C) 2013 Timo Bingmann 8 | * 9 | * Permission is hereby granted, free of charge, to any person obtaining a copy 10 | * of this software and associated documentation files (the "Software"), to 11 | * deal in the Software without restriction, including without limitation the 12 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 13 | * sell copies of the Software, and to permit persons to whom the Software is 14 | * furnished to do so, subject to the following conditions: 15 | * 16 | * The above copyright notice and this permission notice shall be included in 17 | * all copies or substantial portions of the Software. 18 | * 19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 24 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 25 | * IN THE SOFTWARE. 
26 | *****************************************************************************/ 27 | 28 | #include "malloc_count.h" 29 | #include "stack_count.h" 30 | 31 | #include 32 | #include 33 | #include 34 | 35 | void function_use_stack() 36 | { 37 | char data[64*1024]; 38 | memset(data, 1, sizeof(data)); 39 | } 40 | 41 | int main() 42 | { 43 | /* allocate and free some memory */ 44 | void* a = malloc(2*1024*1024); 45 | free(a); 46 | 47 | /* query malloc_count for information */ 48 | printf("our peak memory allocation: %lld\n", 49 | (long long)malloc_count_peak()); 50 | 51 | /* use realloc() */ 52 | void* b = malloc(3*1024*1024); 53 | malloc_count_print_status(); 54 | 55 | b = realloc(b, 2*1024*1024); 56 | malloc_count_print_status(); 57 | 58 | b = realloc(b, 4*1024*1024); 59 | malloc_count_print_status(); 60 | 61 | free(b); 62 | 63 | /* some unusual realloc calls */ 64 | void* c = realloc(NULL, 1*1024*1024); 65 | c = realloc(c, 0); 66 | 67 | /* show how stack_count works */ 68 | { 69 | void* base = stack_count_clear(); 70 | function_use_stack(); 71 | printf("maximum stack usage: %lld\n", 72 | (long long)stack_count_usage(base)); 73 | } 74 | 75 | return 0; 76 | } 77 | 78 | /*****************************************************************************/ 79 | -------------------------------------------------------------------------------- /gsa-is/experiments/external/malloc_count/test-memprofile/Makefile: -------------------------------------------------------------------------------- 1 | # Simplistic Makefile for malloc_count example 2 | 3 | CC = gcc 4 | CXX = g++ 5 | CFLAGS = -g -W -Wall -ansi -I.. 6 | CXXFLAGS = -g -W -Wall -ansi -I.. 
7 | LDFLAGS = 8 | LIBS = -ldl 9 | OBJS = test.o ../malloc_count.o 10 | 11 | all: test 12 | 13 | %.o: %.c 14 | $(CC) $(CFLAGS) -c -o $@ $< 15 | 16 | %.o: %.cc 17 | $(CXX) $(CXXFLAGS) -c -o $@ $< 18 | 19 | test: $(OBJS) 20 | $(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(OBJS) $(LIBS) 21 | 22 | clean: 23 | rm -f *.o test 24 | -------------------------------------------------------------------------------- /gsa-is/experiments/external/malloc_count/test-memprofile/memprofile.gnuplot: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env gnuplot 2 | 3 | set terminal pdf size 28cm,18cm linewidth 2.0 4 | set output "memprofile.pdf" 5 | 6 | set key top right 7 | set grid xtics ytics 8 | 9 | set title 'Memory Profile of Test Program' 10 | set xlabel 'Time [s]' 11 | set ylabel 'Memory Usage [MiB]' 12 | 13 | plot \ 14 | 'memprofile.txt' using 1:($2 / 1024/1024) title 'memprofile' with lines 15 | -------------------------------------------------------------------------------- /gsa-is/experiments/external/malloc_count/test-memprofile/memprofile.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kurpicz/saca-bench/dfbaa2e727d274c9c0265451dae97d7c737aba2b/gsa-is/experiments/external/malloc_count/test-memprofile/memprofile.pdf -------------------------------------------------------------------------------- /gsa-is/experiments/external/malloc_count/test-memprofile/test.cc: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * test-memprofile/test.cc 3 | * 4 | * Example to write a memory profile. 
5 | * 6 | ****************************************************************************** 7 | * Copyright (C) 2013 Timo Bingmann 8 | * 9 | * This program is free software: you can redistribute it and/or modify it 10 | * under the terms of the GNU General Public License as published by the Free 11 | * Software Foundation, either version 3 of the License, or (at your option) 12 | * any later version. 13 | * 14 | * This program is distributed in the hope that it will be useful, but WITHOUT 15 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 16 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 17 | * more details. 18 | * 19 | * You should have received a copy of the GNU General Public License along with 20 | * this program. If not, see . 21 | *****************************************************************************/ 22 | 23 | #include "memprofile.h" 24 | 25 | #include 26 | #include 27 | 28 | int main() 29 | { 30 | MemProfile mp("memprofile.txt", 0.1, 1024); 31 | 32 | { 33 | std::vector v; 34 | for (size_t i = 0; i < 10000000; ++i) 35 | v.push_back(i); 36 | } 37 | 38 | { 39 | std::set v; 40 | for (size_t i = 0; i < 200000; ++i) 41 | v.insert(i); 42 | } 43 | 44 | return 0; 45 | } 46 | -------------------------------------------------------------------------------- /gsa-is/experiments/external/sa-is/main.cpp: -------------------------------------------------------------------------------- 1 | // This is a sample program for showing how to call SAIS. 
2 | 3 | #include 4 | #include 5 | #include 6 | 7 | void SAIS(unsigned char *s, int *SA, int n, int K, int cs, int level); 8 | 9 | // uncomment the below line to verify the result SA 10 | //#define _verify_sa 11 | 12 | // output values: 13 | // 1: s1s2 16 | int sless(unsigned char *s1, unsigned char *s2, int n) { 17 | for(int i=0; i s2[i]) return -1; 20 | } 21 | return 0; 22 | } 23 | 24 | // test if SA is sorted for the input string s 25 | bool isSorted(int *SA, unsigned char *s, int n) { 26 | for(int i = 0; i < n-1; i++) { 27 | int d=SA[i]SA[i]) 30 | return 0; 31 | } 32 | return 1; 33 | } 34 | 35 | int main(int argc, char **argv) { 36 | fprintf( stderr, "\nComputing suffix array by SA-IS on " ); 37 | if ( argc > 1 ) { 38 | freopen( argv[ 1 ], "rb", stdin ); 39 | fprintf( stderr, "%s", argv[ 1 ] ); 40 | } else 41 | fprintf( stderr, "stdin" ); 42 | fprintf( stderr, " to " ); 43 | if ( argc > 2 ) { 44 | freopen( argv[ 2 ], "wb", stdout ); 45 | fprintf( stderr, "%s", argv[ 2 ] ); 46 | } else 47 | fprintf( stderr, "stdout" ); 48 | fprintf( stderr, "\n" ); 49 | #if !defined( unix ) 50 | setmode( fileno( stdin ), O_BINARY ); 51 | setmode( fileno( stdout ), O_BINARY ); 52 | #endif 53 | 54 | // Allocate 5 bytes memory for input string and output suffix array 55 | fseek(stdin, 0, SEEK_END); 56 | int n=ftell(stdin); 57 | if(n==0) { 58 | fprintf(stderr, "\nEmpty string, nothing to sort!"); 59 | return 0; 60 | } 61 | n++; // append the virtual sentinel 62 | fprintf(stderr, "Allocating input and output space: %ld bytes = %.2lf MB", 5*n, (double)5*n/1024/1024); 63 | unsigned char *s_ch=new unsigned char[n]; 64 | int *SA = new int[n]; 65 | if(s_ch==NULL || SA==NULL) { 66 | delete [] s_ch; delete [] SA; 67 | fprintf(stderr, "\nInsufficient memory, exit!"); 68 | return 0; 69 | } 70 | 71 | // read the string into buffer. 
72 | fprintf(stderr, "\nReading input string..."); 73 | fseek(stdin, 0, SEEK_SET ); 74 | fread((unsigned char *) s_ch, 1, n-1, stdin); 75 | 76 | fprintf(stderr, "\nConstructing the suffix array..."); 77 | 78 | s_ch[n-1]=0; // set the virtual sentinel 79 | 80 | clock_t start, finish; 81 | double duration; 82 | start = clock(); 83 | 84 | SAIS(s_ch, SA, n, 256, sizeof(char), 0); 85 | 86 | finish = clock(); 87 | duration = (double)(finish - start) / CLOCKS_PER_SEC; 88 | 89 | fprintf(stderr, "\nSize: %d bytes, Time: %5.3f seconds\n", n-1, duration); 90 | 91 | #ifdef _verify_sa 92 | fprintf(stderr, "\nVerifying the suffix array..."); 93 | fprintf(stderr, "\nIsSorted: %d", (int)isSorted(SA+1, s_ch, n-1)); 94 | #endif 95 | 96 | /* 97 | fprintf(stderr, "\nOutputing the suffix array..."); 98 | for(unsigned int i=1; i 4 | #include 5 | #include 6 | 7 | void SACA_K(unsigned char *s, unsigned int *SA, unsigned int n, 8 | unsigned int K, unsigned int m, int level); 9 | 10 | // uncomment the below line to verify the result SA 11 | //#define _verify_sa 12 | 13 | // output values: 14 | // 1: s1s2 17 | int sless(unsigned char *s1, unsigned char *s2, unsigned int n) { 18 | for(unsigned int i=0; i s2[i]) return -1; 21 | } 22 | return 0; 23 | } 24 | 25 | // test if SA is sorted for the input string s 26 | bool isSorted(unsigned int *SA, unsigned char *s, unsigned int n) { 27 | for(unsigned int i = 0; i < n-1; i++) { 28 | unsigned int d=SA[i]SA[i]) 31 | return false; 32 | } 33 | return true; 34 | } 35 | 36 | int main(int argc, char **argv) { 37 | fprintf(stderr, "\nComputing suffix array by SACA-K on "); 38 | if (argc>1) { 39 | freopen(argv[1], "rb", stdin); 40 | fprintf(stderr, "%s", argv[1]); 41 | } else 42 | fprintf(stderr, "stdin"); 43 | fprintf(stderr, " to "); 44 | if (argc>2) { 45 | freopen(argv[2], "wb", stdout); 46 | fprintf(stderr, "%s", argv[2]); 47 | } else 48 | fprintf(stderr, "stdout"); 49 | fprintf(stderr, "\n"); 50 | #if !defined(unix) 51 | setmode(fileno(stdin), 
O_BINARY); 52 | setmode(fileno(stdout), O_BINARY); 53 | #endif 54 | 55 | // Allocate 5 bytes memory for input string and output suffix array 56 | fseek(stdin, 0, SEEK_END); 57 | unsigned int n=ftell(stdin); 58 | if(n==0) { 59 | fprintf(stderr, "\nEmpty string, nothing to sort!"); 60 | return 0; 61 | } 62 | else if(n+1==0) { 63 | fprintf(stderr, "\nCan not sort file of n>=%u bytes!", n); 64 | return 0; 65 | } 66 | 67 | n++; // append the virtual sentinel 68 | fprintf(stderr, "Allocating input and output space: %u bytes = %.2lf MB", 5*n, (double)5*n/1024/1024); 69 | unsigned char *s_ch=new unsigned char[n]; 70 | unsigned int *SA = new unsigned int[n]; 71 | if(s_ch==NULL || SA==NULL) { 72 | delete [] s_ch; delete [] SA; 73 | fprintf(stderr, "\nInsufficient memory, exit!"); 74 | return 0; 75 | } 76 | 77 | // read the string into buffer. 78 | fprintf(stderr, "\nReading input string..."); 79 | fseek(stdin, 0, SEEK_SET ); 80 | fread((unsigned char *) s_ch, 1, n-1, stdin); 81 | // set the virtual sentinel 82 | s_ch[n-1]=0; 83 | 84 | clock_t start, finish; 85 | double duration; 86 | start = clock(); 87 | 88 | fprintf(stderr, "\nConstructing the suffix array..."); 89 | SACA_K(s_ch, SA, n, 256, n, 0); 90 | 91 | finish = clock(); 92 | duration = (double)(finish - start) / CLOCKS_PER_SEC; 93 | 94 | fprintf(stderr, "\nSize: %u bytes, Time: %5.3f seconds\n", n-1, duration); 95 | 96 | #ifdef _verify_sa 97 | fprintf(stderr, "\nVerifying the suffix array..."); 98 | fprintf(stderr, "\nSorted: %d", (int)isSorted(SA+1, s_ch, n-1)); 99 | #endif 100 | 101 | /* 102 | fprintf(stderr, "\nOutputing the suffix array..."); 103 | for(unsigned int i=1; i 5 | #include 6 | #include 7 | 8 | #include "utils.h" 9 | #include "file.h" 10 | 11 | 12 | int document_array_LF(unsigned char* T, int_t* SA, int_t* DA, uint_t n, unsigned int SIGMA, int cs, unsigned char separator, uint_t k); 13 | int document_array_LF_int(int_t* T, int_t* SA, int_t* DA, uint_t n, unsigned int SIGMA, int cs, unsigned char 
separator, uint_t k); 14 | 15 | /**/ 16 | 17 | int document_array_check(unsigned char *T, int_t *SA, int_t *DA, uint_t n, int cs, unsigned char separator, uint_t k); 18 | 19 | int document_array_check_int(int_t *T, int_t *SA, int_t *DA, uint_t n, int cs, uint_t k); 20 | 21 | int document_array_print(unsigned char *T, int_t *SA, int_t *DA, size_t n, int cs); 22 | int document_array_write(int_t *DA, int_t n, char* c_file, const char* ext); 23 | int_t document_array_read(int_t** DA, char* c_file, const char* ext); 24 | 25 | /**/ 26 | 27 | #endif 28 | -------------------------------------------------------------------------------- /gsa-is/experiments/lib/file.h: -------------------------------------------------------------------------------- 1 | #ifndef FILE_H 2 | #define FILE_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "utils.h" 11 | 12 | 13 | 14 | /*******************************************************************/ 15 | int file_chdir(char* dir); 16 | 17 | FILE* file_open(char *c_file, const char * c_mode); 18 | int file_close(FILE* f_in); 19 | 20 | size_t file_size(FILE* f_in); 21 | 22 | int file_write(FILE* f_out, uint_t array); 23 | uint_t file_read(FILE* f_in); 24 | 25 | char* file_load(FILE* f_in) ; 26 | char** file_load_multiple(char* c_file, int k, int_t* n) ; 27 | 28 | int file_text_write(unsigned char *str, int_t n, char* c_file, const char* ext); 29 | int file_text_int_write(int_t *str_int, int_t n, char* c_file, const char* ext); 30 | 31 | int_t file_text_read(unsigned char** str, char* c_file, const char* ext); 32 | int_t file_text_int_read(int_t** str, char* c_file, const char* ext); 33 | 34 | int file_bwt_write(unsigned char *str, int_t *SA, int_t n, char* c_file, const char* ext); 35 | int file_bwt_int_write(int_t *str_int, int *SA, int_t n, char* c_file, const char* ext); 36 | 37 | /*******************************************************************/ 38 | 39 | 40 | 41 | #endif 42 | 
-------------------------------------------------------------------------------- /gsa-is/experiments/lib/lcp_array.h: -------------------------------------------------------------------------------- 1 | #ifndef LCPH 2 | #define LCPH 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "utils.h" 9 | #include "file.h" 10 | 11 | 12 | /** 13 | * Constructs the lcp array of a given suffix array. 14 | * @param T[0..n-1] The input string. 15 | * @param SA[0..n-1] The input suffix array. 16 | * @param n The length of the given string. 17 | * @return 0 if no error occurred, -1 otherwise. 18 | */ 19 | int lcp_kasai(char* T, int_t* SA, uint_t n, int_t* LCP); 20 | 21 | /** 22 | * Constructs the lcp array of a given suffix array. 23 | * @param T[0..n-1] The input string. 24 | * @param SA[0..n-1] The input suffix array. 25 | * @param n The length of the given string. 26 | */ 27 | int lcp_PHI(unsigned char* T, int_t* SA, int_t* LCP, uint_t n, int cs, unsigned char separator); 28 | int lcp_PHI_int(int_t* T, int_t* SA, int_t* LCP, uint_t n, int cs); 29 | 30 | int lcp_array_check(unsigned char *T, int_t *SA, int_t *LCP, uint_t n, int cs, unsigned char separator); 31 | int lcp_array_check_phi(unsigned char *T, int_t *SA, int_t *LCP, uint_t n, int cs, unsigned char separator); 32 | 33 | int lcp_array_check_lcp(unsigned char *T, int_t *SA, int_t *LCP, uint_t n, int cs, unsigned char separator); 34 | 35 | int lcp_array_print(unsigned char *T, int_t *SA, int_t *LCP, size_t n, int cs); 36 | 37 | int lcp_array_write(int_t *LCP, int_t n, char* c_file, const char* ext); 38 | int_t lcp_array_read(int_t** LCP, char* c_file, const char* ext); 39 | 40 | #endif 41 | -------------------------------------------------------------------------------- /gsa-is/experiments/lib/suffix_array.c: -------------------------------------------------------------------------------- 1 | #include "suffix_array.h" 2 | 3 | #define chr(i) (cs==sizeof(int_t)?((int_t*)T)[i]:((unsigned char *)T)[i]) 4 | 5 | 
/*******************************************************************/ 6 | 7 | int suffix_array_write(int_t *SA, int_t n, char* c_file, const char* ext){ 8 | 9 | FILE *f_out; 10 | char *c_out = malloc((strlen(c_file)+strlen(ext)+3)*sizeof(char)); 11 | 12 | sprintf(c_out, "%s.%s", c_file, ext); 13 | f_out = file_open(c_out, "wb"); 14 | 15 | fwrite(SA, sizeof(int_t), n, f_out); 16 | 17 | file_close(f_out); 18 | free(c_out); 19 | 20 | return 1; 21 | } 22 | 23 | /*******************************************************************/ 24 | 25 | int_t suffix_array_read(int_t** SA, char* c_file, const char* ext){ 26 | 27 | FILE *f_in; 28 | char *c_in = malloc((strlen(c_file)+strlen(ext)+3)*sizeof(char)); 29 | 30 | sprintf(c_in, "%s.%s", c_file, ext); 31 | f_in = file_open(c_in, "rb"); 32 | 33 | fseek(f_in, 0L, SEEK_END); 34 | size_t size = ftell(f_in); 35 | rewind(f_in); 36 | 37 | int_t n = size/sizeof(int_t); 38 | 39 | *SA = (int_t*) malloc(n*sizeof(int_t)); 40 | fread(*SA, sizeof(int_t), n, f_in); 41 | 42 | file_close(f_in); 43 | free(c_in); 44 | 45 | return n; 46 | } 47 | 48 | /*******************************************************************/ 49 | 50 | int_t suffix_array_print(unsigned char *T, int_t *SA, size_t n, int cs){ 51 | 52 | int_t i; 53 | for(i=0; i chr(b)) 77 | return 0; 78 | else if(chr(a)==sentinel && chr(b)==sentinel){// $_i < $_j iff i < j 79 | if(a < b) 80 | return 1; 81 | else 82 | return 0; 83 | } 84 | a++; 85 | b++; 86 | } 87 | 88 | return 1; 89 | } 90 | 91 | /*******************************************************************/ 92 | 93 | int_t suffix_array_check(unsigned char *T, int_t *SA, size_t n, int cs, unsigned char sentinel){ 94 | 95 | int_t i,j,k; 96 | 97 | for (i = 0; i < n-1; i++) { 98 | size_t min = SA[i+1] 5 | #include 6 | #include 7 | #include 8 | 9 | #include "utils.h" 10 | #include "file.h" 11 | 12 | int suffix_array_write(int_t *SA, int_t n, char* c_file, const char* ext); 13 | int_t suffix_array_read(int_t** SA, char* c_file, const 
char* ext); 14 | 15 | int_t suffix_array_print(unsigned char *T, int_t *SA, size_t len, int cs); 16 | int_t suffix_array_check(unsigned char *T, int_t *SA, size_t len, int cs, unsigned char sentinel); 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /gsa-is/experiments/lib/utils.h: -------------------------------------------------------------------------------- 1 | #ifndef UTILS_H 2 | #define UTILS_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #ifndef UCHAR_SIZE 17 | #define UCHAR_SIZE 256 18 | #endif 19 | 20 | #define END_MARKER '$' 21 | 22 | #ifndef M64 23 | #define M64 0 24 | #endif 25 | 26 | #if M64 27 | typedef int64_t int_t; 28 | typedef uint64_t uint_t; 29 | #define PRIdN PRId64 30 | #define U_MAX UINT64_MAX 31 | #define I_MAX INT64_MAX 32 | #define I_MIN INT64_MIN 33 | #else 34 | typedef int32_t int_t; 35 | typedef uint32_t uint_t; 36 | #define PRIdN PRId32 37 | #define U_MAX UINT32_MAX 38 | #define I_MAX INT32_MAX 39 | #define I_MIN INT32_MIN 40 | #endif 41 | 42 | /**********************************************************************/ 43 | 44 | #define swap(a,b) do { typeof(a) aux_a_b = (a); (a) = (b); (b) = aux_a_b; } while (0) 45 | 46 | #define min(a,b) ((a) < (b) ? (a) : (b)) 47 | #define max(a,b) ((a) > (b) ? 
(a) : (b)) 48 | 49 | void time_start(time_t *t_time, clock_t *c_clock); 50 | double time_stop(time_t t_time, clock_t c_clock); 51 | 52 | void die(const char* where); 53 | void dies(const char* where, char* format, ...); 54 | 55 | int_t print_int(int_t* A, int_t n); 56 | int_t print_char(char* A, int_t n); 57 | int_t min_range(int_t* A, int_t l, int_t r); 58 | 59 | 60 | /**********************************************************************/ 61 | 62 | int_t* cat_int(unsigned char** R, int k, int_t *n); 63 | unsigned char* cat_char(unsigned char** R, int k, int_t *n); 64 | 65 | // double log2(double i); 66 | void qsort2(void *array, size_t nitems, size_t size, int (*cmp)(void*,void*)); 67 | 68 | #endif 69 | -------------------------------------------------------------------------------- /gsa-is/experiments/scripts/downloads.txt: -------------------------------------------------------------------------------- 1 | //download pizzachili small-dataset 2 | wget http://pizzachili.dcc.uchile.cl/texts/xml/dblp.xml.50MB.gz 3 | wget http://pizzachili.dcc.uchile.cl/texts/dna/dna.50MB.gz 4 | wget http://pizzachili.dcc.uchile.cl/repcorpus/real/einstein.de.txt.gz 5 | wget http://pizzachili.dcc.uchile.cl/texts/nlang/english.50MB.gz 6 | wget http://pizzachili.dcc.uchile.cl/repcorpus/artificial/fib41.gz 7 | wget http://pizzachili.dcc.uchile.cl/repcorpus/real/kernel.gz 8 | wget http://pizzachili.dcc.uchile.cl/texts/protein/proteins.50MB.gz 9 | wget http://pizzachili.dcc.uchile.cl/texts/code/sources.50MB.gz 10 | 11 | //extract 12 | mkdir pizza 13 | mv *.gz pizza 14 | cd pizza 15 | gunzip *.gz 16 | 17 | //add .txt to extension 18 | for i in *; do mv $i $i.txt; done 19 | -------------------------------------------------------------------------------- /gsa-is/gsacak.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Authors: Felipe A. Louza, Simon Gog, Guilherme P. 
Telles 3 | * contact: louza@ic.unicamp.br 4 | * 03/04/2017 5 | */ 6 | 7 | /* 8 | * This code is a modification of SACA-K algorithm by G. Nong, which can be 9 | * retrieved at: http://code.google.com/p/ge-nong/ 10 | * 11 | * Our version of SACA-K, called gSACA-K, maintains the theoretical bounds of the 12 | * original algorithm to construct the generalized suffix array. 13 | * 14 | * Our algorithm gSACA-K can also compute the LCP-array and the Document-array 15 | * with no additional costs. 16 | * 17 | * gsacak(s, SA, NULL, NULL, n) //computes only SA 18 | * gsacak(s, SA, LCP, NULL, n) //computes SA and LCP 19 | * gsacak(s, SA, NULL, DA, n) //computes SA and DA 20 | * gsacak(s, SA, LCP, DA, n) //computes SA, LCP and DA 21 | * 22 | */ 23 | 24 | #ifndef GSACAK_H 25 | #define GSACAK_H 26 | 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | 34 | #define max(a,b) ((a) > (b) ? (a) : (b)) 35 | 36 | #ifndef DEBUG 37 | #define DEBUG 0 38 | #endif 39 | 40 | #ifndef M64 41 | #define M64 0 42 | #endif 43 | 44 | #if M64 45 | typedef int64_t int_t; 46 | typedef uint64_t uint_t; 47 | #define PRIdN PRId64 48 | #define U_MAX UINT64_MAX 49 | #define I_MAX INT64_MAX 50 | #define I_MIN INT64_MIN 51 | #else 52 | typedef int32_t int_t; 53 | typedef uint32_t uint_t; 54 | #define PRIdN PRId32 55 | #define U_MAX UINT32_MAX 56 | #define I_MAX INT32_MAX 57 | #define I_MIN INT32_MIN 58 | #endif 59 | 60 | /** @brief computes the suffix array of string s[0..n-1] 61 | * 62 | * @param s input string with s[n-1]=0 63 | * @param SA suffix array 64 | * @param n string length 65 | * @return -1 if an error occurred, otherwise the depth of the recursive calls. 
66 | */ 67 | int sacak(unsigned char *s, uint_t *SA, uint_t n); 68 | 69 | /** @brief computes the suffix array of string s[0..n-1] 70 | * @param k alphabet size 71 | */ 72 | int sacak_int(int_t *s, uint_t *SA, uint_t n, uint_t k); 73 | 74 | 75 | /** @brief Computes the suffix array SA (LCP, DA) of T^cat in s[0..n-1] 76 | * 77 | * @param s input concatenated string, using separators s[i]=1 and with s[n-1]=0 78 | * @param SA suffix array 79 | * @param LCP LCP array 80 | * @param DA Document array 81 | * @param n string length 82 | * 83 | * @return depth of the recursive calls. 84 | */ 85 | int gsacak(unsigned char *s, uint_t *SA, int_t *LCP, int_t *DA, uint_t n); 86 | 87 | /** @brief Computes the suffix array SA (LCP, DA) of T^cat in s[0..n-1] 88 | * 89 | * @param s input concatenated string, using separators s[i]=1 and with s[n-1]=0 90 | * @param SA suffix array 91 | * @param LCP LCP array 92 | * @param DA Document array 93 | * @param n string length 94 | * @param k alphabet size 95 | * 96 | * @return depth of the recursive calls. 97 | */ 98 | int gsacak_int(uint_t *s, uint_t *SA, int_t *LCP, int_t *DA, uint_t n, uint_t k); 99 | 100 | #endif 101 | -------------------------------------------------------------------------------- /gsa-is/gsais.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Authors: Felipe A. Louza, Simon Gog, Guilherme P. Telles 3 | * contact: louza@ic.unicamp.br 4 | * 03/04/2017 5 | */ 6 | 7 | /* 8 | * This code is a modification of SA-IS algorithm by G. Nong et al., which can be 9 | * retrieved at: http://code.google.com/p/ge-nong/ 10 | * 11 | * Our version of SA-IS, called gSAIS, maintains the theoretical bounds of the 12 | * original algorithm to construct the generalized suffix array. 13 | * 14 | * Our algorithm gSAIS can also compute the LCP-array and the Document-array 15 | * with no additional costs. 
16 | * 17 | * gsais(s, SA, NULL, NULL, n) //computes only SA 18 | * gsais(s, SA, LCP, NULL, n) //computes SA and LCP 19 | * gsais(s, SA, NULL, DA, n) //computes SA and DA 20 | * gsais(s, SA, LCP, DA, n) //computes SA, LCP and DA 21 | * 22 | */ 23 | 24 | #ifndef GSAIS_H 25 | #define GSAIS_H 26 | 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | 34 | #define max(a,b) ((a) > (b) ? (a) : (b)) 35 | 36 | #ifndef DEBUG 37 | #define DEBUG 0 38 | #endif 39 | 40 | #ifndef M64 41 | #define M64 0 42 | #endif 43 | 44 | #if M64 45 | typedef int64_t int_t; 46 | typedef uint64_t uint_t; 47 | #define PRIdN PRId64 48 | #define U_MAX UINT64_MAX 49 | #define I_MAX INT64_MAX 50 | #define I_MIN INT64_MIN 51 | #else 52 | typedef int32_t int_t; 53 | typedef uint32_t uint_t; 54 | #define PRIdN PRId32 55 | #define U_MAX UINT32_MAX 56 | #define I_MAX INT32_MAX 57 | #define I_MIN INT32_MIN 58 | #endif 59 | 60 | /** @brief computes the suffix array of string s[0..n-1] 61 | * 62 | * @param s input string with s[n-1]=0 63 | * @param SA suffix array 64 | * @param n string length 65 | * @return -1 if an error occurred, otherwise the depth of the recursive calls. 66 | */ 67 | int sais(unsigned char *s, uint_t *SA, uint_t n); 68 | 69 | /** @brief computes the suffix array of string s[0..n-1] 70 | * @param k alphabet size 71 | */ 72 | int sais_int(int_t *s, uint_t *SA, uint_t n, uint_t k); 73 | 74 | 75 | /** @brief Computes the suffix array SA (LCP, DA) of T^cat in s[0..n-1] 76 | * 77 | * @param s input concatenated string, using separators s[i]=1 and with s[n-1]=0 78 | * @param SA suffix array 79 | * @param LCP LCP array 80 | * @param DA Document array 81 | * @param n string length 82 | * 83 | * @return depth of the recursive calls. 
84 | */ 85 | int gsais(unsigned char *s, uint_t *SA, int_t *LCP, int_t *DA, uint_t n); 86 | 87 | /** @brief Computes the suffix array SA (LCP, DA) of T^cat in s[0..n-1] 88 | * 89 | * @param s input concatenated string, using separators s[i]=1 and with s[n-1]=0 90 | * @param SA suffix array 91 | * @param LCP LCP array 92 | * @param DA Document array 93 | * @param n string length 94 | * @param k alphabet size 95 | * 96 | * @return depth of the recursive calls. 97 | */ 98 | int gsais_int(uint_t *s, uint_t *SA, int_t *LCP, int_t *DA, uint_t n, uint_t k); 99 | 100 | #endif 101 | -------------------------------------------------------------------------------- /gsa-is/test.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "gsacak.h" 6 | #include "experiments/external/malloc_count/malloc_count.h" //memory counter 7 | 8 | int main(int argc, char *argv[]){ 9 | 10 | printf("sizeof(int_t) = %zu bytes\n", sizeof(int_t)); 11 | unsigned char *Text; 12 | uint_t n=0; 13 | 14 | // input data 15 | if(argc>=2){ 16 | 17 | //concatenate all strings s_1, s_2, .., s_d in s_1$s_2$..$s_d$# 18 | int i = 2, sum=0; 19 | for(; i<= argc; i++){ 20 | sum += strlen((argv[i-1]))+1; 21 | } 22 | n = sum+1; 23 | Text = malloc(n*sizeof(unsigned char)); 24 | sum=0; 25 | for(i=2; i<= argc; i++){ 26 | sscanf(argv[i-1], "%s", &Text[sum]); 27 | sum += strlen((argv[i-1]))+1; 28 | Text[sum-1]=1;//separator 29 | } 30 | Text[n-1]=0; 31 | printf("N = %d\n", n); 32 | } 33 | else{ 34 | unsigned char *S[3] = {"banana", "anaba", "anan"}; 35 | int i, sum=0; 36 | for(i=0; i< 3; i++){ 37 | sum += strlen((S[i]))+1; 38 | } 39 | n = sum+1; 40 | Text = malloc(n*sizeof(unsigned char)); 41 | sum=0; 42 | for(i=0; i< 3; i++){ 43 | sscanf(S[i], "%s", &Text[sum]); 44 | sum += strlen((S[i]))+1; 45 | Text[sum-1]=1;//separator 46 | } 47 | Text[n-1]=0; 48 | printf("N = %d\n", n); 49 | } 50 | 51 | int i, j; 52 | printf("Text = "); 53 | for(i=0;i 
The MIT License (MIT) 48 | > 49 | > Copyright (c) 2015 Uwe Baier All Rights Reserved. 50 | > 51 | > Permission is hereby granted, free of charge, to any person obtaining a copy 52 | > of this software and associated documentation files (the "Software"), to deal 53 | > in the Software without restriction, including without limitation the rights 54 | > to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 55 | > copies of the Software, and to permit persons to whom the Software is 56 | > furnished to do so, subject to the following conditions: 57 | > 58 | > The above copyright notice and this permission notice shall be included in all 59 | > copies or substantial portions of the Software. 60 | > 61 | > THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 62 | > IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 63 | > FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 64 | > AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 65 | > LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 66 | > OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 67 | > SOFTWARE. 
68 | 69 | ## Author 70 | * Uwe Baier 71 | 72 | -------------------------------------------------------------------------------- /gsaca/benchmark/Makefile: -------------------------------------------------------------------------------- 1 | ROOT_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST)))) 2 | SACA_HEADERS := divsufsort/divsufsort.h \ 3 | sais-lite-2.4.1/sais.h \ 4 | dc3/dc3.h \ 5 | ka/LinearSuffixSort.h \ 6 | gsaca/gsaca.h 7 | SACA_IMPL := divsufsort/divsufsort.c divsufsort/sssort.c divsufsort/trsort.c divsufsort/utils.c \ 8 | sais-lite-2.4.1/sais.c \ 9 | dc3/dc3.C \ 10 | ka/LinearSuffixSort.cpp ka/BooleanString.cpp \ 11 | gsaca/gsaca.c 12 | all: constructsa benchmark 13 | 14 | constructsa: constructsa.x 15 | benchmark: benchmark.x corpusbenchmark.sh 16 | 17 | constructsa.x: programlib/constructsa.cpp $(SACA_HEADERS) $(SACA_IMPL) 18 | g++ -std=c++11 -DNDEBUG -Wall -Wextra -O3 -ffast-math -funroll-loops \ 19 | -o constructsa.x programlib/constructsa.cpp $(SACA_IMPL) 20 | 21 | benchmark.x: programlib/benchmark.cpp $(SACA_HEADERS) $(SACA_IMPL) 22 | ifneq ("$(wildcard /proc/sys/kernel/perf_event_paranoid)","") 23 | $(eval DCMR := -DMEASURECMR) 24 | else 25 | $(eval DCMR := ) 26 | @echo "Warning: It seems like your kernel has no PERF_EVENT support" 27 | @echo "(file /proc/sys/kernel/perf_event_paranoid is not available)" 28 | @echo "To measure cache miss ratios, please install PERF" 29 | endif 30 | g++ -std=c++11 -DNDEBUG $(DCMR) -Wall -Wextra -O3 -ffast-math -funroll-loops \ 31 | -o benchmark.x programlib/benchmark.cpp $(SACA_IMPL) 32 | 33 | corpusbenchmark.sh: programlib/corpusbenchmark.sh 34 | cp programlib/corpusbenchmark.sh corpusbenchmark.sh 35 | chmod a+x corpusbenchmark.sh 36 | -------------------------------------------------------------------------------- /gsaca/benchmark/data/README: -------------------------------------------------------------------------------- 1 | To set up any corpus, just go to the directory and execute make. 
2 | Required Programs: curl, zip, gzip, bzip2 3 | Downloads were last visited on 8.10.2015 4 | -------------------------------------------------------------------------------- /gsaca/benchmark/data/largecorpus/Makefile: -------------------------------------------------------------------------------- 1 | all: bible.txt E.coli world192.txt 2 | 3 | bible.txt: 4 | curl -O http://www.data-compression.info/files/corpora/largecanterburycorpus.zip 5 | unzip -u largecanterburycorpus.zip 6 | rm largecanterburycorpus.zip 7 | 8 | E.coli: 9 | curl -O http://www.data-compression.info/files/corpora/largecanterburycorpus.zip 10 | unzip -u largecanterburycorpus.zip 11 | rm largecanterburycorpus.zip 12 | 13 | world192.txt: 14 | curl -O http://www.data-compression.info/files/corpora/largecanterburycorpus.zip 15 | unzip -u largecanterburycorpus.zip 16 | rm largecanterburycorpus.zip 17 | -------------------------------------------------------------------------------- /gsaca/benchmark/data/pizzachilicorpus/Makefile: -------------------------------------------------------------------------------- 1 | all: sources.200MB proteins.200MB dna.200MB english.200MB dblp.xml.200MB 2 | 3 | sources.200MB: 4 | curl -O http://pizzachili.dcc.uchile.cl/texts/code/sources.200MB.gz 5 | gunzip sources.200MB.gz 6 | 7 | proteins.200MB: 8 | curl -O http://pizzachili.dcc.uchile.cl/texts/protein/proteins.200MB.gz 9 | gunzip proteins.200MB.gz 10 | 11 | dna.200MB: 12 | curl -O http://pizzachili.dcc.uchile.cl/texts/dna/dna.200MB.gz 13 | gunzip dna.200MB.gz 14 | 15 | english.200MB: 16 | curl -O http://pizzachili.dcc.uchile.cl/texts/nlang/english.200MB.gz 17 | gunzip english.200MB.gz 18 | 19 | dblp.xml.200MB: 20 | curl -O http://pizzachili.dcc.uchile.cl/texts/xml/dblp.xml.200MB.gz 21 | gunzip dblp.xml.200MB.gz 22 | -------------------------------------------------------------------------------- /gsaca/benchmark/data/repcorpus/Makefile: -------------------------------------------------------------------------------- 
1 | all: Escherichia_Coli influenza para coreutils world_leaders 2 | 3 | Escherichia_Coli: 4 | curl -O http://pizzachili.dcc.uchile.cl/repcorpus/real/Escherichia_Coli.gz 5 | gunzip Escherichia_Coli.gz 6 | 7 | influenza: 8 | curl -O http://pizzachili.dcc.uchile.cl/repcorpus/real/influenza.gz 9 | gunzip influenza.gz 10 | 11 | para: 12 | curl -O http://pizzachili.dcc.uchile.cl/repcorpus/real/para.gz 13 | gunzip para.gz 14 | 15 | coreutils: 16 | curl -O http://pizzachili.dcc.uchile.cl/repcorpus/real/coreutils.gz 17 | gunzip coreutils.gz 18 | 19 | world_leaders: 20 | curl -O http://pizzachili.dcc.uchile.cl/repcorpus/real/world_leaders.gz 21 | gunzip world_leaders.gz 22 | -------------------------------------------------------------------------------- /gsaca/benchmark/data/silesiacorpus/Makefile: -------------------------------------------------------------------------------- 1 | all: dickens nci webster 2 | 3 | dickens: 4 | curl -O http://sun.aei.polsl.pl/~sdeor/corpus/dickens.bz2 5 | bunzip2 -f dickens.bz2 6 | 7 | nci: 8 | curl -O http://sun.aei.polsl.pl/~sdeor/corpus/nci.bz2 9 | bunzip2 -f nci.bz2 10 | 11 | webster: 12 | curl -O http://sun.aei.polsl.pl/~sdeor/corpus/webster.bz2 13 | bunzip2 -f webster.bz2 14 | -------------------------------------------------------------------------------- /gsaca/benchmark/dc3/dc3.h: -------------------------------------------------------------------------------- 1 | #ifndef _DC3_H 2 | #define _DC3_H 1 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif /* __cplusplus */ 7 | 8 | //expects a nullterminated string (S[n-1] = 0), computes suffix array. 
9 | //returns 0 on success, -1 if n < 0, or -2 if out of memory 10 | int dc3(const unsigned char *S, int *SA, int n); 11 | 12 | #ifdef __cplusplus 13 | } /* extern "C" */ 14 | #endif /* __cplusplus */ 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /gsaca/benchmark/divsufsort/LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2003 Yuta Mori All rights reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /gsaca/benchmark/divsufsort/config.h: -------------------------------------------------------------------------------- 1 | /* 2 | * config.h for libdivsufsort 3 | * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. 
4 | * 5 | * Permission is hereby granted, free of charge, to any person 6 | * obtaining a copy of this software and associated documentation 7 | * files (the "Software"), to deal in the Software without 8 | * restriction, including without limitation the rights to use, 9 | * copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the 11 | * Software is furnished to do so, subject to the following 12 | * conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be 15 | * included in all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 19 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 20 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 21 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 22 | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 24 | * OTHER DEALINGS IN THE SOFTWARE. 25 | */ 26 | 27 | #ifndef _CONFIG_H 28 | #define _CONFIG_H 1 29 | 30 | #ifdef __cplusplus 31 | extern "C" { 32 | #endif /* __cplusplus */ 33 | 34 | /** Define to the version of this package. **/ 35 | #define PROJECT_VERSION_FULL "2.0.2" 36 | 37 | /** Define to 1 if you have the header files. 
**/ 38 | #define HAVE_INTTYPES_H 1 39 | #define HAVE_STDDEF_H 1 40 | #define HAVE_STDINT_H 1 41 | #define HAVE_STDLIB_H 1 42 | #define HAVE_STRING_H 1 43 | #define HAVE_STRINGS_H 1 44 | #define HAVE_MEMORY_H 1 45 | #define HAVE_SYS_TYPES_H 1 46 | 47 | /** for WinIO **/ 48 | /* #undef HAVE_IO_H */ 49 | /* #undef HAVE_FCNTL_H */ 50 | /* #undef HAVE__SETMODE */ 51 | /* #undef HAVE_SETMODE */ 52 | /* #undef HAVE__FILENO */ 53 | /* #undef HAVE_FOPEN_S */ 54 | /* #undef HAVE__O_BINARY */ 55 | #ifndef HAVE__SETMODE 56 | # if HAVE_SETMODE 57 | # define _setmode setmode 58 | # define HAVE__SETMODE 1 59 | # endif 60 | # if HAVE__SETMODE && !HAVE__O_BINARY 61 | # define _O_BINARY 0 62 | # define HAVE__O_BINARY 1 63 | # endif 64 | #endif 65 | 66 | /** for inline **/ 67 | #ifndef INLINE 68 | # define INLINE inline 69 | #endif 70 | 71 | /** for VC++ warning **/ 72 | #ifdef _MSC_VER 73 | #pragma warning(disable: 4127) 74 | #endif 75 | 76 | 77 | #ifdef __cplusplus 78 | } /* extern "C" */ 79 | #endif /* __cplusplus */ 80 | 81 | #endif /* _CONFIG_H */ 82 | -------------------------------------------------------------------------------- /gsaca/benchmark/gsaca/gsaca.c: -------------------------------------------------------------------------------- 1 | ../../gsaca.c -------------------------------------------------------------------------------- /gsaca/benchmark/gsaca/gsaca.h: -------------------------------------------------------------------------------- 1 | ../../gsaca.h -------------------------------------------------------------------------------- /gsaca/benchmark/ka/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for suftest 2 | 3 | # sources 4 | OBJS = BooleanString.o LinearSuffixSort.o suftest.o 5 | TARGET = suftest 6 | MAKEFILE = Makefile 7 | 8 | # options 9 | CC = g++ 10 | LD = g++ 11 | #OUTPUT_OPTION = -o $@ 12 | CFLAGS = -O2 -g 13 | CXXFLAGS = -O2 -g 14 | CPPFLAGS = -Wall #-DNDEBUG 15 | LDFLAGS = 16 | LDLIBS = 17 | 
#TARGET_ARCH = 18 | 19 | 20 | # targets 21 | .PHONY: all 22 | all: $(TARGET) 23 | $(TARGET): $(OBJS) 24 | 25 | distclean: clean 26 | clean: 27 | $(RM) $(TARGET) $(OBJS) 28 | 29 | # dependencies 30 | $(OBJS): BooleanString.h LinearSuffixSort.h $(MAKEFILE) 31 | 32 | -------------------------------------------------------------------------------- /gsaca/benchmark/ka/suftest.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * suftest.cpp 3 | */ 4 | /* NOTE(review): the header names of the two system includes were lost in extraction; reconstructed from the names used below (fprintf/exit/ifstream) - confirm against upstream. */ 5 | #include "LinearSuffixSort.h" 6 | #include <cstdio> 7 | #include <cstdlib> 8 | #include <fstream> 9 | using namespace std; 10 | /* Checks that SA[0..n] is the suffix array of T[0..n]. Returns 0 if valid, -1 on invalid arguments, -2 if an entry lies outside [0,n], -3 if first characters are out of order, -4 if a suffix is in the wrong position. verbose >= 1 writes a status line to stderr, verbose >= 2 adds details of the failure. */ 11 | static 12 | int 13 | sufcheck(const unsigned char *T, const int *SA, 14 | int n, int verbose) { 15 | int C[256]; 16 | int i = 0, p, t = 0; 17 | int c; 18 | int err = 0; 19 | 20 | if(1 <= verbose) { ::fprintf(stderr, "sufchecker: "); } 21 | 22 | /* Check arguments. */ 23 | if((T == NULL) || (SA == NULL) || (n < 0)) { err = -1; } 24 | 25 | /* ranges. */ 26 | if(err == 0) { 27 | for(i = 0; i <= n; ++i) { 28 | if((SA[i] < 0) || (n < SA[i])) { 29 | err = -2; 30 | break; 31 | } 32 | } 33 | } 34 | 35 | /* first characters. */ 36 | if(err == 0) { 37 | for(i = 1; i < n; ++i) { 38 | if(T[SA[i]] > T[SA[i + 1]]) { 39 | err = -3; 40 | break; 41 | } 42 | } 43 | } 44 | 45 | /* suffixes. 
*/ 46 | if(err == 0) { 47 | for(i = 0; i < 256; ++i) { C[i] = 0; } 48 | for(i = 0; i < n; ++i) { ++C[T[i]]; } 49 | for(i = 0, p = 1; i < 256; ++i) { 50 | t = C[i]; 51 | C[i] = p; 52 | p += t; 53 | } 54 | /* C[c] is now the next unused SA slot for suffixes starting with c (slot 0 is checked against position n). Walking SA in order, the suffix preceding SA[i] must sit exactly at that slot. */ 55 | for(i = 0; i <= n; ++i) { 56 | p = SA[i]; 57 | if(0 < p) { 58 | c = T[--p]; 59 | t = C[c]; 60 | } else { 61 | p = n; 62 | c = -1; 63 | t = 0; 64 | } 65 | if((t < 0) || (p != SA[t])) { /* t < 0: bucket of c was marked exhausted below; never index SA[-1] */ 66 | err = -4; 67 | break; 68 | } 69 | if(0 <= c) { 70 | ++C[c]; 71 | if((n < C[c]) || (T[SA[C[c]]] != c)) { C[c] = -1; } /* next slot is out of range or belongs to another character: mark bucket exhausted */ 72 | } 73 | } 74 | } 75 | 76 | if(1 <= verbose) { 77 | if(err == 0) { 78 | ::fprintf(stderr, "Done.\n"); 79 | } else if(verbose == 1) { 80 | ::fprintf(stderr, "Error.\n"); 81 | } else if(err == -1) { 82 | ::fprintf(stderr, "Invalid arguments.\n"); 83 | } else if(err == -2) { 84 | ::fprintf(stderr, "Out of the range [0,%d].\n SA[%d]=%d\n", 85 | (int)n, (int)i, (int)SA[i]); 86 | } else if(err == -3) { 87 | ::fprintf(stderr, "Suffixes in wrong order.\n" 88 | " T[SA[%d]=%d]=%d > T[SA[%d]=%d]=%d\n", 89 | i, SA[i], T[SA[i]], 90 | i + 1, SA[i + 1], T[SA[i + 1]]); 91 | } else if(err == -4) { 92 | ::fprintf(stderr, "Suffix in wrong position.\n"); 93 | if(0 <= t) { ::fprintf(stderr, " SA[%d]=%d or\n", (int)t, (int)SA[t]); } 94 | ::fprintf(stderr, " SA[%d]=%d\n", (int)i, (int)SA[i]); 95 | } 96 | } 97 | 98 | return err; 99 | } 100 | /* Reads FILE into memory, appends a 0 terminator, builds the suffix array with LinearSuffixSort and validates it with sufcheck. Exits with a non-zero status on usage, I/O or validation errors. */ 101 | int 102 | main(int argc, const char *argv[]) { 103 | unsigned char *T; 104 | int *A; 105 | int n; 106 | 107 | /* Check arguments. */ 108 | if(argc != 2) { 109 | ::fprintf(stderr, "usage: %s FILE\n", argv[0]); 110 | ::exit(EXIT_FAILURE); 111 | } 112 | 113 | /* Open a file for reading. */ 114 | ifstream fs(argv[1], ios_base::in | ios_base::binary); 115 | if(!fs) { ::fprintf(stderr, "%s: cannot open %s\n", argv[0], argv[1]); ::exit(EXIT_FAILURE); } 116 | /* Get the file size. */ 117 | fs.seekg(0, ios_base::end); 118 | n = fs.tellg(); 119 | fs.seekg(0, ios_base::beg); 120 | if(n < 0) { ::fprintf(stderr, "%s: cannot determine size of %s\n", argv[0], argv[1]); ::exit(EXIT_FAILURE); } /* tellg() yields -1 on failure; T[n] below would otherwise be written out of bounds */ 121 | /* Allocate n + 1 bytes of memory (data plus terminator). */ 122 | T = new unsigned char[n + 1]; 123 | 124 | /* Read n bytes of data. 
*/ 125 | if(!fs.read((char *)T, n)) { ::fprintf(stderr, "%s: cannot read %s\n", argv[0], argv[1]); ::exit(EXIT_FAILURE); } 126 | T[n] = 0; 127 | fs.close(); 128 | 129 | /* Construct the suffix array. */ 130 | A = LinearSuffixSort(T, n + 1); 131 | 132 | /* Check the suffix array. */ 133 | if(sufcheck(T, A, n, 3) != 0) { ::exit(1); } 134 | 135 | /* Deallocate memory. */ 136 | delete[] A; 137 | delete[] T; 138 | 139 | return 0; 140 | } 141 | -------------------------------------------------------------------------------- /gsaca/benchmark/programlib/corpusbenchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #check args 3 | if [ $# -ne 1 ]; then 4 | echo "USAGE: $0 CORPUS" 5 | echo "CORPUS: name of corpus to be benchmarked (data/CORPUS must be a directory)" 6 | exit 1 7 | fi 8 | #check if corpus exists 9 | cdir="data/$1" 10 | if [ ! -d "$cdir" ]; then 11 | echo "Corpus $1 does not exist (data/$1 is not a directory)" 12 | exit 1 13 | fi 14 | #create data of corpus 15 | if [ -e "$cdir/Makefile" ]; then 16 | curdir=$(pwd) 17 | cd "$cdir" || exit 1 18 | make 19 | cd "$curdir" || exit 1 20 | fi 21 | #create benchmark if not done yet 22 | make benchmark.x 23 | #run benchmark (10 repeats) and output results to file 24 | ./benchmark.x -r 10 $(find "$cdir" -type f ! -name Makefile) | tee "results/$1.dat" 25 | -------------------------------------------------------------------------------- /gsaca/benchmark/sais-lite-2.4.1/COPYING: -------------------------------------------------------------------------------- 1 | The sais-lite copyright is as follows: 2 | 3 | Copyright (c) 2008-2010 Yuta Mori All Rights Reserved. 
4 | 5 | Permission is hereby granted, free of charge, to any person 6 | obtaining a copy of this software and associated documentation 7 | files (the "Software"), to deal in the Software without 8 | restriction, including without limitation the rights to use, 9 | copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the 11 | Software is furnished to do so, subject to the following 12 | conditions: 13 | 14 | The above copyright notice and this permission notice shall be 15 | included in all copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 19 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 20 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 21 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 22 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 24 | OTHER DEALINGS IN THE SOFTWARE. 
25 | -------------------------------------------------------------------------------- /gsaca/benchmark/sais-lite-2.4.1/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for suftest and test 2 | 3 | # options 4 | CC = gcc 5 | #CXX = g++ 6 | #OUTPUT_OPTION = -o $@ 7 | CFLAGS = -O3 -fomit-frame-pointer 8 | #CXXFLAGS = -O3 -fomit-frame-pointer 9 | CPPFLAGS = -Wall -DNDEBUG 10 | LDFLAGS = 11 | LDLIBS = 12 | #TARGET_ARCH = 13 | 14 | # targets 15 | .PHONY: all 16 | all: suftest 17 | suftest: sais.o suftest.o 18 | test: 19 | $(CC) -O -g -Wall test.c sais.c -o test 20 | ./test 21 | $(RM) test test.exe 22 | 23 | distclean: clean 24 | clean: 25 | $(RM) suftest suftest.exe test test.exe sais.o suftest.o 26 | 27 | # dependencies 28 | sais.o suftest.o: sais.h Makefile 29 | -------------------------------------------------------------------------------- /gsaca/benchmark/sais-lite-2.4.1/README: -------------------------------------------------------------------------------- 1 | 2 | sais-lite-2.4.0 3 | ---------------------- 4 | 5 | This archive contains the source code of the implementation of 6 | the IS based linear suffix array construction algorithm 7 | described in the paper: 8 | 9 | Ge Nong, Sen Zhang and Wai Hong Chan 10 | Two Efficient Algorithms for Linear Suffix Array Construction 11 | 2008? 12 | http://www.cs.sysu.edu.cn/nong/index.files/Two%20Efficient%20Algorithms%20for%20Linear%20Suffix%20Array%20Construction.pdf 13 | 14 | 15 | Yuta Mori 16 | -------------------------------------------------------------------------------- /gsaca/benchmark/sais-lite-2.4.1/sais.h: -------------------------------------------------------------------------------- 1 | /* 2 | * sais.h for sais-lite 3 | * Copyright (c) 2008-2010 Yuta Mori All Rights Reserved. 
4 | * 5 | * Permission is hereby granted, free of charge, to any person 6 | * obtaining a copy of this software and associated documentation 7 | * files (the "Software"), to deal in the Software without 8 | * restriction, including without limitation the rights to use, 9 | * copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the 11 | * Software is furnished to do so, subject to the following 12 | * conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be 15 | * included in all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 19 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 20 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 21 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 22 | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 24 | * OTHER DEALINGS IN THE SOFTWARE. 
25 | */ 26 | 27 | #ifndef _SAIS_H 28 | #define _SAIS_H 1 29 | 30 | #ifdef __cplusplus 31 | extern "C" { 32 | #endif /* __cplusplus */ 33 | 34 | /* find the suffix array SA of T[0..n-1] 35 | use a working space (excluding T and SA) of at most 2n+O(lg n) */ 36 | int 37 | sais(const unsigned char *T, int *SA, int n); 38 | 39 | /* find the suffix array SA of T[0..n-1] in {0..k-1}^n 40 | use a working space (excluding T and SA) of at most MAX(4k,2n) */ 41 | int 42 | sais_int(const int *T, int *SA, int n, int k); 43 | 44 | /* burrows-wheeler transform */ 45 | int 46 | sais_bwt(const unsigned char *T, unsigned char *U, int *A, int n); 47 | int 48 | sais_int_bwt(const int *T, int *U, int *A, int n, int k); 49 | 50 | 51 | #ifdef __cplusplus 52 | } /* extern "C" */ 53 | #endif /* __cplusplus */ 54 | 55 | #endif /* _SAIS_H */ 56 | -------------------------------------------------------------------------------- /gsaca/gsaca.h: -------------------------------------------------------------------------------- 1 | /* 2 | * gsaca.h for gsaca 3 | * Copyright (c) 2015 Uwe Baier All Rights Reserved. 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | */ 23 | 24 | #ifndef _GSACA_H 25 | #define _GSACA_H 1 26 | 27 | #ifdef __cplusplus 28 | extern "C" { 29 | #endif /* __cplusplus */ 30 | 31 | /** 32 | * Constructs the suffix array of the given string S. 33 | * @param S a null-terminated string of length n (terminator included), i.e. S[n-1] = '\0'. 34 | * @param SA an array of size n that receives the suffix array. 35 | * @param n length of the string and of the suffix array. 36 | * @return 0 if construction worked, -1 on illegal parameters, 37 | * or -2 if not enough memory is available. 38 | */ 39 | int gsaca(const unsigned char *S, int *SA, int n); 40 | 41 | /** 42 | * Unsigned variant of gsaca(): constructs the suffix array of the given string S, with SA entries and n typed as unsigned int. 43 | * @param S a null-terminated string of length n (terminator included), i.e. S[n-1] = '\0'. 44 | * @param SA an array of size n that receives the suffix array. 45 | * @param n length of the string and of the suffix array. 46 | * @return 0 if construction worked, -1 on illegal parameters, 47 | * or -2 if not enough memory is available. NOTE(review): which inputs count as illegal for an unsigned n is not visible in this header - confirm in gsaca.c. 48 | */ 49 | int gsaca_unsigned(const unsigned char *S, unsigned int *SA, unsigned int n); 50 | 51 | #ifdef __cplusplus 52 | } /* extern "C" */ 53 | #endif /* __cplusplus */ 54 | 55 | #endif /* _GSACA_H */ 56 | -------------------------------------------------------------------------------- /libdivsufsort/.gitignore: -------------------------------------------------------------------------------- 1 | # Object files 2 | *.o 3 | *.ko 4 | *.obj 5 | *.elf 6 | 7 | # Precompiled Headers 8 | *.gch 9 | *.pch 10 | 11 | # Libraries 12 | *.lib 13 | *.a 14 | *.la 15 | *.lo 16 | 17 | # Shared objects (inc. 
Windows DLLs) 18 | *.dll 19 | *.so 20 | *.so.* 21 | *.dylib 22 | 23 | # Executables 24 | *.exe 25 | *.out 26 | *.app 27 | *.i*86 28 | *.x86_64 29 | *.hex 30 | 31 | # CMake files/directories 32 | build/ 33 | -------------------------------------------------------------------------------- /libdivsufsort/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # libdivsufsort Change Log 2 | 3 | See full changelog at: https://github.com/y-256/libdivsufsort/commits 4 | 5 | ## [2.0.1] - 2010-11-11 6 | ### Fixed 7 | * Wrong variable used in `divbwt` function 8 | * Enclose some string variables with double quotation marks in include/CMakeLists.txt 9 | * Fix typo in include/CMakeLists.txt 10 | 11 | ## 2.0.0 - 2008-08-23 12 | ### Changed 13 | * Switch the build system to [CMake](http://www.cmake.org/) 14 | * Improve the performance of the suffix-sorting algorithm 15 | 16 | ### Added 17 | * OpenMP support 18 | * 64-bit version of divsufsort 19 | 20 | [Unreleased]: https://github.com/y-256/libdivsufsort/compare/2.0.1...HEAD 21 | [2.0.1]: https://github.com/y-256/libdivsufsort/compare/2.0.0...2.0.1 22 | -------------------------------------------------------------------------------- /libdivsufsort/CMakeModules/AppendCompilerFlags.cmake: -------------------------------------------------------------------------------- 1 | include(CheckCSourceCompiles) 2 | include(CheckCXXSourceCompiles) 3 | 4 | macro(append_c_compiler_flags _flags _name _result) 5 | set(SAFE_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS}) 6 | string(REGEX REPLACE "[-+/ ]" "_" cname "${_name}") 7 | string(TOUPPER "${cname}" cname) 8 | foreach(flag ${_flags}) 9 | string(REGEX REPLACE "^[-+/ ]+(.*)[-+/ ]*$" "\\1" flagname "${flag}") 10 | string(REGEX REPLACE "[-+/ ]" "_" flagname "${flagname}") 11 | string(TOUPPER "${flagname}" flagname) 12 | set(have_flag "HAVE_${cname}_${flagname}") 13 | set(CMAKE_REQUIRED_FLAGS "${flag}") 14 | check_c_source_compiles("int main() { return 0; 
}" ${have_flag}) 15 | if(${have_flag}) 16 | set(${_result} "${${_result}} ${flag}") 17 | endif(${have_flag}) 18 | endforeach(flag) 19 | set(CMAKE_REQUIRED_FLAGS ${SAFE_CMAKE_REQUIRED_FLAGS}) 20 | endmacro(append_c_compiler_flags) 21 | 22 | macro(append_cxx_compiler_flags _flags _name _result) 23 | set(SAFE_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS}) 24 | string(REGEX REPLACE "[-+/ ]" "_" cname "${_name}") 25 | string(TOUPPER "${cname}" cname) 26 | foreach(flag ${_flags}) 27 | string(REGEX REPLACE "^[-+/ ]+(.*)[-+/ ]*$" "\\1" flagname "${flag}") 28 | string(REGEX REPLACE "[-+/ ]" "_" flagname "${flagname}") 29 | string(TOUPPER "${flagname}" flagname) 30 | set(have_flag "HAVE_${cname}_${flagname}") 31 | set(CMAKE_REQUIRED_FLAGS "${flag}") 32 | check_cxx_source_compiles("int main() { return 0; }" ${have_flag}) 33 | if(${have_flag}) 34 | set(${_result} "${${_result}} ${flag}") 35 | endif(${have_flag}) 36 | endforeach(flag) 37 | set(CMAKE_REQUIRED_FLAGS ${SAFE_CMAKE_REQUIRED_FLAGS}) 38 | endmacro(append_cxx_compiler_flags) 39 | -------------------------------------------------------------------------------- /libdivsufsort/CMakeModules/CheckFunctionKeywords.cmake: -------------------------------------------------------------------------------- 1 | include(CheckCSourceCompiles) 2 | 3 | macro(check_function_keywords _wordlist) 4 | set(${_result} "") 5 | foreach(flag ${_wordlist}) 6 | string(REGEX REPLACE "[-+/ ()]" "_" flagname "${flag}") 7 | string(TOUPPER "${flagname}" flagname) 8 | set(have_flag "HAVE_${flagname}") 9 | check_c_source_compiles("${flag} void func(); void func() { } int main() { func(); return 0; }" ${have_flag}) 10 | if(${have_flag} AND NOT ${_result}) 11 | set(${_result} "${flag}") 12 | # break() 13 | endif(${have_flag} AND NOT ${_result}) 14 | endforeach(flag) 15 | endmacro(check_function_keywords) 16 | -------------------------------------------------------------------------------- /libdivsufsort/CMakeModules/ProjectCPack.cmake: 
-------------------------------------------------------------------------------- 1 | # If the cmake version includes cpack, use it 2 | IF(EXISTS "${CMAKE_ROOT}/Modules/CPack.cmake") 3 | SET(CPACK_PACKAGE_DESCRIPTION_SUMMARY "${PROJECT_DESCRIPTION}") 4 | SET(CPACK_PACKAGE_VENDOR "${PROJECT_VENDOR}") 5 | SET(CPACK_PACKAGE_DESCRIPTION_FILE "${CMAKE_CURRENT_SOURCE_DIR}/README.md") 6 | SET(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE") 7 | SET(CPACK_PACKAGE_VERSION_MAJOR "${PROJECT_VERSION_MAJOR}") 8 | SET(CPACK_PACKAGE_VERSION_MINOR "${PROJECT_VERSION_MINOR}") 9 | SET(CPACK_PACKAGE_VERSION_PATCH "${PROJECT_VERSION_PATCH}") 10 | # SET(CPACK_PACKAGE_INSTALL_DIRECTORY "${PROJECT_NAME} ${PROJECT_VERSION}") 11 | SET(CPACK_SOURCE_PACKAGE_FILE_NAME "${PROJECT_NAME}-${PROJECT_VERSION_FULL}") 12 | 13 | IF(NOT DEFINED CPACK_SYSTEM_NAME) 14 | SET(CPACK_SYSTEM_NAME "${CMAKE_SYSTEM_NAME}-${CMAKE_SYSTEM_PROCESSOR}") 15 | ENDIF(NOT DEFINED CPACK_SYSTEM_NAME) 16 | 17 | IF(${CPACK_SYSTEM_NAME} MATCHES Windows) 18 | IF(CMAKE_CL_64) 19 | SET(CPACK_SYSTEM_NAME win64-${CMAKE_SYSTEM_PROCESSOR}) 20 | ELSE(CMAKE_CL_64) 21 | SET(CPACK_SYSTEM_NAME win32-${CMAKE_SYSTEM_PROCESSOR}) 22 | ENDIF(CMAKE_CL_64) 23 | ENDIF(${CPACK_SYSTEM_NAME} MATCHES Windows) 24 | 25 | IF(NOT DEFINED CPACK_PACKAGE_FILE_NAME) 26 | SET(CPACK_PACKAGE_FILE_NAME "${CPACK_SOURCE_PACKAGE_FILE_NAME}-${CPACK_SYSTEM_NAME}") 27 | ENDIF(NOT DEFINED CPACK_PACKAGE_FILE_NAME) 28 | 29 | SET(CPACK_PACKAGE_CONTACT "${PROJECT_CONTACT}") 30 | IF(UNIX) 31 | SET(CPACK_STRIP_FILES "") 32 | SET(CPACK_SOURCE_STRIP_FILES "") 33 | # SET(CPACK_PACKAGE_EXECUTABLES "ccmake" "CMake") 34 | ENDIF(UNIX) 35 | SET(CPACK_SOURCE_IGNORE_FILES "/CVS/" "/build/" "/\\\\.build/" "/\\\\.svn/" "~$") 36 | # include CPack model once all variables are set 37 | INCLUDE(CPack) 38 | ENDIF(EXISTS "${CMAKE_ROOT}/Modules/CPack.cmake") 39 | -------------------------------------------------------------------------------- 
/libdivsufsort/CMakeModules/cmake_uninstall.cmake.in: -------------------------------------------------------------------------------- 1 | IF(NOT EXISTS "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt") 2 | MESSAGE(FATAL_ERROR "Cannot find install manifest: \"@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt\"") 3 | ENDIF(NOT EXISTS "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt") 4 | 5 | FILE(READ "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt" files) 6 | STRING(REGEX REPLACE "\n" ";" files "${files}") 7 | 8 | SET(NUM 0) 9 | FOREACH(file ${files}) 10 | IF(EXISTS "$ENV{DESTDIR}${file}") 11 | MESSAGE(STATUS "Looking for \"$ENV{DESTDIR}${file}\" - found") 12 | SET(UNINSTALL_CHECK_${NUM} 1) 13 | ELSE(EXISTS "$ENV{DESTDIR}${file}") 14 | MESSAGE(STATUS "Looking for \"$ENV{DESTDIR}${file}\" - not found") 15 | SET(UNINSTALL_CHECK_${NUM} 0) 16 | ENDIF(EXISTS "$ENV{DESTDIR}${file}") 17 | MATH(EXPR NUM "1 + ${NUM}") 18 | ENDFOREACH(file) 19 | 20 | SET(NUM 0) 21 | FOREACH(file ${files}) 22 | IF(${UNINSTALL_CHECK_${NUM}}) 23 | MESSAGE(STATUS "Uninstalling \"$ENV{DESTDIR}${file}\"") 24 | EXEC_PROGRAM( 25 | "@CMAKE_COMMAND@" ARGS "-E remove \"$ENV{DESTDIR}${file}\"" 26 | OUTPUT_VARIABLE rm_out 27 | RETURN_VALUE rm_retval 28 | ) 29 | IF(NOT "${rm_retval}" STREQUAL 0) 30 | MESSAGE(FATAL_ERROR "Problem when removing \"$ENV{DESTDIR}${file}\"") 31 | ENDIF(NOT "${rm_retval}" STREQUAL 0) 32 | ENDIF(${UNINSTALL_CHECK_${NUM}}) 33 | MATH(EXPR NUM "1 + ${NUM}") 34 | ENDFOREACH(file) 35 | 36 | FILE(REMOVE "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt") 37 | -------------------------------------------------------------------------------- /libdivsufsort/LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2003 Yuta Mori All rights reserved. 
4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /libdivsufsort/VERSION.cmake: -------------------------------------------------------------------------------- 1 | set(PROJECT_VERSION_MAJOR "2") 2 | set(PROJECT_VERSION_MINOR "0") 3 | set(PROJECT_VERSION_PATCH "2") 4 | set(PROJECT_VERSION_EXTRA "-1") 5 | set(PROJECT_VERSION "${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}") 6 | set(PROJECT_VERSION_FULL "${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}${PROJECT_VERSION_EXTRA}") 7 | 8 | set(LIBRARY_VERSION "3.0.1") 9 | set(LIBRARY_SOVERSION "3") 10 | 11 | ## Git revision number ## 12 | if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git") 13 | execute_process(COMMAND git describe --tags HEAD 14 | WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" 15 | OUTPUT_VARIABLE GIT_DESCRIBE_TAGS ERROR_QUIET) 16 | if(GIT_DESCRIBE_TAGS) 17 | string(REGEX REPLACE "^v(.*)" "\\1" GIT_REVISION "${GIT_DESCRIBE_TAGS}") 18 | string(STRIP "${GIT_REVISION}" GIT_REVISION) 19 | if(GIT_REVISION) 20 | set(PROJECT_VERSION_FULL "${GIT_REVISION}") 21 | endif(GIT_REVISION) 22 | endif(GIT_DESCRIBE_TAGS) 23 | endif(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git") 24 | -------------------------------------------------------------------------------- /libdivsufsort/examples/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | ## Add definitions ## 2 | add_definitions(-D_LARGEFILE_SOURCE -D_LARGE_FILES -D_FILE_OFFSET_BITS=64) 3 | 4 | ## Targets ## 5 | include_directories("${CMAKE_CURRENT_SOURCE_DIR}/../include" 6 | "${CMAKE_CURRENT_BINARY_DIR}/../include") 7 | link_directories("${CMAKE_CURRENT_BINARY_DIR}/../lib") 8 | foreach(src suftest mksary sasearch bwt unbwt) 9 | add_executable(${src} ${src}.c) 10 | target_link_libraries(${src} divsufsort) 11 | endforeach(src) 12 | -------------------------------------------------------------------------------- /libdivsufsort/include/config.h.cmake: 
-------------------------------------------------------------------------------- 1 | /* 2 | * config.h for libdivsufsort 3 | * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. 4 | * 5 | * Permission is hereby granted, free of charge, to any person 6 | * obtaining a copy of this software and associated documentation 7 | * files (the "Software"), to deal in the Software without 8 | * restriction, including without limitation the rights to use, 9 | * copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the 11 | * Software is furnished to do so, subject to the following 12 | * conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be 15 | * included in all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 19 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 20 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 21 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 22 | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 24 | * OTHER DEALINGS IN THE SOFTWARE. 25 | */ 26 | 27 | #ifndef _CONFIG_H 28 | #define _CONFIG_H 1 29 | 30 | #ifdef __cplusplus 31 | extern "C" { 32 | #endif /* __cplusplus */ 33 | 34 | /** Define to the version of this package. **/ 35 | #cmakedefine PROJECT_VERSION_FULL "${PROJECT_VERSION_FULL}" 36 | 37 | /** Define to 1 if you have the header files. 
**/ 38 | #cmakedefine HAVE_INTTYPES_H 1 39 | #cmakedefine HAVE_STDDEF_H 1 40 | #cmakedefine HAVE_STDINT_H 1 41 | #cmakedefine HAVE_STDLIB_H 1 42 | #cmakedefine HAVE_STRING_H 1 43 | #cmakedefine HAVE_STRINGS_H 1 44 | #cmakedefine HAVE_MEMORY_H 1 45 | #cmakedefine HAVE_SYS_TYPES_H 1 46 | 47 | /** for WinIO **/ 48 | #cmakedefine HAVE_IO_H 1 49 | #cmakedefine HAVE_FCNTL_H 1 50 | #cmakedefine HAVE__SETMODE 1 51 | #cmakedefine HAVE_SETMODE 1 52 | #cmakedefine HAVE__FILENO 1 53 | #cmakedefine HAVE_FOPEN_S 1 54 | #cmakedefine HAVE__O_BINARY 1 55 | #ifndef HAVE__SETMODE 56 | # if HAVE_SETMODE 57 | # define _setmode setmode 58 | # define HAVE__SETMODE 1 59 | # endif 60 | # if HAVE__SETMODE && !HAVE__O_BINARY 61 | # define _O_BINARY 0 62 | # define HAVE__O_BINARY 1 63 | # endif 64 | #endif 65 | 66 | /** for inline **/ 67 | #ifndef INLINE 68 | # define INLINE @INLINE@ 69 | #endif 70 | 71 | /** for VC++ warning **/ 72 | #ifdef _MSC_VER 73 | #pragma warning(disable: 4127) 74 | #endif 75 | 76 | 77 | #ifdef __cplusplus 78 | } /* extern "C" */ 79 | #endif /* __cplusplus */ 80 | 81 | #endif /* _CONFIG_H */ 82 | -------------------------------------------------------------------------------- /libdivsufsort/include/lfs.h.cmake: -------------------------------------------------------------------------------- 1 | /* 2 | * lfs.h for libdivsufsort 3 | * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. 
4 | * 5 | * Permission is hereby granted, free of charge, to any person 6 | * obtaining a copy of this software and associated documentation 7 | * files (the "Software"), to deal in the Software without 8 | * restriction, including without limitation the rights to use, 9 | * copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the 11 | * Software is furnished to do so, subject to the following 12 | * conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be 15 | * included in all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 19 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 20 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 21 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 22 | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 24 | * OTHER DEALINGS IN THE SOFTWARE. 
25 | */ 26 | 27 | #ifndef _LFS_H 28 | #define _LFS_H 1 29 | 30 | #ifdef __cplusplus 31 | extern "C" { 32 | #endif /* __cplusplus */ 33 | 34 | #ifndef __STRICT_ANSI__ 35 | # define LFS_OFF_T @LFS_OFF_T@ 36 | # define LFS_FOPEN @LFS_FOPEN@ 37 | # define LFS_FTELL @LFS_FTELL@ 38 | # define LFS_FSEEK @LFS_FSEEK@ 39 | # define LFS_PRId @LFS_PRID@ 40 | #else 41 | # define LFS_OFF_T long 42 | # define LFS_FOPEN fopen 43 | # define LFS_FTELL ftell 44 | # define LFS_FSEEK fseek 45 | # define LFS_PRId "ld" 46 | #endif 47 | #ifndef PRIdOFF_T 48 | # define PRIdOFF_T LFS_PRId 49 | #endif 50 | 51 | 52 | #ifdef __cplusplus 53 | } /* extern "C" */ 54 | #endif /* __cplusplus */ 55 | 56 | #endif /* _LFS_H */ 57 | -------------------------------------------------------------------------------- /libdivsufsort/lib/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include_directories("${CMAKE_CURRENT_SOURCE_DIR}/../include" 2 | "${CMAKE_CURRENT_BINARY_DIR}/../include") 3 | 4 | set(divsufsort_SRCS divsufsort.c sssort.c trsort.c utils.c) 5 | 6 | ## libdivsufsort ## 7 | add_library(divsufsort ${divsufsort_SRCS}) 8 | install(TARGETS divsufsort 9 | RUNTIME DESTINATION ${CMAKE_INSTALL_RUNTIMEDIR} 10 | LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} 11 | ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) 12 | set_target_properties(divsufsort PROPERTIES 13 | VERSION "${LIBRARY_VERSION}" 14 | SOVERSION "${LIBRARY_SOVERSION}" 15 | DEFINE_SYMBOL DIVSUFSORT_BUILD_DLL 16 | RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/../examples") 17 | 18 | ## libdivsufsort64 ## 19 | if(BUILD_DIVSUFSORT64) 20 | add_library(divsufsort64 ${divsufsort_SRCS}) 21 | install(TARGETS divsufsort64 22 | RUNTIME DESTINATION ${CMAKE_INSTALL_RUNTIMEDIR} 23 | LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} 24 | ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) 25 | set_target_properties(divsufsort64 PROPERTIES 26 | VERSION "${LIBRARY_VERSION}" 27 | SOVERSION "${LIBRARY_SOVERSION}" 28 | 
DEFINE_SYMBOL DIVSUFSORT_BUILD_DLL 29 | COMPILE_FLAGS "-DBUILD_DIVSUFSORT64" 30 | RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/../examples") 31 | endif(BUILD_DIVSUFSORT64) 32 | -------------------------------------------------------------------------------- /libdivsufsort/pkgconfig/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | ## generate libdivsufsort.pc ## 2 | set(W64BIT "") 3 | configure_file("${CMAKE_CURRENT_SOURCE_DIR}/libdivsufsort.pc.cmake" "${CMAKE_CURRENT_BINARY_DIR}/libdivsufsort.pc" @ONLY) 4 | install(FILES "${CMAKE_CURRENT_BINARY_DIR}/libdivsufsort.pc" DESTINATION ${CMAKE_INSTALL_PKGCONFIGDIR}) 5 | if(BUILD_DIVSUFSORT64) 6 | set(W64BIT "64") 7 | configure_file("${CMAKE_CURRENT_SOURCE_DIR}/libdivsufsort.pc.cmake" "${CMAKE_CURRENT_BINARY_DIR}/libdivsufsort64.pc" @ONLY) 8 | install(FILES "${CMAKE_CURRENT_BINARY_DIR}/libdivsufsort64.pc" DESTINATION ${CMAKE_INSTALL_PKGCONFIGDIR}) 9 | endif(BUILD_DIVSUFSORT64) 10 | -------------------------------------------------------------------------------- /libdivsufsort/pkgconfig/libdivsufsort.pc.cmake: -------------------------------------------------------------------------------- 1 | prefix=@CMAKE_INSTALL_PREFIX@ 2 | exec_prefix=${prefix} 3 | libdir=@CMAKE_INSTALL_LIBDIR@ 4 | includedir=@CMAKE_INSTALL_INCLUDEDIR@ 5 | 6 | Name: @PROJECT_NAME@@W64BIT@ 7 | Description: @PROJECT_DESCRIPTION@ 8 | Version: @PROJECT_VERSION_FULL@ 9 | URL: @PROJECT_URL@ 10 | Libs: -L${libdir} -ldivsufsort@W64BIT@ 11 | Cflags: -I${includedir} 12 | -------------------------------------------------------------------------------- /msufsort/LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Michael Maniscalco 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software 
without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /msufsort/src/SConscript: -------------------------------------------------------------------------------- 1 | Import('env', 'install_dir') 2 | 3 | #=============================================================================== 4 | # default list of subdirectories to build 5 | #=============================================================================== 6 | 7 | sub_directories = ['library', 'executable'] 8 | 9 | 10 | #=============================================================================== 11 | # invoke SConscript on each subdirectory 12 | #=============================================================================== 13 | 14 | for directory_name in sub_directories: 15 | env.SConscript('%s/SConscript' % directory_name, {'env' : env, 'install_dir' : install_dir}) 16 | 17 | -------------------------------------------------------------------------------- /msufsort/src/executable/SConscript: 
-------------------------------------------------------------------------------- 1 | Import('env', 'install_dir') 2 | 3 | 4 | #=============================================================================== 5 | # list of subdirectories to build 6 | #=============================================================================== 7 | 8 | sub_directories = ['msufsort'] 9 | 10 | 11 | #=============================================================================== 12 | # invoke SConscript on each subdirectory 13 | #=============================================================================== 14 | 15 | for directory_name in sub_directories: 16 | env.SConscript('%s/SConscript' % directory_name, {'env' : env, 'install_dir' : install_dir + '/bin/'}) 17 | 18 | -------------------------------------------------------------------------------- /msufsort/src/executable/msufsort/SConscript: -------------------------------------------------------------------------------- 1 | Import('env', 'install_dir') 2 | 3 | 4 | target_name = 'msufsort' 5 | target_build_number = '1.0' 6 | 7 | 8 | #=============================================================================== 9 | # target specific settings (appended to env default settings) 10 | #=============================================================================== 11 | 12 | target_specific_lib_dependencies = [ 13 | 'msufsort', 'pthread' 14 | ] 15 | 16 | target_specific_lib_dependency_paths = [] 17 | target_specific_include_paths = [] 18 | target_specific_compiler_flags = [] 19 | target_specific_linker_flags = [] 20 | target_specific_configurations = [] 21 | 22 | 23 | #=============================================================================== 24 | # target's source files 25 | #=============================================================================== 26 | 27 | target_source_files = ['./main.cpp'] 28 | 29 | 30 | #=============================================================================== 31 | # build target and install 
32 | #=============================================================================== 33 | 34 | target_env = env.Clone() 35 | target_env.Append(LIBS = target_specific_lib_dependencies) 36 | target_env.Append(LIBPATH = target_specific_lib_dependency_paths) 37 | target_env.Append(LIBFLAGS = target_specific_linker_flags) 38 | target_env.Append(CPPPATH = target_specific_include_paths) 39 | target_env.Append(CPPDEFINES = {'BUILD_NUM' : target_build_number}) 40 | target_env.Append(CPPFLAGS = target_specific_compiler_flags) 41 | for configuration in target_specific_configurations: 42 | target_env.ParseConfig(configuration) 43 | target_object = target_env.Program(target_name, target_source_files) 44 | target_env.Install(install_dir, target_object) 45 | -------------------------------------------------------------------------------- /msufsort/src/include/endian.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | //============================================================================== 4 | // 5 | // endian types 6 | // 7 | //============================================================================== 8 | 9 | #include "./endian/endian.h" 10 | 11 | -------------------------------------------------------------------------------- /msufsort/src/include/endian/byte_swap.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | 8 | namespace maniscalco 9 | { 10 | 11 | namespace impl 12 | { 13 | 14 | template class byte_swap_impl; 15 | 16 | template 17 | class byte_swap_impl::type> 18 | { 19 | public: 20 | 21 | inline T operator() 22 | ( 23 | T value 24 | ) const 25 | { 26 | return value; 27 | } 28 | 29 | }; // bytes_swap_impl 30 | 31 | 32 | template 33 | class byte_swap_impl::type> 34 | { 35 | public: 36 | 37 | inline T operator() 38 | ( 39 | T value 40 | ) const 41 | { 42 | return ((value >> 8) | (value << 8)); 43 | } 44 | 
45 | }; // bytes_swap_impl 46 | 47 | 48 | template 49 | class byte_swap_impl::type> 50 | { 51 | public: 52 | 53 | inline T operator() 54 | ( 55 | T value 56 | ) const 57 | { 58 | return __builtin_bswap32(value); 59 | } 60 | 61 | }; // bytes_swap_impl 62 | 63 | 64 | template 65 | class byte_swap_impl::type> 66 | { 67 | public: 68 | 69 | inline T operator() 70 | ( 71 | T value 72 | ) const 73 | { 74 | return __builtin_bswap64(value); 75 | } 76 | 77 | }; // bytes_swap_impl 78 | 79 | } // namespace impl 80 | 81 | 82 | template 83 | T byte_swap 84 | ( 85 | T 86 | ); 87 | 88 | } // namespace maniscalco 89 | 90 | 91 | //============================================================================== 92 | template 93 | static inline T maniscalco::byte_swap 94 | ( 95 | T value 96 | ) 97 | { 98 | return impl::byte_swap_impl()(value); 99 | } 100 | 101 | -------------------------------------------------------------------------------- /msufsort/src/include/endian/endian_swap.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include "./endian_type.h" 5 | #include "./byte_swap.h" 6 | 7 | namespace maniscalco 8 | { 9 | 10 | namespace impl 11 | { 12 | 13 | template 14 | class endian_swap_impl; 15 | 16 | template 17 | class endian_swap_impl::value>::type> 19 | { 20 | public: 21 | 22 | template 23 | inline InputType operator() 24 | ( 25 | InputType input 26 | ) const 27 | { 28 | return input; 29 | } 30 | }; // specialized for no op 31 | 32 | template 33 | class endian_swap_impl::value>::type> 35 | { 36 | public: 37 | 38 | template 39 | inline InputType operator() 40 | ( 41 | InputType input 42 | ) const 43 | { 44 | return byte_swap(input); 45 | } 46 | }; // specialized for byte swap 47 | 48 | } // namespace impl 49 | 50 | 51 | template 52 | InputType endian_swap 53 | ( 54 | InputType 55 | ); 56 | 57 | } // namespace maniscalco 58 | 59 | 60 | 
//============================================================================== 61 | template 62 | static inline InputType maniscalco::endian_swap 63 | ( 64 | InputType value 65 | ) 66 | { 67 | return ::maniscalco::impl::endian_swap_impl()(value); 68 | } 69 | 70 | -------------------------------------------------------------------------------- /msufsort/src/include/endian/endian_type.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | 6 | namespace maniscalco 7 | { 8 | 9 | struct big_endian_type{}; 10 | struct little_endian_type{}; 11 | 12 | using network_order_type = big_endian_type; 13 | 14 | #ifdef BOOST_LITTLE_ENDIAN 15 | using host_order_type = little_endian_type; 16 | #else 17 | using host_order_type = big_endian_type; 18 | #endif 19 | 20 | } // namespace maniscalco 21 | 22 | -------------------------------------------------------------------------------- /msufsort/src/include/type_traits/enable_if_integral.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | 6 | namespace maniscalco 7 | { 8 | 9 | template using enable_if_integral = typename std::enable_if::value>::type; 10 | 11 | } // namespace maniscalco 12 | 13 | 14 | -------------------------------------------------------------------------------- /msufsort/src/include/type_traits/enable_if_integral_or_enum.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | 6 | namespace maniscalco 7 | { 8 | 9 | template using enable_if_integral_or_enum = 10 | typename std::enable_if::value || std::is_enum::value>::type; 11 | 12 | } // namespace maniscalco 13 | 14 | 15 | -------------------------------------------------------------------------------- /msufsort/src/include/type_traits/is_endian.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | 
#include 4 | 5 | 6 | namespace maniscalco 7 | { 8 | 9 | template struct is_endian : public std::false_type{}; 10 | template struct is_endian >::value>::type> : public std::true_type{}; 11 | 12 | } // namespace maniscalco 13 | 14 | -------------------------------------------------------------------------------- /msufsort/src/include/type_traits/opposite_endian.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace maniscalco 7 | { 8 | 9 | template struct opposite_endian; 10 | 11 | 12 | //============================================================================== 13 | template <> 14 | struct opposite_endian 15 | { 16 | using type = little_endian_type; 17 | }; 18 | 19 | 20 | //============================================================================== 21 | template <> 22 | struct opposite_endian 23 | { 24 | using type = big_endian_type; 25 | }; 26 | 27 | } // namespace maniscalco 28 | 29 | -------------------------------------------------------------------------------- /msufsort/src/include/type_traits/remove_endian.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | 6 | namespace maniscalco 7 | { 8 | 9 | template struct remove_endian; 10 | 11 | 12 | //============================================================================== 13 | template 14 | struct remove_endian::value>::type> 15 | { 16 | using type = typename T::value_type; 17 | }; 18 | 19 | 20 | //============================================================================== 21 | template 22 | struct remove_endian::value>::type> 23 | { 24 | using type = T; 25 | }; 26 | 27 | } // namespace maniscalco 28 | 29 | -------------------------------------------------------------------------------- /msufsort/src/library/SConscript: -------------------------------------------------------------------------------- 1 | Import('env', 'install_dir') 2 | 
3 | 4 | #=============================================================================== 5 | # list of subdirectories to build 6 | #=============================================================================== 7 | 8 | sub_directories = ['msufsort'] 9 | 10 | 11 | #=============================================================================== 12 | # invoke SConscript on each subdirectory 13 | #=============================================================================== 14 | 15 | for directory_name in sub_directories: 16 | env.SConscript('%s/SConscript' % directory_name, {'env' : env, 'install_dir' : install_dir + '/lib/'}) 17 | 18 | -------------------------------------------------------------------------------- /msufsort/src/library/msufsort.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "./msufsort/msufsort.h" 4 | 5 | -------------------------------------------------------------------------------- /msufsort/src/library/msufsort/SConscript: -------------------------------------------------------------------------------- 1 | Import('env', 'install_dir') 2 | 3 | 4 | target_name = 'msufsort' 5 | target_build_number = '4.0a' 6 | 7 | 8 | #=============================================================================== 9 | # target specific settings (appended to env default settings) 10 | #=============================================================================== 11 | 12 | target_specific_lib_dependencies = [] 13 | target_specific_lib_dependency_paths = [] 14 | target_specific_include_paths = [] 15 | target_specific_compiler_flags = [] 16 | target_specific_linker_flags = [] 17 | target_specific_configurations = [] 18 | 19 | 20 | #=============================================================================== 21 | # target's source files 22 | #=============================================================================== 23 | 24 | target_source_files = ['./msufsort.cpp'] 25 | 26 | 27 | 
#=============================================================================== 28 | # build target and install 29 | #=============================================================================== 30 | 31 | target_env = env.Clone() 32 | target_env.Append(LIBS = target_specific_lib_dependencies) 33 | target_env.Append(LIBPATH = target_specific_lib_dependency_paths) 34 | target_env.Append(LIBFLAGS = target_specific_linker_flags) 35 | target_env.Append(CPPPATH = target_specific_include_paths) 36 | target_env.Append(CPPDEFINES = {'BUILD_NUM' : target_build_number}) 37 | target_env.Append(CPPFLAGS = target_specific_compiler_flags) 38 | for configuration in target_specific_configurations: 39 | target_env.ParseConfig(configuration) 40 | target_object = target_env.Library(target_name, target_source_files) 41 | target_env.Install(install_dir, target_object) 42 | -------------------------------------------------------------------------------- /qsufsort/.gitignore: -------------------------------------------------------------------------------- 1 | suftest 2 | -------------------------------------------------------------------------------- /qsufsort/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for suftest 2 | 3 | # options 4 | CC = gcc 5 | #CXX = g++ 6 | #OUTPUT_OPTION = -o $@ 7 | CFLAGS = -O3 -fomit-frame-pointer 8 | #CXXFLAGS = -O3 -fomit-frame-pointer 9 | CPPFLAGS = -Wall -DNDEBUG 10 | LDFLAGS = 11 | LDLIBS = 12 | #TARGET_ARCH = 13 | 14 | # targets 15 | .PHONY: all 16 | all: suftest 17 | suftest: qsufsort.o suftest.o 18 | test: 19 | $(CC) -O -g -Wall test.c qsufsort.c -o test 20 | ./test 21 | $(RM) test test.exe 22 | 23 | distclean: clean 24 | clean: 25 | $(RM) suftest suftest.exe test test.exe qsufsort.o suftest.o 26 | 27 | # dependencies 28 | qsufsort.o suftest.o: Makefile 29 | -------------------------------------------------------------------------------- /sa-ds/.gitignore: 
-------------------------------------------------------------------------------- 1 | is 2 | -------------------------------------------------------------------------------- /sa-ds/is.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | using namespace std; 5 | #define DEBUGLEVEL 1 6 | #include 7 | #if !defined( unix ) 8 | #include 9 | #include 10 | #endif 11 | 12 | // comment to disable verifying the result SA 13 | #define _verify_sa 14 | 15 | void SA_DS(unsigned char *s, int *SA, int n, int K, int m, int level); 16 | 17 | // output values: 18 | // 1: s1s2 21 | int sless(unsigned char *s1, unsigned char *s2, unsigned int n) { 22 | for(unsigned int i=0; i s2[i]) return -1; 25 | } 26 | return 0; 27 | } 28 | 29 | // test if SA is sorted for the input string s 30 | bool isSorted(unsigned int *SA, unsigned char *s, unsigned int n) { 31 | for(unsigned int i = 0; i < n-1; i++) { 32 | unsigned int d=SA[i]SA[i]) 35 | return false; 36 | } 37 | return true; 38 | } 39 | 40 | int main(int argc, char **argv) { 41 | fprintf( stderr, "\nComputing suffix array by SA-DS (d=3) on " ); 42 | if ( argc > 1 ) { 43 | freopen( argv[ 1 ], "rb", stdin ); 44 | fprintf( stderr, "%s", argv[ 1 ] ); 45 | } else 46 | fprintf( stderr, "stdin" ); 47 | fprintf( stderr, " to " ); 48 | if ( argc > 2 ) { 49 | freopen( argv[ 2 ], "wb", stdout ); 50 | fprintf( stderr, "%s", argv[ 2 ] ); 51 | } else 52 | fprintf( stderr, "stdout" ); 53 | fprintf( stderr, "\n" ); 54 | #if !defined( unix ) 55 | setmode( fileno( stdin ), O_BINARY ); 56 | setmode( fileno( stdout ), O_BINARY ); 57 | #endif 58 | 59 | // Allocate 5(n+1) bytes memory for input string and output suffix array 60 | fseek(stdin, 0, SEEK_END); 61 | int n=ftell(stdin); 62 | if(n<=0) { 63 | fprintf(stderr, "Empty file, nothing to sort, exit!"); 64 | return 0; 65 | } 66 | 67 | n++; // count for the virtual sentinel 68 | fprintf(stderr, "Allocating input and output space: %d bytes 
= %.3lf MB", 5*n, (double)5*n/1024/1024); 69 | unsigned char *s_ch=new unsigned char[n]; 70 | int *SA = new int[n]; 71 | if(s_ch==NULL || SA==NULL) { 72 | delete [] s_ch; delete [] SA; 73 | fprintf(stderr, "\nInsufficient memory, exit!"); 74 | return 0; 75 | } 76 | 77 | // read the string into buffer. 78 | fprintf(stderr, "\nReading input string..."); 79 | fseek(stdin, 0, SEEK_SET ); 80 | fread((unsigned char *) s_ch, 1, n-1, stdin); 81 | s_ch[n-1]=0; // append the virtual sentinel 82 | 83 | clock_t start, finish; 84 | double duration; 85 | start = clock(); 86 | 87 | SA_DS(s_ch, SA, n, 255, n, 0); 88 | 89 | finish = clock(); 90 | duration = (double)(finish - start) / CLOCKS_PER_SEC; 91 | 92 | fprintf( stderr, "\nSize: %d bytes, Time: %5.4f seconds\n", n-1, duration); 93 | 94 | #ifdef _verify_sa 95 | fprintf( stderr, "\nVerifying the suffix array..."); 96 | fprintf( stderr, "\nSorted: %d", (int)isSorted((unsigned int *)SA, (unsigned char *)s_ch, (unsigned int)n)); 97 | #endif 98 | 99 | fprintf( stderr, "\nOutputing the suffix array..."); 100 | for(int i=1; i 4 | #include 5 | #include 6 | 7 | void SAIS(unsigned char *s, int *SA, int n, int K, int cs, int level); 8 | 9 | // uncomment the below line to verify the result SA 10 | //#define _verify_sa 11 | 12 | // output values: 13 | // 1: s1s2 16 | int sless(unsigned char *s1, unsigned char *s2, int n) { 17 | for(int i=0; i s2[i]) return -1; 20 | } 21 | return 0; 22 | } 23 | 24 | // test if SA is sorted for the input string s 25 | bool isSorted(int *SA, unsigned char *s, int n) { 26 | for(int i = 0; i < n-1; i++) { 27 | int d=SA[i]SA[i]) 30 | return 0; 31 | } 32 | return 1; 33 | } 34 | 35 | int main(int argc, char **argv) { 36 | fprintf( stderr, "\nComputing suffix array by SA-IS on " ); 37 | if ( argc > 1 ) { 38 | freopen( argv[ 1 ], "rb", stdin ); 39 | fprintf( stderr, "%s", argv[ 1 ] ); 40 | } else 41 | fprintf( stderr, "stdin" ); 42 | fprintf( stderr, " to " ); 43 | if ( argc > 2 ) { 44 | freopen( argv[ 2 ], "wb", 
stdout ); 45 | fprintf( stderr, "%s", argv[ 2 ] ); 46 | } else 47 | fprintf( stderr, "stdout" ); 48 | fprintf( stderr, "\n" ); 49 | #if !defined( unix ) 50 | setmode( fileno( stdin ), O_BINARY ); 51 | setmode( fileno( stdout ), O_BINARY ); 52 | #endif 53 | 54 | // Allocate 5 bytes memory for input string and output suffix array 55 | fseek(stdin, 0, SEEK_END); 56 | int n=ftell(stdin); 57 | if(n==0) { 58 | fprintf(stderr, "\nEmpty string, nothing to sort!"); 59 | return 0; 60 | } 61 | n++; // append the virtual sentinel 62 | fprintf(stderr, "Allocating input and output space: %ld bytes = %.2lf MB", 5*n, (double)5*n/1024/1024); 63 | unsigned char *s_ch=new unsigned char[n]; 64 | int *SA = new int[n]; 65 | if(s_ch==NULL || SA==NULL) { 66 | delete [] s_ch; delete [] SA; 67 | fprintf(stderr, "\nInsufficient memory, exit!"); 68 | return 0; 69 | } 70 | 71 | // read the string into buffer. 72 | fprintf(stderr, "\nReading input string..."); 73 | fseek(stdin, 0, SEEK_SET ); 74 | fread((unsigned char *) s_ch, 1, n-1, stdin); 75 | 76 | fprintf(stderr, "\nConstructing the suffix array..."); 77 | 78 | s_ch[n-1]=0; // set the virtual sentinel 79 | 80 | clock_t start, finish; 81 | double duration; 82 | start = clock(); 83 | 84 | SAIS(s_ch, SA, n, 256, sizeof(char), 0); 85 | 86 | finish = clock(); 87 | duration = (double)(finish - start) / CLOCKS_PER_SEC; 88 | 89 | fprintf(stderr, "\nSize: %d bytes, Time: %5.3f seconds\n", n-1, duration); 90 | 91 | #ifdef _verify_sa 92 | fprintf(stderr, "\nVerifying the suffix array..."); 93 | fprintf(stderr, "\nIsSorted: %d", (int)isSorted(SA+1, s_ch, n-1)); 94 | #endif 95 | 96 | /* 97 | fprintf(stderr, "\nOutputing the suffix array..."); 98 | for(unsigned int i=1; i 4 | #include 5 | #include 6 | 7 | void SACA_K(unsigned char *s, unsigned int *SA, unsigned int n, 8 | unsigned int K, unsigned int m, int level); 9 | 10 | // uncomment the below line to verify the result SA 11 | //#define _verify_sa 12 | 13 | // output values: 14 | // 1: s1s2 17 | int 
sless(unsigned char *s1, unsigned char *s2, unsigned int n) { 18 | for(unsigned int i=0; i s2[i]) return -1; 21 | } 22 | return 0; 23 | } 24 | 25 | // test if SA is sorted for the input string s 26 | bool isSorted(unsigned int *SA, unsigned char *s, unsigned int n) { 27 | for(unsigned int i = 0; i < n-1; i++) { 28 | unsigned int d=SA[i]SA[i]) 31 | return false; 32 | } 33 | return true; 34 | } 35 | 36 | int main(int argc, char **argv) { 37 | fprintf(stderr, "\nComputing suffix array by SACA-K on "); 38 | if (argc>1) { 39 | freopen(argv[1], "rb", stdin); 40 | fprintf(stderr, "%s", argv[1]); 41 | } else 42 | fprintf(stderr, "stdin"); 43 | fprintf(stderr, " to "); 44 | if (argc>2) { 45 | freopen(argv[2], "wb", stdout); 46 | fprintf(stderr, "%s", argv[2]); 47 | } else 48 | fprintf(stderr, "stdout"); 49 | fprintf(stderr, "\n"); 50 | #if !defined(unix) 51 | setmode(fileno(stdin), O_BINARY); 52 | setmode(fileno(stdout), O_BINARY); 53 | #endif 54 | 55 | // Allocate 5 bytes memory for input string and output suffix array 56 | fseek(stdin, 0, SEEK_END); 57 | unsigned int n=ftell(stdin); 58 | if(n==0) { 59 | fprintf(stderr, "\nEmpty string, nothing to sort!"); 60 | return 0; 61 | } 62 | else if(n+1==0) { 63 | fprintf(stderr, "\nCan not sort file of n>=%u bytes!", n); 64 | return 0; 65 | } 66 | 67 | n++; // append the virtual sentinel 68 | fprintf(stderr, "Allocating input and output space: %u bytes = %.2lf MB", 5*n, (double)5*n/1024/1024); 69 | unsigned char *s_ch=new unsigned char[n]; 70 | unsigned int *SA = new unsigned int[n]; 71 | if(s_ch==NULL || SA==NULL) { 72 | delete [] s_ch; delete [] SA; 73 | fprintf(stderr, "\nInsufficient memory, exit!"); 74 | return 0; 75 | } 76 | 77 | // read the string into buffer. 
78 | fprintf(stderr, "\nReading input string..."); 79 | fseek(stdin, 0, SEEK_SET ); 80 | fread((unsigned char *) s_ch, 1, n-1, stdin); 81 | // set the virtual sentinel 82 | s_ch[n-1]=0; 83 | 84 | clock_t start, finish; 85 | double duration; 86 | start = clock(); 87 | 88 | fprintf(stderr, "\nConstructing the suffix array..."); 89 | SACA_K(s_ch, SA, n, 256, n, 0); 90 | 91 | finish = clock(); 92 | duration = (double)(finish - start) / CLOCKS_PER_SEC; 93 | 94 | fprintf(stderr, "\nSize: %u bytes, Time: %5.3f seconds\n", n-1, duration); 95 | 96 | #ifdef _verify_sa 97 | fprintf(stderr, "\nVerifying the suffix array..."); 98 | fprintf(stderr, "\nSorted: %d", (int)isSorted(SA+1, s_ch, n-1)); 99 | #endif 100 | 101 | /* 102 | fprintf(stderr, "\nOutputing the suffix array..."); 103 | for(unsigned int i=1; i 16 | -------------------------------------------------------------------------------- /sais-lite/sais.h: -------------------------------------------------------------------------------- 1 | /* 2 | * sais.h for sais-lite 3 | * Copyright (c) 2008-2010 Yuta Mori All Rights Reserved. 4 | * 5 | * Permission is hereby granted, free of charge, to any person 6 | * obtaining a copy of this software and associated documentation 7 | * files (the "Software"), to deal in the Software without 8 | * restriction, including without limitation the rights to use, 9 | * copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the 11 | * Software is furnished to do so, subject to the following 12 | * conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be 15 | * included in all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 19 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 20 | * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 21 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 22 | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 24 | * OTHER DEALINGS IN THE SOFTWARE. 25 | */ 26 | 27 | #ifndef _SAIS_H 28 | #define _SAIS_H 1 29 | 30 | #ifdef __cplusplus 31 | extern "C" { 32 | #endif /* __cplusplus */ 33 | 34 | /* find the suffix array SA of T[0..n-1] 35 | use a working space (excluding T and SA) of at most 2n+O(lg n) */ 36 | int 37 | sais(const unsigned char *T, int *SA, int n); 38 | 39 | /* find the suffix array SA of T[0..n-1] in {0..k-1}^n 40 | use a working space (excluding T and SA) of at most MAX(4k,2n) */ 41 | int 42 | sais_int(const int *T, int *SA, int n, int k); 43 | 44 | /* burrows-wheeler transform */ 45 | int 46 | sais_bwt(const unsigned char *T, unsigned char *U, int *A, int n); 47 | int 48 | sais_int_bwt(const int *T, int *U, int *A, int n, int k); 49 | 50 | 51 | #ifdef __cplusplus 52 | } /* extern "C" */ 53 | #endif /* __cplusplus */ 54 | 55 | #endif /* _SAIS_H */ 56 | --------------------------------------------------------------------------------