├── .github └── workflows │ └── build.yml ├── CMakeLists.txt ├── LICENSE ├── README.md ├── external ├── adaptive.c ├── burstsortA.c ├── burstsortL.c ├── cradix-improved.c ├── cradix.c ├── forward16.c ├── forward8.c ├── lcp-quicksort.cpp ├── mbmradix.c ├── mkqsort.c ├── msd.c ├── multikey.c ├── nilsson.h ├── parallel_string_radix_sort.cpp ├── parallel_string_radix_sort.h ├── quicksort.c ├── utils.c └── utils.h ├── report ├── Makefile ├── README ├── benchmark ├── collect-memusage-statistics ├── collect-oprofile-statistics ├── generate-algs-xml ├── htmlreport.xsl ├── oprofile-simplify.xsl ├── process-memusage-statistics ├── process-oprofile-statistics └── sortable.js ├── src ├── burstsort.cpp ├── burstsort2.cpp ├── burstsort_mkq.cpp ├── funnelsort.cpp ├── losertree.h ├── mergesort.cpp ├── mergesort_lcp.cpp ├── mergesort_losertree.cpp ├── mergesort_unstable.cpp ├── msd_a.cpp ├── msd_a2.cpp ├── msd_ce.cpp ├── msd_ci.cpp ├── msd_dyn_block.cpp ├── msd_dyn_vector.cpp ├── msd_lsd.cpp ├── multikey_block.cpp ├── multikey_cache.cpp ├── multikey_dynamic.cpp ├── multikey_multipivot.cpp ├── multikey_simd.cpp ├── routine.h ├── routines.c ├── routines.h ├── sortstring.c ├── util │ ├── cpus_allowed.c │ ├── cpus_allowed.h │ ├── debug.h │ ├── get_char.h │ ├── insertion_sort.h │ ├── median.h │ ├── sdt.h │ ├── timing.c │ ├── timing.h │ ├── vmainfo.c │ └── vmainfo.h ├── vector_bagwell.h ├── vector_block.h ├── vector_brodnik.h ├── vector_malloc.h └── vector_realloc.h └── unit-test └── main.cpp /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Build and unit test 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | build: 7 | name: ${{ matrix.build_type }} GCC on ${{ matrix.os }} 8 | runs-on: ${{ matrix.os }} 9 | strategy: 10 | fail-fast: false 11 | matrix: 12 | build_type: [Release, Debug] 13 | os: [ubuntu-20.04, ubuntu-18.04] 14 | steps: 15 | - uses: actions/checkout@v2 16 | - name: Install dependencies 17 | run: sudo 
apt-get install cmake 18 | - name: cmake 19 | run: cmake -B builddir -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} 20 | - name: make 21 | run: make -j $(nproc) -C builddir 22 | - name: unit test 23 | run: ./builddir/unit-test 24 | build-llvm: 25 | name: ${{ matrix.build_type }} Clang on ${{ matrix.os }} 26 | runs-on: ${{ matrix.os }} 27 | strategy: 28 | fail-fast: false 29 | matrix: 30 | build_type: [Release, Debug] 31 | os: [ubuntu-20.04] 32 | steps: 33 | - uses: actions/checkout@v2 34 | - name: Install dependencies 35 | run: sudo apt-get install cmake clang systemtap-sdt-dev 36 | - name: cmake 37 | run: CC=clang CXX=clang++ cmake -B builddir -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} 38 | - name: make 39 | run: make -j $(nproc) -C builddir 40 | - name: unit test 41 | run: ./builddir/unit-test 42 | -------------------------------------------------------------------------------- /CMakeLists.txt: --------------------------------------------------------------------------------
cmake_minimum_required(VERSION 3.1)

# Build all C++ sources as C++11. REQUIRED turns an unsupported standard into a
# configure-time error instead of a silent fallback to an older dialect.
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
include(CheckIncludeFile)

project(sortstring)

# -march=native ties optimized binaries to the CPU of the build host and is not
# accepted by every compiler/architecture combination. It stays enabled by
# default (the previous behaviour); configure with
# -DSORTSTRING_MARCH_NATIVE=OFF to get portable optimized binaries.
option(SORTSTRING_MARCH_NATIVE
  "Add -march=native to Release and RelWithDebInfo builds" ON)

# Sorting routines and utilities maintained in this repository.
set(INTERNAL_SRCS
  src/funnelsort.cpp
  src/msd_a.cpp
  src/msd_a2.cpp
  src/msd_lsd.cpp
  src/msd_ce.cpp
  src/msd_ci.cpp
  src/msd_dyn_block.cpp
  src/msd_dyn_vector.cpp
  src/burstsort.cpp
  src/burstsort2.cpp
  src/burstsort_mkq.cpp
  src/multikey_simd.cpp
  src/multikey_dynamic.cpp
  src/multikey_block.cpp
  src/multikey_multipivot.cpp
  src/multikey_cache.cpp
  src/mergesort.cpp
  src/mergesort_unstable.cpp
  src/mergesort_losertree.cpp
  src/mergesort_lcp.cpp
  src/routines.c
  src/util/timing.c
  src/util/cpus_allowed.c
  src/util/vmainfo.c)

# Third-party reference implementations kept under external/.
set(EXTERNAL_SRCS
  external/lcp-quicksort.cpp
  external/mbmradix.c
  external/quicksort.c
  external/mkqsort.c
  external/forward8.c
  external/cradix.c
  external/cradix-improved.c
  external/msd.c
  external/multikey.c
  external/burstsortL.c
  external/utils.c
  external/adaptive.c
  external/burstsortA.c
  external/forward16.c
  external/parallel_string_radix_sort.cpp)

# Sets HAVE_SYS_SDT_H when <sys/sdt.h> is available; the result is forwarded to
# the sources as a compile definition below.
check_include_file(sys/sdt.h HAVE_SYS_SDT_H)

# These two external files trigger -Wsign-compare under -Wextra; silence the
# warning for them only.
set_source_files_properties(
  external/adaptive.c
  external/quicksort.c
  PROPERTIES COMPILE_FLAGS -Wno-sign-compare)

# Warning level for every configuration; C sources are compiled as strict C99.
set(CMAKE_CXX_FLAGS "-Wall -Wextra ${CMAKE_CXX_FLAGS}")
set(CMAKE_C_FLAGS "-Wall -Wextra -std=c99 ${CMAKE_C_FLAGS}")

# Flags shared by the optimized configurations. Note that OpenMP is only
# switched on here, so Debug builds are compiled without -fopenmp.
set(SORTSTRING_OPTIMIZED_FLAGS "-fopenmp -g -DNDEBUG")
if(SORTSTRING_MARCH_NATIVE)
  set(SORTSTRING_OPTIMIZED_FLAGS "${SORTSTRING_OPTIMIZED_FLAGS} -march=native")
endif()

foreach(lang C CXX)
  foreach(config RELEASE RELWITHDEBINFO)
    set(CMAKE_${lang}_FLAGS_${config}
      "${SORTSTRING_OPTIMIZED_FLAGS} ${CMAKE_${lang}_FLAGS_${config}}")
  endforeach()
  # Debug: light optimization plus fortified libc calls; any _FORTIFY_SOURCE
  # value injected earlier on the command line is reset first.
  set(CMAKE_${lang}_FLAGS_DEBUG
    "${CMAKE_${lang}_FLAGS_DEBUG} -O1 -g -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2")
endforeach()

# Both executables compile the full set of sorting routines; unit-test builds
# them a second time with UNIT_TEST defined.
add_executable(sortstring src/sortstring.c ${INTERNAL_SRCS} ${EXTERNAL_SRCS})
add_executable(unit-test unit-test/main.cpp ${INTERNAL_SRCS} ${EXTERNAL_SRCS})
target_compile_definitions(unit-test PRIVATE UNIT_TEST)

foreach(target sortstring unit-test)
  target_include_directories(${target} PRIVATE
    ${CMAKE_CURRENT_SOURCE_DIR}/src
    ${CMAKE_CURRENT_SOURCE_DIR}/src/util)
  # Map the C99 `restrict` keyword to the compiler extension spelling so the
  # same sources also build as C++.
  target_compile_definitions(${target} PRIVATE restrict=__restrict__)
  if(HAVE_SYS_SDT_H)
    target_compile_definitions(${target} PRIVATE HAVE_SYS_SDT_H=1)
  endif()
  # NOTE(review): librt is presumably needed for the clock functions used by
  # src/util/timing.c on older glibc -- confirm before removing.
  target_link_libraries(${target} PRIVATE rt)
endforeach()
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Permission is hereby granted, free of charge, to any person obtaining a copy 2 | of this software and associated documentation files (the "Software"), to deal 3 | in the Software
without restriction, including without limitation the rights 4 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 5 | copies of the Software, and to permit persons to whom the Software is 6 | furnished to do so, subject to the following conditions: 7 | 8 | The above copyright notice and this permission notice shall be included in 9 | all copies or substantial portions of the Software. 10 | 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 13 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 14 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 15 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 16 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 17 | THE SOFTWARE. 18 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | A collection of string sorting algorithm implementations 2 | ======================================================== 3 | 4 | This collection features several string sorting algorithm implementations that 5 | have been tuned to take better advantage of modern hardware. Classic 6 | implementations tend to optimize instruction counts, but when sorting large 7 | collections of strings, we also need to focus on memory issues. All algorithms 8 | are implemented in C and C++. 9 | 10 | Technical details: 11 | * All of the implementations sort the strings by raw byte values. This 12 | means that they are mainly intended for research use. 13 | * Includes several variants of known and efficient (string) sorting 14 | algorithms, such as MSD radix sort, burstsort and multi-key-quicksort. 15 | * Emphasis on reducing cache misses and memory stalls. 
16 | * Includes the tools to create an HTML report that can be 17 | used to compare the provided implementations. The report includes details 18 | such as TLB, L1 and L2 cache misses, run times and peak memory usage. 19 | * Supports Linux huge pages. For more information, see below. 20 | 21 | 22 | License 23 | ------- 24 | 25 | MIT. 26 | 27 | Exception: The directory `external` contains files that are included for 28 | reference purposes and may or may not be compatible with the MIT license. 29 | 30 | 31 | Copyright 32 | --------- 33 | 34 | Copyright © 2007-2012 by Tommi Rantala 35 | 36 | The directory `external` contains files that are included for reference 37 | purposes and are copyrighted by their respective authors. 38 | 39 | 40 | Requirements 41 | ------------ 42 | 43 | * C++11 44 | * CMake 45 | 46 | 47 | Compilation 48 | ----------- 49 | 50 | Default compilation with GCC: 51 | 52 | $ git clone https://github.com/rantala/string-sorting.git 53 | $ mkdir string-sorting-build 54 | $ cd string-sorting-build 55 | $ cmake -DCMAKE_BUILD_TYPE=Release ../string-sorting 56 | $ make 57 | $ ./sortstring 58 | 59 | Use a separate debug build for easier debugging: 60 | 61 | $ mkdir debug-build 62 | $ cd debug-build 63 | $ cmake -DCMAKE_BUILD_TYPE=Debug ../string-sorting 64 | 65 | 66 | Huge pages 67 | ---------- 68 | 69 | The default page size on many computer architectures is 4 kilobytes. When 70 | working with large data sets, this means that the input is spread across thousands 71 | of memory pages. Unfortunately, random access in thousands of pages can be slow 72 | (see e.g. https://en.wikipedia.org/wiki/Translation_lookaside_buffer). 73 | 74 | To alleviate this exact problem, many architectures have support for larger 75 | page sizes. For example, modern x86 has support for 2/4 megabyte "huge pages". 76 | With such large pages, even large data sets fit into a much smaller number of 77 | memory pages. 
78 | 79 | In this program, support for huge pages is enabled using either --hugetlb-text 80 | or --hugetlb-ptrs, or both. The former option places the input data (i.e. the 81 | actual strings from the given file) into huge pages, and the latter option 82 | places the string pointer array into huge pages. Using huge pages in Linux 83 | requires CPU support and properly adjusted kernel settings. 84 | 85 | The external library libhugetlbfs (https://github.com/libhugetlbfs/libhugetlbfs) 86 | can be used to make all calls to malloc allocate from huge pages. If this library is 87 | used, the aforementioned options are not needed. 88 | 89 | 90 | HTML report creation 91 | -------------------- 92 | 93 | Requirements: 94 | * OProfile for most measurements; this probably also requires root privileges. 95 | - The default settings use Intel Core 2 specific events. When profiling on 96 | other platforms, you will most likely need to modify the scripts in the 97 | report/ directory. 98 | * /usr/bin/memusage for measuring the peak memory usage. This is a GNU libc 99 | utility. 100 | -------------------------------------------------------------------------------- /external/burstsortL.c: -------------------------------------------------------------------------------- 1 | /* 2 | This is an implementation of Burstsort using linked lists for buckets. A 3 | more complete discussion of the algorithm, the implementation and a 4 | comparison with other well known sorting algorithms, both radix sorting and 5 | comparison-based methods, can be found in: 6 | 7 | R. Sinha and J. Zobel, "Cache-Conscious Sorting of Large Sets of Strings 8 | with Dynamic Tries", In Proc. 5th Workshop Algorithm Engineering and 9 | Experiments (ALENEX), R. Ladner (ed), Baltimore, Maryland, USA, January 10 | 2003. 11 | 12 | R. Sinha and J. Zobel, "Efficient Trie-based Sorting of Large Sets of 13 | Strings", In Proc. Australasian Computer Science Conference, M. Oudshoorn 14 | (ed), Adelaide, Australia, February, 2003. 
15 | 16 | The code presented in this file has been tested with care but is not 17 | guaranteed for any purpose. The writer does not offer any warranties nor 18 | does he accept any liabilities with respect to the code. 19 | 20 | Ranjan Sinha, 28 July 2003. 21 | 22 | School of Computer Science and Information Technology, RMIT University, 23 | Melbourne, Australia rsinha@cs.rmit.edu.au 24 | 25 | note: 26 | 1. It is a work in progress 27 | 2. Not tuned for number of instructions, use the highest optimizations such as O3 28 | 3. Any relevant changes to code may please be intimated to me 29 | 4. It is solely meant for academic use 30 | */ 31 | 32 | #include "routine.h" 33 | #include "utils.h" 34 | #include 35 | 36 | #define THRESHOLD 8192 37 | #define ALPHABET 256 38 | 39 | typedef struct trierec 40 | { 41 | struct trierec *ptrs[ALPHABET]; 42 | int counts[ALPHABET]; 43 | } TRIE; 44 | 45 | typedef struct strlistrec 46 | { 47 | unsigned char *word; 48 | struct strlistrec *next; 49 | } LIST; 50 | 51 | static void 52 | burstinsertL(TRIE *root, LIST *list, size_t scnt) 53 | { 54 | TRIE *new_; 55 | TRIE *curr; 56 | LIST *node; 57 | LIST *lp, *np; 58 | unsigned int i, p; 59 | unsigned char c, cc; 60 | 61 | for( i=0 ; icounts[c]<0 ; curr=curr->ptrs[c], p++, c=list[i].word[p] ) 67 | ; 68 | 69 | node->next = (LIST *) curr->ptrs[c]; 70 | curr->ptrs[c] = (TRIE *) node; 71 | 72 | if( c=='\0' ) 73 | { 74 | ; /* leave counter alone to avoid overflow, no burst */ 75 | } 76 | else 77 | { 78 | curr->counts[c]++; 79 | if( curr->counts[c]>THRESHOLD ) /* burst */ 80 | { 81 | curr->counts[c] = -1; 82 | p++; 83 | new_ = (TRIE *) calloc(1, sizeof(TRIE)); 84 | 85 | lp = (LIST *) curr->ptrs[c], cc = lp->word[p], np = lp->next; 86 | while( lp!=NULL ) 87 | { 88 | lp->next = (LIST *) new_->ptrs[cc]; 89 | new_->ptrs[cc] = (TRIE *) lp; 90 | new_->counts[cc] ++; 91 | lp = np; 92 | if( lp!=NULL ) 93 | { 94 | cc = lp->word[p]; 95 | np = lp->next; 96 | } 97 | } 98 | curr->ptrs[c] = new_; 99 | 
curr->counts[c] = -1; /* used to traverse along the trie hierarchy */ 100 | curr = new_; /* used to burst recursive, so point curr to new */ 101 | c = cc; /* point to the character that the last string was inserted into */ 102 | } 103 | } 104 | } 105 | } 106 | 107 | static int 108 | bursttraverseL(TRIE *node, unsigned char **strings, int pos, int deep) 109 | { 110 | LIST *l; 111 | unsigned int i, off; 112 | unsigned int sizeOfContainer = 0; 113 | 114 | for( i=0 ; icounts[i]<0 ) 117 | { 118 | pos = bursttraverseL(node->ptrs[i], strings, pos, deep+1); 119 | } 120 | else 121 | { 122 | for( off=pos, l=(LIST *) node->ptrs[i] ; l!=NULL ; off++, l=l->next ) 123 | { 124 | strings[off] = l->word; 125 | } 126 | sizeOfContainer = (off - pos); 127 | 128 | if( i>0 && sizeOfContainer > 1 ) 129 | { 130 | if (sizeOfContainer < INSERTBREAK) 131 | inssort( strings+pos, off-pos, deep + 1); 132 | else 133 | mkqsort( strings+pos, off-pos, deep + 1); 134 | } 135 | pos = off; 136 | } 137 | } 138 | free(node); 139 | return pos; 140 | } 141 | 142 | void 143 | burstsortL(unsigned char *strings[], size_t scnt) 144 | { 145 | TRIE *root; 146 | LIST *listnodes; 147 | unsigned int i; 148 | 149 | listnodes = (LIST *) calloc(scnt, sizeof(LIST)); 150 | 151 | for( i=scnt; i-- ;) 152 | listnodes[i].word = strings[i]; 153 | 154 | root = (TRIE *) calloc(1, sizeof(TRIE)); 155 | 156 | (void) burstinsertL(root, listnodes, scnt); 157 | 158 | (void) bursttraverseL(root, strings, 0, 0); 159 | 160 | free(listnodes); 161 | 162 | return; 163 | } 164 | ROUTINE_REGISTER_SINGLECORE(burstsortL, 165 | "Burstsort with List buckets by R. Sinha and J. 
Zobel") 166 | -------------------------------------------------------------------------------- /external/cradix-improved.c: -------------------------------------------------------------------------------- 1 | /* This source code is from the following article: 2 | * 3 | * @article{1226858, 4 | * author = {Waihong Ng and Katsuhiko Kakehi}, 5 | * title = {Cache Efficient Radix Sort for String Sorting}, 6 | * journal = {IEICE Trans. Fundam. Electron. Commun. Comput. Sci.}, 7 | * volume = {E90-A}, 8 | * number = {2}, 9 | * year = {2007}, 10 | * issn = {0916-8508}, 11 | * pages = {457--466}, 12 | * doi = {http://dx.doi.org/10.1093/ietfec/e90-a.2.457}, 13 | * publisher = {Oxford University Press}, 14 | * address = {Oxford, UK}, 15 | * } 16 | * 17 | * Appendix: Source Code of CRadix Sort 18 | */ 19 | 20 | /* 21 | This code is based on the program 3.1 of Engineering radix sort by P.M. 22 | McIlroy, K. Bostic and M.D. McIlroy, Comput. Syst. vol.6, 1993. 23 | 24 | The main improvement is the adoption of the key buffer. 
Key buffers are 25 | filled by the function FillKeyBuffer() while the following code in the main 26 | body is responsible for permuting the key buffers in the order as same as the 27 | access order as the key pointers 28 | 29 | memcpy(ta, tk, sizeof(unsigned char)*n*kbsd); 30 | for (i=0, kb=(LPBYTE)ta; i 54 | * - replace original isort() with slightly different insertion_sort() 55 | * - re-implement FillKeyBuffer() to reduce memory stalls 56 | * - use caching in RDFK() to reduce memory stalls 57 | */ 58 | 59 | #include "routine.h" 60 | #include 61 | #include 62 | 63 | #define AS 256 /* Alphabet size */ 64 | #define BS 4 /* key buffer size */ 65 | #define AL 0 /* Alphabet lower bound */ 66 | #define AH 255 /* Alphabet upper bound */ 67 | #define IC 20 /* Insertion sort cut off */ 68 | #define KBC 128 /* Cache cut off */ 69 | #define SS 4096 /* stack size */ 70 | 71 | #define push(a, k, n, b) _sp->sa=a, _sp->sk=k, _sp->sn=n, (_sp++)->sb=b 72 | #define pop(a, k, n, b) a=(--_sp)->sa, k=_sp->sk, n=_sp->sn, b=_sp->sb 73 | #define stackempty() (_sp<=stack) 74 | #define splittable(c) c > 0 && count[c] > IC 75 | typedef size_t UINT; 76 | typedef unsigned char BYTE, *LPBYTE, **LPPBYTE; 77 | typedef unsigned char STR, *LPSTR, **LPPSTR, **STRPARR; 78 | 79 | static struct Stack { 80 | LPSTR* sa; LPBYTE sk; 81 | int sn, sb; 82 | } stack[SS], *_sp=stack; 83 | 84 | static void 85 | insertion_sort(unsigned char** strings, int n, size_t depth) 86 | { 87 | for (unsigned char** i = strings + 1; --n > 0; ++i) { 88 | unsigned char** j = i; 89 | unsigned char* tmp = *i; 90 | while (j > strings) { 91 | unsigned char* s = *(j-1)+depth; 92 | unsigned char* t = tmp+depth; 93 | while (*s == *t && *s) { 94 | ++s; 95 | ++t; 96 | } 97 | if (*s <= *t) break; 98 | *j = *(j-1); 99 | --j; 100 | } 101 | *j = tmp; 102 | } 103 | } 104 | 105 | static 106 | void FillKeyBuffer(LPPSTR a, LPBYTE kb, UINT* count, UINT n, UINT d) 107 | { 108 | for (size_t i=0; i1 && c>0) insertion_sort(a, n, d); 140 | 
count[c]=0; return; 141 | } 142 | GrpKP[AL]=a; 143 | for (ak=a, i=AL; i1 && i>0) insertion_sort(ak, count[i], d); 151 | ak+=count[i]; count[i]=0; 152 | } 153 | } 154 | 155 | void cradix_rantala(LPPSTR a, UINT n) 156 | { 157 | UINT kbsd, kbsd1, i, j, stage, d, MEMSIZE; 158 | UINT *cptr, gs, count[AS]; 159 | LPSTR tj, tk, ax, tl, kb, ss, tt, GrpKB[AS]; 160 | LPPSTR GrpKP[AS], ak, ta, tc, t; 161 | if (sizeof(LPPSTR)>sizeof(unsigned char)*BS) 162 | MEMSIZE=sizeof(LPPSTR); 163 | else 164 | MEMSIZE=sizeof(unsigned char)*BS; 165 | /* workspace */ 166 | ta = (LPPSTR)malloc(n * MEMSIZE); 167 | /* memory for key buffers */ 168 | tk = (LPBYTE)malloc(n * sizeof(unsigned char) * BS); 169 | tj=tk; 170 | push(a, tk, n, 0); for (i=AL; iKBC) 181 | FillKeyBuffer(a, tk, count, n, stage); 182 | else { 183 | RDFK(GrpKP, a, n, ta, count, stage); 184 | continue; 185 | } 186 | } 187 | /* check if there is only 1 group */ 188 | cptr=&count[AL]; 189 | while (*cptr<1) cptr++; 190 | if (*cptr1 && i>0) 219 | insertion_sort(ak, count[i], stage); 220 | ak+=count[i]; ax+=count[i]*(kbsd1); 221 | count[i]=0; 222 | } 223 | } 224 | else RDFK(GrpKP, a, n, ta, count, stage); 225 | } 226 | free((void*)ta); 227 | free((void*)tj); 228 | } 229 | ROUTINE_REGISTER_SINGLECORE(cradix_rantala, 230 | "CRadix by Waihong Ng and Katsuhiko Kakehi," 231 | " with modifications by Tommi Rantala") 232 | -------------------------------------------------------------------------------- /external/cradix.c: -------------------------------------------------------------------------------- 1 | /* This source code is from the following article: 2 | * 3 | * @article{1226858, 4 | * author = {Waihong Ng and Katsuhiko Kakehi}, 5 | * title = {Cache Efficient Radix Sort for String Sorting}, 6 | * journal = {IEICE Trans. Fundam. Electron. Commun. Comput. 
Sci.}, 7 | * volume = {E90-A}, 8 | * number = {2}, 9 | * year = {2007}, 10 | * issn = {0916-8508}, 11 | * pages = {457--466}, 12 | * doi = {http://dx.doi.org/10.1093/ietfec/e90-a.2.457}, 13 | * publisher = {Oxford University Press}, 14 | * address = {Oxford, UK}, 15 | * } 16 | * 17 | * Appendix: Source Code of CRadix Sort 18 | */ 19 | 20 | /* 21 | This code is based on the program 3.1 of Engineering radix sort by P.M. 22 | McIlroy, K. Bostic and M.D. McIlroy, Comput. Syst. vol.6, 1993. 23 | 24 | The main improvement is the adoption of the key buffer. Key buffers are 25 | filled by the function FillKeyBuffer() while the following code in the main 26 | body is responsible for permuting the key buffers in the order as same as the 27 | access order as the key pointers 28 | 29 | memcpy(ta, tk, sizeof(unsigned char)*n*kbsd); 30 | for (i=0, kb=(LPBYTE)ta; i 57 | #include 58 | 59 | #define AS 256 /* Alphabet size */ 60 | #define BS 4 /* key buffer size */ 61 | #define AL 0 /* Alphabet lower bound */ 62 | #define AH 255 /* Alphabet upper bound */ 63 | #define IC 20 /* Insertion sort cut off */ 64 | #define KBC 128 /* Cache cut off */ 65 | #define SS 4096 /* stack size */ 66 | 67 | #define push(a, k, n, b) sp->sa=a, sp->sk=k, sp->sn=n, (sp++)->sb=b 68 | #define pop(a, k, n, b) a=(--sp)->sa, k=sp->sk, n=sp->sn, b=sp->sb 69 | #define stackempty() (sp<=stack) 70 | #define splittable(c) c > 0 && count[c] > IC 71 | typedef size_t UINT; 72 | typedef unsigned char BYTE, *LPBYTE, **LPPBYTE; 73 | typedef unsigned char STR, *LPSTR, **LPPSTR, **STRPARR; 74 | 75 | static struct Stack { 76 | LPSTR* sa; LPBYTE sk; 77 | int sn, sb; 78 | } stack[SS], *sp=stack; 79 | 80 | static 81 | void FillKeyBuffer(LPPSTR a, LPBYTE kb, UINT* count, UINT n, UINT d) 82 | { 83 | UINT i, j; LPSTR c, x; 84 | for (i=0; i 0; pi++) 96 | for (pj = pi; pj > a; pj--) { 97 | for (s=*(pj-1)+d, t=*pj+d; 98 | *s==*t && *s!=0; s++, t++) ; 99 | if (*s <= *t) break; 100 | t = *(pj); *(pj) = *(pj-1); 101 | *(pj-1) = t; 102 
| } 103 | } 104 | static 105 | void RDFK(LPPSTR* GrpKP, LPPSTR a, UINT n, LPPSTR ta, 106 | UINT* count, UINT d) 107 | { /* Read Directly From Keys */ 108 | LPPSTR ak, tc; UINT i, *cptr, gs; unsigned char c=0; 109 | for (i=0; i1 && c>0) isort(a, n, d); 116 | count[c]=0; return; 117 | } 118 | GrpKP[AL]=a; 119 | for (ak=a, i=AL; i1 && i>0) isort(ak, count[i], d); 127 | ak+=count[i]; count[i]=0; 128 | } 129 | } 130 | void CRadix(LPPSTR a, UINT n) 131 | { 132 | UINT kbsd, kbsd1, i, j, stage, d, MEMSIZE; 133 | UINT *cptr, gs, count[AS]; 134 | LPSTR tj, tk, ax, tl, kb, ss, tt, GrpKB[AS]; 135 | LPPSTR GrpKP[AS], ak, ta, tc, t; 136 | if (sizeof(LPPSTR)>sizeof(unsigned char)*BS) 137 | MEMSIZE=sizeof(LPPSTR); 138 | else 139 | MEMSIZE=sizeof(unsigned char)*BS; 140 | /* workspace */ 141 | ta = (LPPSTR)malloc(n * MEMSIZE); 142 | /* memory for key buffers */ 143 | tk = (LPBYTE)malloc(n * sizeof(unsigned char) * BS); 144 | tj=tk; 145 | push(a, tk, n, 0); for (i=AL; iKBC) 156 | FillKeyBuffer(a, tk, count, n, stage); 157 | else { 158 | RDFK(GrpKP, a, n, ta, count, stage); 159 | continue; 160 | } 161 | } 162 | /* check if there is only 1 group */ 163 | cptr=&count[AL]; 164 | while (*cptr<1) cptr++; 165 | if (*cptr1 && i>0) 194 | isort(ak, count[i], stage); 195 | ak+=count[i]; ax+=count[i]*(kbsd1); 196 | count[i]=0; 197 | } 198 | } 199 | else RDFK(GrpKP, a, n, ta, count, stage); 200 | } 201 | free((void*)ta); 202 | free((void*)tj); 203 | } 204 | 205 | void cradix(unsigned char **strings, size_t n) 206 | { 207 | return CRadix(strings, n); 208 | } 209 | ROUTINE_REGISTER_SINGLECORE(cradix, 210 | "CRadix by Waihong Ng and Katsuhiko Kakehi") 211 | -------------------------------------------------------------------------------- /external/forward8.c: -------------------------------------------------------------------------------- 1 | /* 2 | Forward radixsort with a fixed sized alphabet. The algorithm 3 | inspects one character at a time. 
This code will work well for 4 | alphabets of small size (8 bits). Larger alphabets (16 bits or 5 | more) may, however, require some heuristic to avoid inspecting 6 | empty buckets. 7 | 8 | S. Nilsson. Radix Sorting and Searching. PhD thesis, Department 9 | of Computer Science, Lund University, 1990. 10 | 11 | The code presented in this file has been tested with care but is 12 | not guaranteed for any purpose. The writer does not offer any 13 | warranties nor does he accept any liabilities with respect to 14 | the code. 15 | 16 | Stefan Nilsson, 8 jan 1997. 17 | 18 | Laboratory of Information Processing Science 19 | Helsinki University of Technology 20 | Stefan.Nilsson@hut.fi 21 | */ 22 | 23 | #include "routine.h" 24 | #include "nilsson.h" 25 | #include 26 | 27 | #define IS_ENDMARK(ch) (ch == '\0') 28 | #define CHAR(s, p) s[p] 29 | 30 | typedef struct grouprec *group; 31 | typedef struct bucketrec *bucket; 32 | 33 | struct grouprec { 34 | list head, tail; /* a list of elements */ 35 | group next; /* the next group */ 36 | group nextunf; /* the next unfinished group */ 37 | group insp; /* insertion point */ 38 | boolean finis; /* is the group finished? */ 39 | }; 40 | /* The group structure member insp is used to make splitting of 41 | groups possible during the phase where elements are moved from 42 | buckets back into their previous groups. The group structure 43 | member finis indicates if the elements in the group are sorted; 44 | this information makes it easy to skip finished groups during a 45 | traversal of the group data structure */ 46 | 47 | struct bucketrec { 48 | list head, tail; /* a list of elements */ 49 | int size; /* list length */ 50 | group tag; /* group tag */ 51 | bucket next; /* next bucket item */ 52 | }; 53 | 54 | static memory groupmem[1]; 55 | static memory bucketmem[1]; 56 | 57 | /* Put a list of elements into a bucket. We distinguish between two 58 | cases. 
If the first bucket item has the same tag as the list to 59 | be inserted the list is just appended, otherwise a new bucket 60 | is created. */ 61 | static void intobucket(bucket *b, list head, list tail, 62 | int size, group g) 63 | { 64 | bucket btemp = *b, newb; 65 | 66 | if (!btemp || btemp->tag != g) { /* create new tag */ 67 | newb = (bucket) allocmem(bucketmem, sizeof(struct bucketrec)); 68 | newb->next = btemp; 69 | newb->head = head; 70 | newb->size = size; 71 | newb->tag = g; 72 | *b = btemp = newb; 73 | } else { /* append */ 74 | btemp->tail->next = head; 75 | btemp->size += size; 76 | } 77 | tail->next = NULL; 78 | btemp->tail = tail; 79 | } 80 | 81 | /* Travers the groups and put the elements into buckets. 82 | The parameter pos indicates the current position in the string. 83 | To be able to skip groups that are already sorted we keep track 84 | of the previous group. Also, the previously read character is 85 | recorded. In this way it is possible to move the elements in 86 | blocks consisting of strings that have a common character in 87 | position pos. Furthermore, a group that is not split during this 88 | phase is left behind and not put into a bucket. 
*/ 89 | static void intobuckets(group g, bucket b[], int pos) 90 | { 91 | group prevg; 92 | character ch, prevch; 93 | boolean split; 94 | list tail, tailn; 95 | int size; 96 | 97 | resetmem(bucketmem); 98 | for (prevg = g, g = g->nextunf ; g; g = g->nextunf) { 99 | if (g->finis) 100 | {prevg->nextunf = g->nextunf; continue;} 101 | tail = g->head; split = FALSE; 102 | prevch = CHAR(tail->str, pos); size = 1; 103 | for ( ; (tailn = tail->next); tail = tailn) { 104 | ch = CHAR(tailn->str, pos); size++; 105 | if (ch == prevch) continue; 106 | intobucket(b+prevch, g->head, tail, size-1, g); 107 | g->head = tailn; split = TRUE; 108 | prevch = ch; size = 1; 109 | } 110 | if (split) { 111 | intobucket(b+prevch, g->head, tail, size, g); 112 | g->head = NULL; 113 | prevg = g; 114 | } else if (IS_ENDMARK(prevch)) 115 | prevg->nextunf = g->nextunf; 116 | else 117 | prevg = g; 118 | } 119 | } 120 | 121 | /* Put a list into group g and, at the same time, split g. 122 | If two consecutive groups are both finished, there is no need 123 | to perform any splitting. */ 124 | static void intogroup(group g, list head, list tail, boolean finis) 125 | { 126 | group newg; 127 | 128 | if (!g->head) { /* back into old group */ 129 | g->head = head; 130 | g->tail = tail; 131 | g->finis = finis; 132 | g->insp = g; 133 | } else if (finis && g->insp->finis) { /* don't split if both */ 134 | g->insp->tail->next = head; /* groups are finished */ 135 | g->insp->tail = tail; 136 | } 137 | else { /* split */ 138 | newg = (group) allocmem(groupmem, sizeof(struct grouprec)); 139 | newg->head = head; 140 | newg->tail = tail; 141 | newg->next = g->insp->next; 142 | newg->nextunf = g->insp->nextunf; 143 | newg->finis = finis; 144 | g->insp = g->insp->nextunf = g->insp->next = newg; 145 | } 146 | } 147 | 148 | /* Traverse the buckets and put the elements back into their groups. 149 | Split the groups and mark all finished groups. 150 | The elements are moved in blocks. 
*/ 151 | static void intogroups(bucket b[], int pos) 152 | { 153 | character ch; 154 | bucket s; 155 | boolean finis; 156 | 157 | for (ch = 0; ch < CHARS; ch++) { 158 | if (!b[ch]) continue; 159 | for (s = b[ch]; s; s = s->next) { 160 | finis = IS_ENDMARK(ch); 161 | if (s->size < INSERTBREAK && !finis) { 162 | if (s->size > 1) 163 | s->head = ListInsertsort(s->head, &s->tail, pos); 164 | finis = TRUE; 165 | } 166 | intogroup(s->tag, s->head, s->tail, finis); 167 | } 168 | b[ch] = NULL; 169 | } 170 | } 171 | 172 | /* Travers the groups and return the elements in sorted order. */ 173 | static list collect(group g) 174 | { 175 | list head, tail; 176 | 177 | g = g->next; 178 | head = g->head; 179 | tail = g->tail; 180 | for (g = g->next; g; g = g->next) { 181 | tail->next = g->head; 182 | tail = g->tail; 183 | } 184 | return head; 185 | } 186 | 187 | static inline list forward1(list t, int n) 188 | { 189 | static bucket b[CHARS]; /* buckets */ 190 | group g, g2; /* groups */ 191 | int pos = 0; /* pos in string */ 192 | 193 | if (n<2) return t; 194 | 195 | initmem(groupmem, sizeof(struct grouprec), n/15); 196 | initmem(bucketmem, sizeof(struct bucketrec), n/5); 197 | 198 | /* We use a dummy group g as the header of the group data 199 | structure. It does not contain any elements, but only a 200 | pointer to the first unfinished group. 
*/ 201 | g = (group) allocmem(groupmem, sizeof(struct grouprec)); 202 | g2 = (group) allocmem(groupmem, sizeof(struct grouprec)); 203 | g->next = g->nextunf = g2; 204 | g2->head = t; 205 | g2->next = g2->nextunf = NULL; 206 | g2->finis = FALSE; 207 | 208 | intobuckets(g, b, pos); 209 | while (g->nextunf) { 210 | pos++; 211 | intogroups(b, pos); 212 | intobuckets(g, b, pos); 213 | } 214 | t = collect(g); 215 | 216 | freemem(bucketmem); 217 | freemem(groupmem); 218 | 219 | return t; 220 | } 221 | 222 | void frssort1(string strings[], size_t scnt) 223 | { 224 | list ptr, listnodes; 225 | size_t i; 226 | 227 | /* allocate memory based on the number of strings in the array */ 228 | ptr = listnodes = (list ) calloc(scnt, sizeof(struct listrec)); 229 | 230 | /* point the linked list nodes to the strings in the array */ 231 | for( i=0; inext) 245 | strings[i] = listnodes->str; 246 | 247 | free(ptr); 248 | } 249 | 250 | void forward8(unsigned char **strings, size_t n) 251 | { 252 | return frssort1(strings, n); 253 | } 254 | ROUTINE_REGISTER_SINGLECORE(forward8, 255 | "Forward Radix Sort 8-bit by Stefan Nilsson") 256 | -------------------------------------------------------------------------------- /external/lcp-quicksort.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "routine.h" 4 | 5 | typedef int Lcp; 6 | 7 | inline int lcpstrcmp( unsigned char const * const p, unsigned char const * const q, Lcp &i) { 8 | for( ; !(q[i] - p[i]) && p[i]; i++ ) 9 | ; 10 | return q[i]-p[i]; 11 | } 12 | 13 | inline void exch( unsigned char *strings[], Lcp lcps[], int I, int J) { 14 | std::swap(strings[I],strings[J]); 15 | std::swap(lcps[I],lcps[J]); 16 | } 17 | 18 | void strsort(unsigned char * strings[], Lcp lcps[], int lo, int hi ); 19 | 20 | template 21 | void lcpsort( unsigned char * strings[], Lcp lcps[], int lo, int hi ) { 22 | if ( hi <= lo ) return; 23 | int lt = lo, gt = hi; 24 | 25 | Lcp pivot = lcps[lo]; 
26 | for( int i = lo + 1; i <= gt; ) { 27 | if ( ascending ? lcps[i] > pivot : lcps[i] < pivot ) exch( strings, lcps, i, gt--); 28 | else if ( ascending ? lcps[i] < pivot : lcps[i] > pivot ) exch( strings, lcps, lt++, i++); 29 | else i++; 30 | } 31 | 32 | strsort( strings, lcps, lt, gt ); 33 | lcpsort( strings, lcps, lo, lt-1 ); 34 | lcpsort( strings, lcps, gt+1, hi ); 35 | }; 36 | 37 | void strsort(unsigned char * strings[], Lcp lcps[], int lo, int hi ) 38 | { 39 | if ( hi <= lo ) return; 40 | int lt = lo, gt = hi; 41 | 42 | unsigned char * pivotStr = strings[lo]; 43 | for( int i = lo + 1; i <= gt; ) 44 | { 45 | int cmpr = lcpstrcmp( pivotStr, strings[i], lcps[i] ); 46 | if (cmpr < 0) exch( strings, lcps, lt++, i++); 47 | else if (cmpr > 0) exch( strings, lcps, i, gt--); 48 | else i++; 49 | } 50 | 51 | lcpsort ( strings, lcps, lo, lt-1 ); 52 | lcpsort( strings, lcps, gt+1, hi ); 53 | }; 54 | 55 | extern "C" void lcpquicksort( unsigned char * strings[], size_t n ) { 56 | Lcp *lcps = (Lcp *) calloc( n, sizeof(Lcp)); 57 | strsort( strings, lcps, 0, n-1 ); 58 | free(lcps); 59 | } 60 | 61 | ROUTINE_REGISTER_SINGLECORE( lcpquicksort, 62 | "LCP Quicksort by Kendall Willets") 63 | -------------------------------------------------------------------------------- /external/mbmradix.c: -------------------------------------------------------------------------------- 1 | /* 2 | Hybrid American flag sort (with stack control), a radix sort 3 | algorithm for arrays of character strings by McIlroy, Bostic, 4 | and McIlroy. 5 | 6 | P. M. McIlroy, K. Bostic, and M. D. McIlroy. Engineering radix 7 | sort. Computing Systems, 6(1):5-27, 1993. 8 | 9 | The code presented in this file has been tested with care but is 10 | not guaranteed for any purpose. The writer does not offer any 11 | warranties nor does he accept any liabilities with respect to 12 | the code. 13 | 14 | Stefan Nilsson, 2 jan 1997. 
15 | 16 | Laboratory of Information Processing Science 17 | Helsinki University of Technology 18 | Stefan.Nilsson@hut.fi 19 | */ 20 | 21 | #include "routine.h" 22 | #include "utils.h" 23 | 24 | enum { SIZE = 1024, THRESHOLD = 10 }; 25 | 26 | typedef struct { string *sa; int sn, si; } mbmstack_t; 27 | 28 | static void simplesort(string a[], int n, int b) 29 | { 30 | int i, j; 31 | string tmp; 32 | 33 | for (i = 1; i < n; i++) 34 | for (j = i; j > 0 && scmp(a[j-1]+b, a[j]+b) > 0; j--) 35 | { tmp = a[j]; a[j] = a[j-1]; a[j-1] = tmp; } 36 | } 37 | 38 | static void rsorta(string *a, int n, int b) 39 | { 40 | #define push(a, n, i) sp->sa = a, sp->sn = n, (sp++)->si = i 41 | #define pop(a, n, i) a = (--sp)->sa, n = sp->sn, i = sp->si 42 | #define stackempty() (sp <= stack) 43 | #define swap(p, q, r) r = p, p = q, q = r 44 | mbmstack_t stack[SIZE], *sp = stack, stmp, *oldsp, *bigsp; 45 | string *pile[256], *ak, *an, r, t; 46 | static int count[256], cmin, nc; 47 | int *cp, c, cmax; 48 | 49 | push(a, n, b); 50 | 51 | while(!stackempty()) { 52 | pop(a, n, b); 53 | if(n < THRESHOLD) { 54 | simplesort(a, n, b); 55 | continue; 56 | } 57 | an = a + n; 58 | if(nc == 0) { /* untallied? 
*/ 59 | cmin = 255; /* tally */ 60 | for(ak = a; ak < an; ) { 61 | c = (*ak++)[b]; 62 | if(++count[c] == 1 && c > 0) { 63 | if(c < cmin) cmin = c; 64 | nc++; 65 | } 66 | } 67 | if(sp+nc > stack+SIZE) { /* stack overflow */ 68 | rsorta(a, n, b); 69 | continue; 70 | } 71 | } 72 | oldsp = bigsp = sp, c = 2; /* logarithmic stack */ 73 | pile[0] = ak = a+count[cmax=0]; /* find places */ 74 | for(cp = count+cmin; nc > 0; cp++, nc--) { 75 | while(*cp == 0) cp++; 76 | if (*cp > 1) { 77 | if(*cp > c) c = *cp, bigsp = sp; 78 | push(ak, *cp, b+1); 79 | } 80 | pile[cmax = cp-count] = ak += *cp; 81 | } 82 | swap(*oldsp, *bigsp, stmp); 83 | an -= count[cmax]; /* permute home */ 84 | count[cmax] = 0; 85 | for(ak = a; ak < an; ak += count[c], count[c] = 0) { 86 | r = *ak; 87 | while(--pile[c = r[b]] > ak) 88 | swap(*pile[c], r, t); 89 | *ak = r; 90 | /* here nc = count[...] = 0 */ 91 | } 92 | } 93 | } 94 | /* Entry point: run rsorta over the n strings in a[], starting at character offset 0. */ 95 | void mbmradix(string a[], size_t n) 96 | { rsorta(a, n, 0); } 97 | 98 | ROUTINE_REGISTER_SINGLECORE(mbmradix, 99 | "MSD Radix Sort by P. M. McIlroy, K. Bostic, and M. D. McIlroy") 100 | -------------------------------------------------------------------------------- /external/mkqsort.c: -------------------------------------------------------------------------------- 1 | /* 2 | Multikey quicksort, a radix sort algorithm for arrays of character 3 | strings by Bentley and Sedgewick. 4 | 5 | J. Bentley and R. Sedgewick. Fast algorithms for sorting and 6 | searching strings. In Proceedings of 8th Annual ACM-SIAM Symposium 7 | on Discrete Algorithms, 1997. 8 | 9 | http://www.CS.Princeton.EDU/~rs/strings/index.html 10 | 11 | The code presented in this file has been tested with care but is 12 | not guaranteed for any purpose. The writer does not offer any 13 | warranties nor does he accept any liabilities with respect to 14 | the code. 15 | 16 | Ranjan Sinha, 1 jan 2003. 
17 | 18 | School of Computer Science and Information Technology, 19 | RMIT University, Melbourne, Australia 20 | rsinha@cs.rmit.edu.au 21 | 22 | */ 23 | 24 | #include "utils.h" 25 | 26 | /* MULTIKEY QUICKSORT */ 27 | 28 | #ifndef min 29 | #define min(a, b) ((a)<=(b) ? (a) : (b)) 30 | #endif 31 | 32 | /* ssort2 -- Faster Version of Multikey Quicksort */ 33 | 34 | void vecswap2(unsigned char **a, unsigned char **b, int n) 35 | { while (n-- > 0) { 36 | unsigned char *t = *a; 37 | *a++ = *b; 38 | *b++ = t; 39 | } 40 | } 41 | 42 | #define swap2(a, b) { t = *(a); *(a) = *(b); *(b) = t; } 43 | #define ptr2char(i) (*(*(i) + depth)) 44 | 45 | unsigned char **med3func(unsigned char **a, unsigned char **b, unsigned char **c, int depth) 46 | { int va, vb, vc; 47 | if ((va=ptr2char(a)) == (vb=ptr2char(b))) 48 | return a; 49 | if ((vc=ptr2char(c)) == va || vc == vb) 50 | return c; 51 | return va < vb ? 52 | (vb < vc ? b : (va < vc ? c : a ) ) 53 | : (vb > vc ? b : (va < vc ? a : c ) ); 54 | } 55 | #define med3(a, b, c) med3func(a, b, c, depth) 56 | 57 | void mkqsort(unsigned char **a, int n, int depth) 58 | { int d, r, partval; 59 | unsigned char **pa, **pb, **pc, **pd, **pl, **pm, **pn, *t; 60 | if (n < 20) { 61 | inssort(a, n, depth); 62 | return; 63 | } 64 | pl = a; 65 | pm = a + (n/2); 66 | pn = a + (n-1); 67 | if (n > 30) { /* On big arrays, pseudomedian of 9 */ 68 | d = (n/8); 69 | pl = med3(pl, pl+d, pl+2*d); 70 | pm = med3(pm-d, pm, pm+d); 71 | pn = med3(pn-2*d, pn-d, pn); 72 | } 73 | pm = med3(pl, pm, pn); 74 | swap2(a, pm); 75 | partval = ptr2char(a); 76 | pa = pb = a + 1; 77 | pc = pd = a + n-1; 78 | for (;;) { 79 | while (pb <= pc && (r = ptr2char(pb)-partval) <= 0) { 80 | if (r == 0) { swap2(pa, pb); pa++; } 81 | pb++; 82 | } 83 | while (pb <= pc && (r = ptr2char(pc)-partval) >= 0) { 84 | if (r == 0) { swap2(pc, pd); pd--; } 85 | pc--; 86 | } 87 | if (pb > pc) break; 88 | swap2(pb, pc); 89 | pb++; 90 | pc--; 91 | } 92 | pn = a + n; 93 | r = min(pa-a, pb-pa); 
vecswap2(a, pb-r, r); 94 | r = min(pd-pc, pn-pd-1); vecswap2(pb, pn-r, r); 95 | if ((r = pb-pa) > 1) 96 | mkqsort(a, r, depth); 97 | if (ptr2char(a + r) != 0) 98 | mkqsort(a + r, pa-a + pn-pd-1, depth+1); 99 | if ((r = pd-pc) > 1) 100 | mkqsort(a + n-r, r, depth); 101 | } 102 | 103 | void mkqsort_main(unsigned char **a, int n) { mkqsort(a, n, 0); } 104 | -------------------------------------------------------------------------------- /external/msd.c: -------------------------------------------------------------------------------- 1 | /* 2 | MSD radix sort with a fixed sized alphabet. 3 | 4 | S. Nilsson. Radix Sorting and Searching. PhD thesis, Department 5 | of Computer Science, Lund University, 1990. 6 | 7 | The code presented in this file has been tested with care but is 8 | not guaranteed for any purpose. The writer does not offer any 9 | warranties nor does he accept any liabilities with respect to 10 | the code. 11 | 12 | Stefan Nilsson, 8 jan 1997. 13 | 14 | Laboratory of Information Processing Science 15 | Helsinki University of Technology 16 | Stefan.Nilsson@hut.fi 17 | */ 18 | 19 | #include "routine.h" 20 | #include "nilsson.h" 21 | #include 22 | 23 | #define CHAR(s, p) s[p] 24 | 25 | typedef struct bucketrec { 26 | list head, tail; 27 | int size; /* size of list, 0 if already sorted */ 28 | } bucket; 29 | 30 | typedef struct stackrec { 31 | list head, tail; 32 | int size; /* size of list, 0 if already sorted */ 33 | int pos; /* current position in string */ 34 | } stack; 35 | 36 | static memory stackmem[1]; 37 | static stack *stackp; 38 | 39 | static void push(list head, list tail, int size, int pos) 40 | { 41 | stackp = (stack *) allocmem(stackmem, sizeof(struct stackrec)); 42 | stackp->head = head; 43 | stackp->tail = tail; 44 | stackp->size = size; 45 | stackp->pos = pos; 46 | } 47 | 48 | static stack *pop() 49 | { 50 | stack *temp; 51 | 52 | temp = stackp; 53 | stackp = (stack *) deallocmem(stackmem, sizeof(struct stackrec)); 54 | return temp; 55 | } 
56 | /* Return the current stack record (stackp) without removing it. */ 57 | static stack *top() 58 | { 59 | return stackp; 60 | } 61 | /* True when stackp is NULL, i.e. there is no current stack record. */ 62 | static boolean stackempty() 63 | { 64 | return !stackp; 65 | } 66 | 67 | /* Put a list of elements into a bucket. The minimum and maximum 68 | character seen so far (chmin, chmax) are updated when the bucket 69 | is updated for the first time. */ 70 | static void intobucket(bucket *b, list h, list t, int size, 71 | character ch, character *chmin, character *chmax) 72 | { 73 | if (!b->head) { 74 | b->head = h; 75 | b->tail = t; 76 | b->size = size; 77 | if (ch != '\0') { 78 | if (ch < *chmin) *chmin = ch; 79 | if (ch > *chmax) *chmax = ch; 80 | } 81 | } else { 82 | b->tail->next = h; 83 | b->tail = t; 84 | b->size += size; 85 | } 86 | } 87 | 88 | /* Put the list in a bucket onto the stack. If the list is small 89 | (contains at most INSERTBREAK elements) sort it using insertion 90 | sort. If both the list on top of the stack and the list to 91 | be added to the stack are already sorted the new list is appended 92 | to the end of the list on the stack and no new stack record is 93 | created. */ 94 | static void ontostack(bucket *b, int pos) 95 | { 96 | b->tail->next = NULL; 97 | if (b->size <= INSERTBREAK) { 98 | if (b->size > 1) 99 | b->head = ListInsertsort(b->head, &b->tail, pos); 100 | b->size = 0; /* sorted */ 101 | } 102 | if (!b->size && !stackempty() && !top()->size) { 103 | top()->tail->next = b->head; 104 | top()->tail = b->tail; 105 | } 106 | else { 107 | push(b->head, b->tail, b->size, pos); 108 | b->size = 0; 109 | } 110 | b->head = NULL; 111 | } 112 | 113 | /* Traverse a list and put the elements into buckets according 114 | to the character in position pos. The elements are moved in 115 | blocks consisting of strings that have a common character in 116 | position pos. We keep track of the minimum and maximum nonzero 117 | characters encountered. 
In this way we may avoid looking at 118 | some empty buckets when we traverse the buckets in ascending 119 | order and push the lists onto the stack */ 120 | static void bucketing(list a, int pos) 121 | { 122 | static bucket b[CHARS]; 123 | bucket *bp; 124 | character ch, prevch; 125 | character chmin = CHARS-1, chmax = 0; 126 | list t = a, tn; 127 | int size = 1; 128 | 129 | prevch = CHAR(t->str, pos); 130 | for ( ; (tn = t->next); t = tn) { 131 | ch = CHAR(tn->str, pos); size++; 132 | if (ch == prevch) continue; 133 | intobucket(b+prevch, a, t, size-1, prevch, &chmin, &chmax); 134 | a = tn; 135 | prevch = ch; 136 | size = 1; 137 | } 138 | intobucket(b+prevch, a, t, size, prevch, &chmin, &chmax); 139 | 140 | if (b->head) { /* ch = '\0', end of string */ 141 | b->size = 0; /* already sorted */ 142 | ontostack(b, pos); 143 | } 144 | for (bp = b + chmin; bp <= b + chmax; bp++) 145 | if (bp->head) ontostack(bp, pos+1); 146 | } 147 | 148 | list MSD1(list a, int n) 149 | { 150 | list res = NULL; 151 | stack *s; 152 | 153 | if (n < 2) return a; 154 | initmem(stackmem, sizeof(struct stackrec), n/50); 155 | push(a, NULL, n, 0); 156 | 157 | while (!stackempty()) { 158 | s = pop(); 159 | if (!s->size) { /* sorted */ 160 | s->tail->next = res; 161 | res = s->head; 162 | continue; 163 | } 164 | bucketing(s->head, s->pos); 165 | } 166 | 167 | freemem(stackmem); 168 | return res; 169 | } 170 | 171 | void MSDsort(string strings[], size_t scnt) 172 | { 173 | list ptr, listnodes; 174 | size_t i; 175 | 176 | /* allocate memory based on the number of strings in the array */ 177 | ptr = listnodes = (list ) calloc(scnt, sizeof(struct listrec)); 178 | 179 | /* point the linked list nodes to the strings in the array */ 180 | for( i=0; inext) 194 | strings[i] = listnodes->str; 195 | 196 | free(ptr); 197 | } 198 | 199 | void msd_nilsson(unsigned char **strings, size_t n) 200 | { 201 | return MSDsort(strings, n); 202 | } 203 | ROUTINE_REGISTER_SINGLECORE(msd_nilsson, "MSD Radix Sort by 
Stefan Nilsson") 204 | -------------------------------------------------------------------------------- /external/multikey.c: -------------------------------------------------------------------------------- 1 | /* 2 | Multikey quicksort, a radix sort algorithm for arrays of character 3 | strings by Bentley and Sedgewick. 4 | 5 | J. Bentley and R. Sedgewick. Fast algorithms for sorting and 6 | searching strings. In Proceedings of 8th Annual ACM-SIAM Symposium 7 | on Discrete Algorithms, 1997. 8 | 9 | http://www.CS.Princeton.EDU/~rs/strings/index.html 10 | 11 | The code presented in this file has been tested with care but is 12 | not guaranteed for any purpose. The writer does not offer any 13 | warranties nor does he accept any liabilities with respect to 14 | the code. 15 | 16 | Stefan Nilsson, 8 jan 1997. 17 | 18 | Laboratory of Information Processing Science 19 | Helsinki University of Technology 20 | Stefan.Nilsson@hut.fi 21 | */ 22 | 23 | #include "routine.h" 24 | #include "utils.h" 25 | #include 26 | 27 | #ifndef min 28 | #define min(a, b) ((a)<=(b) ? 
(a) : (b)) 29 | #endif 30 | 31 | #define swap(a, b) { string t=x[a]; \ 32 | x[a]=x[b]; x[b]=t; } 33 | #define i2c(i) x[i][depth] 34 | 35 | static void vecswap(int i, int j, int n, string x[]) 36 | { while (n-- > 0) { 37 | swap(i, j); 38 | i++; 39 | j++; 40 | } 41 | } 42 | 43 | static void ssort1(string x[], int n, int depth) 44 | { int a, b, c, d, r, v; 45 | if (n <= 1) 46 | return; 47 | a = rand() % n; 48 | swap(0, a); 49 | v = i2c(0); 50 | a = b = 1; 51 | c = d = n-1; 52 | for (;;) { 53 | while (b <= c && (r = i2c(b)-v) <= 0) { 54 | if (r == 0) { swap(a, b); a++; } 55 | b++; 56 | } 57 | while (b <= c && (r = i2c(c)-v) >= 0) { 58 | if (r == 0) { swap(c, d); d--; } 59 | c--; 60 | } 61 | if (b > c) break; 62 | swap(b, c); 63 | b++; 64 | c--; 65 | } 66 | r = min(a, b-a); vecswap(0, b-r, r, x); 67 | r = min(d-c, n-d-1); vecswap(b, n-r, r, x); 68 | r = b-a; ssort1(x, r, depth); 69 | if (i2c(r) != 0) 70 | ssort1(x + r, a + n-d-1, depth+1); 71 | r = d-c; ssort1(x + n-r, r, depth); 72 | } 73 | 74 | void multikey1(string x[], int n) 75 | { ssort1(x, n, 0); } 76 | 77 | 78 | /* ssort2 -- Faster Version of Multikey Quicksort */ 79 | 80 | static void vecswap2(string *a, string *b, int n) 81 | { while (n-- > 0) { 82 | string t = *a; 83 | *a++ = *b; 84 | *b++ = t; 85 | } 86 | } 87 | 88 | #define swap2(a, b) { t = *(a); *(a) = *(b); *(b) = t; } 89 | #define ptr2char(i) (*(*(i) + depth)) 90 | 91 | static string *med3func(string *a, string *b, string *c, int depth) 92 | { int va, vb, vc; 93 | if ((va=ptr2char(a)) == (vb=ptr2char(b))) 94 | return a; 95 | if ((vc=ptr2char(c)) == va || vc == vb) 96 | return c; 97 | return va < vb ? 98 | (vb < vc ? b : (va < vc ? c : a ) ) 99 | : (vb > vc ? b : (va < vc ? 
a : c ) ); 100 | } 101 | #define med3(a, b, c) med3func(a, b, c, depth) 102 | 103 | static void insertsort(string *a, int n, int d) 104 | { string *pi, *pj, s, t; 105 | for (pi = a + 1; --n > 0; pi++) 106 | for (pj = pi; pj > a; pj--) { 107 | /* Inline strcmp: break if *(pj-1) <= *pj */ 108 | for (s=*(pj-1)+d, t=*pj+d; *s==*t && *s!=0; s++, t++) 109 | ; 110 | if (*s <= *t) 111 | break; 112 | swap2(pj, pj-1); 113 | } 114 | } 115 | 116 | static void ssort2(string a[], int n, int depth) 117 | { int d, r, partval; 118 | string *pa, *pb, *pc, *pd, *pl, *pm, *pn, t; 119 | if (n < 10) { 120 | insertsort(a, n, depth); 121 | return; 122 | } 123 | pl = a; 124 | pm = a + (n/2); 125 | pn = a + (n-1); 126 | if (n > 30) { /* On big arrays, pseudomedian of 9 */ 127 | d = (n/8); 128 | pl = med3(pl, pl+d, pl+2*d); 129 | pm = med3(pm-d, pm, pm+d); 130 | pn = med3(pn-2*d, pn-d, pn); 131 | } 132 | pm = med3(pl, pm, pn); 133 | swap2(a, pm); 134 | partval = ptr2char(a); 135 | pa = pb = a + 1; 136 | pc = pd = a + n-1; 137 | for (;;) { 138 | while (pb <= pc && (r = ptr2char(pb)-partval) <= 0) { 139 | if (r == 0) { swap2(pa, pb); pa++; } 140 | pb++; 141 | } 142 | while (pb <= pc && (r = ptr2char(pc)-partval) >= 0) { 143 | if (r == 0) { swap2(pc, pd); pd--; } 144 | pc--; 145 | } 146 | if (pb > pc) break; 147 | swap2(pb, pc); 148 | pb++; 149 | pc--; 150 | } 151 | pn = a + n; 152 | r = min(pa-a, pb-pa); vecswap2(a, pb-r, r); 153 | r = min(pd-pc, pn-pd-1); vecswap2(pb, pn-r, r); 154 | if ((r = pb-pa) > 1) 155 | ssort2(a, r, depth); 156 | if (ptr2char(a + r) != 0) 157 | ssort2(a + r, pa-a + pn-pd-1, depth+1); 158 | if ((r = pd-pc) > 1) 159 | ssort2(a + n-r, r, depth); 160 | } 161 | 162 | void multikey2(string a[], size_t n) { ssort2(a, n, 0); } 163 | 164 | void mkqsort_bs(unsigned char **strings, size_t n) 165 | { 166 | return multikey2(strings, n); 167 | } 168 | ROUTINE_REGISTER_SINGLECORE(mkqsort_bs, 169 | "Multi-Key-Quicksort by J. Bentley and R. 
Sedgewick") 170 | -------------------------------------------------------------------------------- /external/nilsson.h: -------------------------------------------------------------------------------- 1 | #ifndef NILSSON_H 2 | #define NILSSON_H 3 | 4 | #include "utils.h" 5 | 6 | #define MAXBLOCKS 100 7 | #define TRUE 1 8 | #define FALSE 0 9 | #define MAX(a, b) ((a) > (b) ? (a) : (b)) 10 | 11 | typedef int boolean; 12 | typedef int character; 13 | 14 | typedef struct listrec *list; 15 | struct listrec { 16 | string str; 17 | list next; 18 | int length; 19 | }; 20 | 21 | typedef struct { 22 | void *block[MAXBLOCKS]; 23 | int allocnr; 24 | int nr; 25 | int blocksize; 26 | void *current, *first, *last; 27 | } memory; 28 | 29 | void initmem(memory *m, int elemsize, int blocksize); 30 | void *allocmem(memory *m, int elemsize); 31 | void *deallocmem(memory *m, int elemsize); 32 | void resetmem(memory *m); 33 | void freemem(memory *m); 34 | 35 | list ListInsertsort(list head, list *tail , int length); 36 | 37 | #endif //NILSSON_H 38 | -------------------------------------------------------------------------------- /external/parallel_string_radix_sort.cpp: -------------------------------------------------------------------------------- 1 | #include "routine.h" 2 | #include "parallel_string_radix_sort.h" 3 | 4 | namespace parallel_string_radix_sort { 5 | namespace internal { 6 | template<> class Compare { 7 | public: 8 | explicit Compare(int depth) : depth_(depth) {} 9 | inline bool operator()(const unsigned char* const a, 10 | const unsigned char* const b) { 11 | return strcmp((char*)a + depth_, (char*)b + depth_) < 0; 12 | } 13 | private: 14 | int depth_; 15 | }; 16 | } 17 | } 18 | 19 | void parallel_msd_radix_sort(unsigned char **strings, size_t count) 20 | { 21 | parallel_string_radix_sort::Sort( 22 | (const unsigned char **)strings, count); 23 | } 24 | 25 | ROUTINE_REGISTER_MULTICORE(parallel_msd_radix_sort, 26 | "Parallel MSD radix sort by Takuya Akiba") 27 | 
-------------------------------------------------------------------------------- /external/quicksort.c: -------------------------------------------------------------------------------- 1 | /* 2 | A stripped down version of a quicksort algorithm by Bentley 3 | and McIlroy. It sorts an array of pointers to strings. 4 | 5 | J. L. Bentley and M. D. McIlroy. Engineering a sort function. 6 | Software---Practice and Experience, 23(11):1249-1265, 1993. 7 | 8 | The code presented in this file has been tested with care but is 9 | not guaranteed for any purpose. The writer does not offer any 10 | warranties nor does he accept any liabilities with respect to 11 | the code. 12 | 13 | Stefan Nilsson, 2 jan 1997. 14 | 15 | Laboratory of Information Processing Science 16 | Helsinki University of Technology 17 | Stefan.Nilsson@hut.fi 18 | */ 19 | 20 | #include "routine.h" 21 | #include 22 | #include "utils.h" 23 | 24 | #define swap(a, b) (t = (a), (a) = (b), (b) = t) 25 | static void vecswap(int pa, int pb, int n, string a[]) 26 | { 27 | string t; 28 | for( ; n > 0; pa++, pb++, n--) 29 | swap(a[pa], a[pb]); 30 | } 31 | 32 | #define min(a, b) ((a) < (b) ? (a) : (b)) 33 | 34 | static int med3(int pa, int pb, int pc, string a[]) 35 | { return scmp(a[pa], a[pb]) < 0 ? 36 | (scmp(a[pb], a[pc]) < 0 ? pb : scmp(a[pa], a[pc]) < 0 ? pc : pa) 37 | : (scmp(a[pb], a[pc]) > 0 ? pb : scmp(a[pa], a[pc]) > 0 ? 
pc : pa); 38 | } 39 | /* Sort the n string pointers in a[] in place into ascending scmp() order. Keys equal to the pivot are parked at both ends during partitioning and swapped into the middle afterwards; only the less-than and greater-than parts are recursed on. NOTE(review): all indices are int while n is size_t - confirm callers never pass n above INT_MAX. */ 40 | void quicksort(string a[], size_t n) 41 | { 42 | int pa, pb, pc, pd, pl, pm, pn, r, s; 43 | string t; 44 | string pv; 45 | 46 | if (n < 10) { /* Insertion sort on smallest arrays */ 47 | for (pm = 1; pm < n; pm++) 48 | for (pl = pm; pl > 0 && scmp(a[pl-1], a[pl]) > 0; pl--) 49 | swap(a[pl], a[pl-1]); 50 | return; 51 | } 52 | pm = n/2; /* Small arrays, middle element */ 53 | if (n > 7) { /* NOTE(review): always true here, since n < 10 returned above */ 54 | pl = 0; 55 | pn = n-1; 56 | if (n > 40) { /* Big arrays, pseudomedian of 9 */ 57 | s = n/8; 58 | pl = med3(pl, pl+s, pl+2*s, a); 59 | pm = med3(pm-s, pm, pm+s, a); 60 | pn = med3(pn-2*s, pn-s, pn, a); 61 | } 62 | pm = med3(pl, pm, pn, a); /* Mid-size, med of 3 */ 63 | } 64 | pv = a[pm]; /* pivot string */ 65 | pa = pb = 0; 66 | pc = pd = n-1; 67 | for (;;) { 68 | while (pb <= pc && (r = scmp(a[pb], pv)) <= 0) { 69 | if (r == 0) { swap(a[pa], a[pb]); pa++; } /* park keys equal to the pivot at the left end */ 70 | pb++; 71 | } 72 | while (pc >= pb && (r = scmp(a[pc], pv)) >= 0) { 73 | if (r == 0) { swap(a[pc], a[pd]); pd--; } /* park keys equal to the pivot at the right end */ 74 | pc--; 75 | } 76 | if (pb > pc) break; 77 | swap(a[pb], a[pc]); 78 | pb++; 79 | pc--; 80 | } 81 | pn = n; /* move the parked equal keys from both ends into the middle */ 82 | s = min(pa, pb-pa ); vecswap(0, pb-s, s, a); 83 | s = min(pd-pc, pn-pd-1); vecswap(pb, pn-s, s, a); 84 | if ((s = pb-pa) > 1) quicksort(a, s); 85 | if ((s = pd-pc) > 1) quicksort(&a[pn-s], s); 86 | } 87 | 88 | ROUTINE_REGISTER_SINGLECORE(quicksort, 89 | "Quicksort by J. L. Bentley and M. D. 
McIlroy") 90 | -------------------------------------------------------------------------------- /external/utils.c: -------------------------------------------------------------------------------- 1 | int scmp( unsigned char *s1, unsigned char *s2 ) 2 | { 3 | while( *s1 != '\0' && *s1 == *s2 ) 4 | { 5 | s1++; 6 | s2++; 7 | } 8 | return( *s1-*s2 ); 9 | } 10 | 11 | void 12 | inssort(unsigned char** a, int n, int d) 13 | { 14 | unsigned char** pi; 15 | unsigned char** pj; 16 | unsigned char* s; 17 | unsigned char* t; 18 | 19 | for (pi = a + 1; --n > 0; pi++) { 20 | unsigned char* tmp = *pi; 21 | 22 | for (pj = pi; pj > a; pj--) { 23 | for (s=*(pj-1)+d, t=tmp+d; *s==*t && *s!=0; ++s, ++t) 24 | ; 25 | if (*s <= *t) 26 | break; 27 | *pj = *(pj-1); 28 | } 29 | *pj = tmp; 30 | } 31 | } 32 | 33 | -------------------------------------------------------------------------------- /external/utils.h: -------------------------------------------------------------------------------- 1 | #ifndef UTILS_H 2 | #define UTILS_H 3 | 4 | #include 5 | 6 | #define CHARS 256 7 | #define INSERTBREAK 20 8 | typedef unsigned char* string; 9 | 10 | void mkqsort(unsigned char **, int n, int depth); 11 | void inssort(unsigned char **, int n, int depth); 12 | int scmp(unsigned char*, unsigned char*); 13 | 14 | #endif //UTILS_H 15 | -------------------------------------------------------------------------------- /report/Makefile: -------------------------------------------------------------------------------- 1 | default: report.html 2 | 3 | algs.xml: generate-algs-xml Makefile ./sortstring 4 | ./generate-algs-xml 5 | 6 | report.html: Makefile htmlreport.xsl algs.xml sortable.js 7 | xsltproc htmlreport.xsl algs.xml >report.html 8 | 9 | clean: 10 | rm -f algs.xml 11 | rm -f report.html 12 | -------------------------------------------------------------------------------- /report/README: -------------------------------------------------------------------------------- 1 | In order to generate the HTML 
based comparison sheet, perform the following steps. 2 | 3 | 0. Obtain input files. I've used the data sets Sinha&Zobel used in their 4 | Burstsort experiments. You can find them via Google. They are about 5 | 300 megabytes each. 6 | 7 | 1. Run the ''benchmark'' script. It takes about 12 hours if you run all algorithms 8 | and three input files, each about 300MB. This runs each algorithm seven 9 | times for each input file. 10 | 11 | 2. Run OProfile with the ''collect-oprofile-statistics'' script. Takes about 12 12 | hours. Requires root privileges. 13 | 14 | 3. Run memusage with the ''collect-memusage-statistics'' script. Takes about 30 15 | minutes. 16 | 17 | 4. Process the OProfile results with the ''process-oprofile-statistics'' script. 18 | Takes about one hour. 19 | 20 | 5. Process the memusage results with the ''process-memusage-statistics'' script. 21 | Takes a minute or two. 22 | 23 | 6. Finally, run ''make''. This will generate the HTML file based on the results 24 | of the previous steps. 25 | 26 | 27 | NOTES: 28 | *) You can create results for smaller subsets of available algorithms using 29 | environment variables, for example: 30 | export ALGS="1 2 3" 31 | OR: 32 | ALGS="1 2 3" ./collect-oprofile-statistics 33 | ALGS="1 2 3" ./collect-memusage-statistics 34 | etc. 
35 | -------------------------------------------------------------------------------- /report/benchmark: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ################################################################################ 3 | # Copyright 2008 by Tommi Rantala 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to 7 | # deal in the Software without restriction, including without limitation the 8 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 9 | # sell copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 | # IN THE SOFTWARE. 22 | ################################################################################ 23 | function die() { 24 | echo "ERROR: $1" 25 | exit 1 26 | } 27 | ################################################################################ 28 | if [[ -z $BIN ]] ; then BIN=./sortstring ; fi 29 | if [[ ! 
-x $BIN ]] ; then die "Sorry, binary not executable" ; fi 30 | if [[ -z $ALGS ]] ; then ALGS=`$BIN --alg-nums` ; fi 31 | if [[ -z $INFILES ]] ; then INFILES="input/url3 input/nodup3 input/genome3" ; fi 32 | if [[ -z $OUTDIR ]] ; then OUTDIR="data" ; fi 33 | mkdir -p $OUTDIR 34 | for I in $INFILES ; do 35 | if [[ ! -r $I ]] ; then die "Sorry, ''$I'' not readable" ; fi 36 | done 37 | ################################################################################ 38 | let ALGCOUNT=0; for A in $ALGS ; do let ++ALGCOUNT; done 39 | ################################################################################ 40 | echo "ALGS=$ALGS" 41 | echo "INFILES=$INFILES" 42 | echo "Starting measurements ..." 43 | for I in $INFILES ; do 44 | echo "Input file $I ..." 45 | KDIALOG=`which kdialog 2>/dev/null` 46 | if [[ -x "$KDIALOG" ]] ; then 47 | # dont spawn kdialog in a subshell 48 | kdialog --progressbar "-" $((7*$ALGCOUNT)) 2>/dev/null >.tmp 49 | DCOPREF=`cat .tmp` 50 | rm .tmp 51 | fi 52 | if [[ -n "$DCOPREF" ]] ; then dcop $DCOPREF setAutoClose 1 ; fi 53 | for A in $ALGS ; do 54 | if [[ -n "$DCOPREF" ]] ; then 55 | dcop $DCOPREF setLabel "
$I
`$BIN --alg-name=$A`
" 2>/dev/null 56 | fi 57 | for ITER in `seq 1 7` ; do 58 | $BIN --xml-stats $A $I \ 59 | >$OUTDIR/timings_`basename ${I}`_${A}_${ITER}.xml 60 | if [[ $? -ne 0 ]] ; then 61 | echo "WARNING: failure with input=$I alg=$A" 62 | fi 63 | if [[ -n "$DCOPREF" ]] ; then PROGRESS=`dcop $DCOPREF progress` ; fi 64 | if [[ -n "$DCOPREF" ]] ; then dcop $DCOPREF setProgress $((PROGRESS+1)) 2>/dev/null ; fi 65 | done 66 | done 67 | done 68 | -------------------------------------------------------------------------------- /report/collect-memusage-statistics: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ################################################################################ 3 | # Copyright 2008 by Tommi Rantala 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to 7 | # deal in the Software without restriction, including without limitation the 8 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 9 | # sell copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 | # IN THE SOFTWARE. 
################################################################################
# Runs the sortstring binary under `memusage`, once per input file with the
# non-existent algorithm 0 (baseline) and once per (input file, algorithm)
# pair. The stderr output of each run is stored in
#   $TOOLOUTDIR/memusage_<input basename>_<algorithm number>
#
# Settings are read from the environment; empty or unset ones get a default:
#   BIN         binary to measure                   (./sortstring)
#   ALGS        whitespace separated algorithm ids  (output of $BIN --alg-nums)
#   INFILES     whitespace separated input files    (input/url3 input/nodup3
#                                                    input/genome3)
#   OUTDIR      directory that is created           (data)
#   TOOLOUTDIR  directory for the memusage output   (tool-output)
#
# When kdialog is installed, a progress dialog is opened and updated via dcop.
################################################################################
# Print an error message to stderr and terminate the script with status 1.
#   $1  message text
function die() {
    echo "ERROR: $1" >&2
    exit 1
}
################################################################################
: "${BIN:=./sortstring}"
[[ -x "$BIN" ]] || die "Sorry, binary not executable"
if [[ -z "$ALGS" ]] ; then ALGS=$("$BIN" --alg-nums) ; fi
: "${INFILES:=input/url3 input/nodup3 input/genome3}"
: "${OUTDIR:=data}"
mkdir -p "$OUTDIR"
: "${TOOLOUTDIR:=tool-output}"
mkdir -p "$TOOLOUTDIR"
# ALGS and INFILES are whitespace separated lists, so they are expanded
# unquoted on purpose wherever they are iterated.
for I in $INFILES ; do
    [[ -r "$I" ]] || die "Sorry, ''$I'' not readable"
done
################################################################################
# Number of algorithms and input files; their product is the number of steps
# of the progress bar.
ALGLIST=($ALGS)
FILELIST=($INFILES)
ALGCOUNT=${#ALGLIST[@]}
FILECOUNT=${#FILELIST[@]}
################################################################################
echo "ALGS=$ALGS"
echo "INFILES=$INFILES"
echo "Starting measurements ..."
if command -v kdialog >/dev/null 2>&1 ; then
    # dont spawn kdialog in a subshell: its output (the dcop reference) is
    # written to a temporary file and read back from there. mktemp avoids
    # overwriting an unrelated file in the working directory.
    DCOPTMP=$(mktemp) || die "Could not create temporary file"
    kdialog --caption "memusage" --progressbar "-" $((FILECOUNT*ALGCOUNT)) 2>/dev/null >"$DCOPTMP"
    DCOPREF=$(cat "$DCOPTMP")
    rm -f "$DCOPTMP"
fi
# $DCOPREF is passed unquoted to dcop, exactly as kdialog printed it.
if [[ -n "$DCOPREF" ]] ; then dcop $DCOPREF setAutoClose 1 ; fi
for FILE in $INFILES ; do
    echo "Input file $FILE ..."
    if [[ -n "$DCOPREF" ]] ; then dcop $DCOPREF setLabel "
$FILE
Baseline calculation ...
" ; fi
    # Find out baseline values by running with non-existent algorithm. We
    # can then find out how much memory the actual algorithm requires and
    # how many malloc calls it makes. The exit status of this run is
    # deliberately not checked.
    TOOLOUT="$TOOLOUTDIR/memusage_$(basename "$FILE")_0"
    memusage "$BIN" 0 "$FILE" >/dev/null 2>"$TOOLOUT"
    for ALG in $ALGS ; do
        if [[ -n "$DCOPREF" ]] ; then dcop $DCOPREF setLabel "
$FILE
$("$BIN" --alg-name="$ALG")
" ; fi
        TOOLOUT="$TOOLOUTDIR/memusage_$(basename "$FILE")_${ALG}"
        if ! memusage "$BIN" "$ALG" "$FILE" >/dev/null 2>"$TOOLOUT" ; then
            echo "WARNING: failure with alg=$ALG input=$FILE"
        fi
        # Advance the progress bar by one step.
        if [[ -n "$DCOPREF" ]] ; then
            PROGRESS=$(dcop $DCOPREF progress)
            dcop $DCOPREF setProgress $((PROGRESS+1))
        fi
    done
done
-------------------------------------------------------------------------------- /report/collect-oprofile-statistics: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ################################################################################ 3 | # Copyright 2008 by Tommi Rantala 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to 7 | # deal in the Software without restriction, including without limitation the 8 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 9 | # sell copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 | # IN THE SOFTWARE.
22 | ################################################################################ 23 | if [[ `id -u` != 0 ]] ; then 24 | echo "Sorry, profiling requires root priviledges" 25 | exit 1 26 | fi 27 | ################################################################################ 28 | function die() { 29 | echo "ERROR: $1" 30 | exit 1 31 | } 32 | ################################################################################ 33 | if [[ -z $EVENTS ]] ; then 34 | EVENTS="CPU_CLK_UNHALTED:100000 35 | INST_RETIRED:100000 36 | DTLB_MISSES:10000 37 | L1D_REPL:10000 38 | L2_LINES_IN:10000 39 | LOAD_BLOCK:10000:0x02" 40 | fi 41 | ################################################################################ 42 | if [[ -z $BIN ]] ; then BIN=./sortstring ; fi 43 | if [[ ! -x $BIN ]] ; then die "Sorry, binary not executable" ; fi 44 | if [[ -z $ALGS ]] ; then ALGS=`$BIN --alg-nums` ; fi 45 | if [[ -z $INFILES ]] ; then INFILES="input/url3 input/nodup3 input/genome3" ; fi 46 | if [[ -z $OUTDIR ]] ; then OUTDIR="data" ; fi 47 | mkdir -p $OUTDIR 48 | if [[ -z $TOOLOUTDIR ]] ; then TOOLOUTDIR="tool-output"; fi 49 | for I in $INFILES ; do 50 | if [[ ! -r "$I" ]] ; then die "Sorry, ''$I'' not readable" ; fi 51 | done 52 | # Oprofile requires absolute path. 53 | OPSESSIONDIR=$PWD/$TOOLOUTDIR/oprofile-session-dir 54 | mkdir -p $OPSESSIONDIR 55 | ################################################################################ 56 | # 1. event to monitor 57 | # 2. algorithm 58 | # 3. input file name 59 | # 4. oprofile XML output file name 60 | ################################################################################ 61 | function round() { 62 | echo " Profiling algorithm ''$2'', input ''$3'', event ''$1'' ..." 
63 | local SESSIONNAME=`basename "$3"`_${2}_`echo $1 | sed 's/:/_/g'` 64 | if [[ -e "$OPSESSIONDIR/samples/$SESSIONNAME" ]] ; then 65 | rm -rf "$OPSESSIONDIR/samples/$SESSIONNAME" 66 | fi 67 | opcontrol --session-dir=$OPSESSIONDIR --reset >>.op_log 2>&1 || die "Could not reset OProfile" 68 | # Nuke old configuration file. Only (?) way to _really_ clean up old 69 | # settings. 70 | rm -f /root/.oprofile/daemonrc 71 | opcontrol --session-dir=$OPSESSIONDIR --image=$BIN --separate=kernel --vmlinux=/usr/lib/debug/lib/modules/`uname -r`/vmlinux --event=$1 >>.op_log 2>&1 || die "Could not setup OProfile correctly" 72 | $BIN --oprofile $2 "$3" >>.op_log 2>&1 73 | if [[ $? -ne 0 ]] ; then echo "WARNING: failure was reported" ; fi 74 | opcontrol --session-dir=$OPSESSIONDIR --shutdown >>.op_log 2>&1 || die "Could not shutdown OProfile correctly" 75 | opcontrol --session-dir=$OPSESSIONDIR --save=$SESSIONNAME 76 | if [[ $? -ne 0 ]] ; then die "Failed in --save"; fi 77 | } 78 | ################################################################################ 79 | echo "ALGS=$ALGS" 80 | echo "EVENTS=$EVENTS" 81 | echo "INFILES=$INFILES" 82 | if [[ -e $OPSESSIONDIR/lock ]] ; then 83 | echo "WARNING: oprofile lock file exists!" 84 | KILLPID=`cat $OPSESSIONDIR/lock` 85 | echo "killing PID $KILLPID" 86 | kill $KILLPID 87 | sleep 1 88 | fi 89 | # kill old daemons 90 | ps -C oprofiled >/dev/null 2>&1 91 | if [[ $? -ne 1 ]] ; then 92 | echo "WARNING: oprofiled alive, killing!" 93 | killall oprofiled 94 | fi 95 | echo "Starting measurements ..." 96 | for FILE in $INFILES ; do 97 | echo "Input file $FILE ..." 
98 | for ALG in $ALGS ; do 99 | for EVENT in $EVENTS ; do 100 | round $EVENT $ALG "$FILE" $XMLOUT 101 | done 102 | done 103 | done 104 | -------------------------------------------------------------------------------- /report/generate-algs-xml: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | OUT=algs.xml 3 | if [[ -e $OUT ]] ; then 4 | rm $OUT 5 | fi 6 | echo "" >> $OUT 7 | for ALGNUM in `./sortstring --alg-nums` ; do 8 | # replace normal spaces with non-breaking space   9 | # replace normal hyphens with non-breaking hyphens ‑ 10 | ALGNAME="`./sortstring --alg-name=$ALGNUM | sed 's/ /\\ /g' | sed 's/-/\\‑/g'`" 11 | echo " " >> $OUT 12 | done 13 | echo "" >> $OUT 14 | echo "Generated ''$OUT''." 15 | -------------------------------------------------------------------------------- /report/oprofile-simplify.xsl: -------------------------------------------------------------------------------- 1 | 4 | 5 | 6 | 7 | 9 | 19 | 20 |