├── .github
    └── workflows
    │   └── build.yml
├── CMakeLists.txt
├── LICENSE
├── README.md
├── external
    ├── adaptive.c
    ├── burstsortA.c
    ├── burstsortL.c
    ├── cradix-improved.c
    ├── cradix.c
    ├── forward16.c
    ├── forward8.c
    ├── lcp-quicksort.cpp
    ├── mbmradix.c
    ├── mkqsort.c
    ├── msd.c
    ├── multikey.c
    ├── nilsson.h
    ├── parallel_string_radix_sort.cpp
    ├── parallel_string_radix_sort.h
    ├── quicksort.c
    ├── utils.c
    └── utils.h
├── report
    ├── Makefile
    ├── README
    ├── benchmark
    ├── collect-memusage-statistics
    ├── collect-oprofile-statistics
    ├── generate-algs-xml
    ├── htmlreport.xsl
    ├── oprofile-simplify.xsl
    ├── process-memusage-statistics
    ├── process-oprofile-statistics
    └── sortable.js
├── src
    ├── burstsort.cpp
    ├── burstsort2.cpp
    ├── burstsort_mkq.cpp
    ├── funnelsort.cpp
    ├── losertree.h
    ├── mergesort.cpp
    ├── mergesort_lcp.cpp
    ├── mergesort_losertree.cpp
    ├── mergesort_unstable.cpp
    ├── msd_a.cpp
    ├── msd_a2.cpp
    ├── msd_ce.cpp
    ├── msd_ci.cpp
    ├── msd_dyn_block.cpp
    ├── msd_dyn_vector.cpp
    ├── msd_lsd.cpp
    ├── multikey_block.cpp
    ├── multikey_cache.cpp
    ├── multikey_dynamic.cpp
    ├── multikey_multipivot.cpp
    ├── multikey_simd.cpp
    ├── routine.h
    ├── routines.c
    ├── routines.h
    ├── sortstring.c
    ├── util
    │   ├── cpus_allowed.c
    │   ├── cpus_allowed.h
    │   ├── debug.h
    │   ├── get_char.h
    │   ├── insertion_sort.h
    │   ├── median.h
    │   ├── sdt.h
    │   ├── timing.c
    │   ├── timing.h
    │   ├── vmainfo.c
    │   └── vmainfo.h
    ├── vector_bagwell.h
    ├── vector_block.h
    ├── vector_brodnik.h
    ├── vector_malloc.h
    └── vector_realloc.h
└── unit-test
    └── main.cpp


/.github/workflows/build.yml:
--------------------------------------------------------------------------------
 1 | name: Build and unit test
 2 | 
 3 | on: [push, pull_request]
 4 | 
 5 | jobs:
 6 |   build:
 7 |     name: ${{ matrix.build_type }} GCC on ${{ matrix.os }}
 8 |     runs-on: ${{ matrix.os }}
 9 |     strategy:
10 |       fail-fast: false
11 |       matrix:
12 |         build_type: [Release, Debug]
13 |         os: [ubuntu-20.04, ubuntu-18.04]
14 |     steps:
15 |     - uses: actions/checkout@v2
16 |     - name: Install dependencies
17 |       run: sudo apt-get install cmake
18 |     - name: cmake
19 |       run: cmake -B builddir -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
20 |     - name: make
21 |       run: make -j $(nproc) -C builddir
22 |     - name: unit test
23 |       run: ./builddir/unit-test
24 |   build-llvm:
25 |     name: ${{ matrix.build_type }} Clang on ${{ matrix.os }}
26 |     runs-on: ${{ matrix.os }}
27 |     strategy:
28 |       fail-fast: false
29 |       matrix:
30 |         build_type: [Release, Debug]
31 |         os: [ubuntu-20.04]
32 |     steps:
33 |     - uses: actions/checkout@v2
34 |     - name: Install dependencies
35 |       run: sudo apt-get install cmake clang systemtap-sdt-dev
36 |     - name: cmake
37 |       run: CC=clang CXX=clang++ cmake -B builddir -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
38 |     - name: make
39 |       run: make -j $(nproc) -C builddir
40 |     - name: unit test
41 |       run: ./builddir/unit-test
42 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 3.1)
 2 | set(CMAKE_CXX_STANDARD 11)
 3 | include(CheckIncludeFile)
 4 | 
 5 | project(sortstring)
 6 | include_directories(src src/util)
 7 | 
 8 | link_libraries(rt)
 9 | 
10 | set(INTERNAL_SRCS
11 | 	src/funnelsort.cpp
12 | 	src/msd_a.cpp
13 | 	src/msd_a2.cpp
14 | 	src/msd_lsd.cpp
15 | 	src/msd_ce.cpp
16 | 	src/msd_ci.cpp
17 | 	src/msd_dyn_block.cpp
18 | 	src/msd_dyn_vector.cpp
19 | 	src/burstsort.cpp
20 | 	src/burstsort2.cpp
21 | 	src/burstsort_mkq.cpp
22 | 	src/multikey_simd.cpp
23 | 	src/multikey_dynamic.cpp
24 | 	src/multikey_block.cpp
25 | 	src/multikey_multipivot.cpp
26 | 	src/multikey_cache.cpp
27 | 	src/mergesort.cpp
28 | 	src/mergesort_unstable.cpp
29 | 	src/mergesort_losertree.cpp
30 | 	src/mergesort_lcp.cpp
31 | 	src/routines.c
32 | 	src/util/timing.c
33 | 	src/util/cpus_allowed.c
34 | 	src/util/vmainfo.c)
35 | 
36 | set(EXTERNAL_SRCS
37 | 	external/lcp-quicksort.cpp
38 | 	external/mbmradix.c
39 | 	external/quicksort.c
40 | 	external/mkqsort.c
41 | 	external/forward8.c
42 | 	external/cradix.c
43 | 	external/cradix-improved.c
44 | 	external/msd.c
45 | 	external/multikey.c
46 | 	external/burstsortL.c
47 | 	external/utils.c
48 | 	external/adaptive.c
49 | 	external/burstsortA.c
50 | 	external/forward16.c
51 | 	external/parallel_string_radix_sort.cpp)
52 | 
53 | check_include_file(sys/sdt.h HAVE_SYS_SDT_H)
54 | if(HAVE_SYS_SDT_H)
55 | 	add_definitions(-DHAVE_SYS_SDT_H=1)
56 | endif()
57 | 
58 | set_source_files_properties(external/adaptive.c PROPERTIES COMPILE_FLAGS -Wno-sign-compare)
59 | set_source_files_properties(external/quicksort.c PROPERTIES COMPILE_FLAGS -Wno-sign-compare)
60 | 
61 | add_executable(sortstring src/sortstring.c ${INTERNAL_SRCS} ${EXTERNAL_SRCS})
62 | 
63 | add_executable(unit-test unit-test/main.cpp ${INTERNAL_SRCS} ${EXTERNAL_SRCS})
64 | target_compile_definitions(unit-test PUBLIC UNIT_TEST)
65 | 
66 | add_definitions(-Drestrict=__restrict__)
67 | set(CMAKE_CXX_FLAGS_RELEASE        "-fopenmp -g -DNDEBUG -march=native ${CMAKE_CXX_FLAGS_RELEASE}")
68 | set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-fopenmp -g -DNDEBUG -march=native ${CMAKE_CXX_FLAGS_RELWITHDEBINFO}")
69 | set(CMAKE_C_FLAGS_RELEASE          "-fopenmp -g -DNDEBUG -march=native ${CMAKE_C_FLAGS_RELEASE}")
70 | set(CMAKE_C_FLAGS_RELWITHDEBINFO   "-fopenmp -g -DNDEBUG -march=native ${CMAKE_C_FLAGS_RELWITHDEBINFO}")
71 | set(CMAKE_CXX_FLAGS                "-Wall -Wextra ${CMAKE_CXX_FLAGS}")
72 | set(CMAKE_C_FLAGS                  "-Wall -Wextra -std=c99 ${CMAKE_C_FLAGS}")
73 | 
74 | set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O1 -g -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2")
75 | set(CMAKE_C_FLAGS_DEBUG   "${CMAKE_C_FLAGS_DEBUG}   -O1 -g -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2")
76 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Permission is hereby granted, free of charge, to any person obtaining a copy
 2 | of this software and associated documentation files (the "Software"), to deal
 3 | in the Software without restriction, including without limitation the rights
 4 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 5 | copies of the Software, and to permit persons to whom the Software is
 6 | furnished to do so, subject to the following conditions:
 7 | 
 8 | The above copyright notice and this permission notice shall be included in
 9 | all copies or substantial portions of the Software.
10 | 
11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
13 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
14 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
15 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
16 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
17 | THE SOFTWARE.
18 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | A collection of string sorting algorithm implementations
  2 | ========================================================
  3 | 
  4 | This collection features several string sorting algorithm implementations that
  5 | have been tuned to take better advantage of modern hardware. Classic
  6 | implementations tend to optimize instruction counts, but when sorting large
  7 | collections of strings, we also need to focus on memory issues. All algorithms
  8 | are implemented using C and C++.
  9 | 
 10 | Technical details:
 11 |   * All of the implementations sort the strings by raw byte values. This
 12 |     means that they are mainly intended for research use.
 13 |   * Includes several variants of known and efficient (string) sorting
 14 |     algorithms, such as MSD radix sort, burstsort and multi-key-quicksort.
 15 |   * Emphasis on reducing cache misses and memory stalls.
 16 |   * Includes the tools to create an HTML report that can be
 17 |     used to compare the provided implementations. The report includes details
 18 |     such as TLB, L1 and L2 cache misses, run times and memory peak usage.
 19 |   * Supports Linux huge pages. For more information, see below.
 20 | 
 21 | 
 22 | License
 23 | -------
 24 | 
 25 | MIT.
 26 | 
 27 | Exception: The directory `external` contains files that are included for
 28 | reference purposes and that may or may not be compatible with the MIT license.
 29 | 
 30 | 
 31 | Copyright
 32 | ---------
 33 | 
 34 | Copyright © 2007-2012 by Tommi Rantala <tt.rantala@gmail.com>
 35 | 
 36 | The directory `external` contains files, that are included for reference
 37 | purposes, and are copyright by their respective authors.
 38 | 
 39 | 
 40 | Requirements
 41 | ------------
 42 | 
 43 |   * C++11
 44 |   * CMake
 45 | 
 46 | 
 47 | Compilation
 48 | -----------
 49 | 
 50 | Default compilation with GCC:
 51 | 
 52 |     $ git clone https://github.com/rantala/string-sorting.git
 53 |     $ mkdir string-sorting-build
 54 |     $ cd string-sorting-build
 55 |     $ cmake -DCMAKE_BUILD_TYPE=Release ../string-sorting
 56 |     $ make
 57 |     $ ./sortstring
 58 | 
 59 | Use a separate debug build for easier debugging:
 60 | 
 61 |     $ mkdir debug-build
 62 |     $ cd debug-build
 63 |     $ cmake -DCMAKE_BUILD_TYPE=Debug ../string-sorting
 64 | 
 65 | 
 66 | Huge pages
 67 | ----------
 68 | 
 69 | The default page size on many computer architectures is 4 kilobytes. When
 70 | working with large data sets, this means that the input is spread to thousands
 71 | of memory pages. Unfortunately random access in thousands of pages can be slow
 72 | (see e.g. http://en.wikipedia.org/wiki/Translation_lookaside_buffer).
 73 | 
 74 | To alleviate this exact problem, many architectures have support for larger
 75 | page size. For example modern x86 has support for 2/4 megabyte "huge pages".
 76 | With such large pages, even large data sets fit into a much smaller amount of
 77 | memory pages.
 78 | 
 79 | In this program, support for huge pages is enabled using either --hugetlb-text
 80 | or --hugetlb-ptrs, or both. The former option places the input data (i.e. the
 81 | actual strings from the given file) into huge pages, and the latter option
 82 | places the string pointer array into huge pages. Using huge pages in Linux
 83 | requires CPU support, and properly adjusted kernel settings.
 84 | 
 85 | The external library libhugetlbfs (https://github.com/libhugetlbfs/libhugetlbfs)
 86 | can be used to replace all calls to malloc to use huge pages. If this library is
 87 | used, the aforementioned options are not needed.
 88 | 
 89 | 
 90 | HTML report creation
 91 | --------------------
 92 | 
 93 | Requirements:
 94 |   * OProfile for most measurements, probably also requires root privileges.
 95 |      - The default settings use Intel Core 2 specific events. When profiling on
 96 |        other platforms, you will most likely need to modify the scripts in the
 97 |        report/ directory.
 98 |   * /usr/bin/memusage for measuring the memory peak usage. This is a GNU libc
 99 |     utility.
100 | 


--------------------------------------------------------------------------------
/external/burstsortL.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |    This is an implementation of Burstsort using linked lists for buckets. A
  3 |    more complete discussion of the algorithm, the implementation and a
  4 |    comparison with other well known sorting algorithms, both radix sorting and
  5 |    comparison-based methods, can be found in:
  6 | 
  7 |    R. Sinha and J. Zobel, "Cache-Conscious Sorting of Large Sets of Strings
  8 |    with Dynamic Tries", In Proc. 5th Workshop Algorithm Engineering and
  9 |    Experiments (ALENEX), R. Ladner (ed), Baltimore, Maryland, USA, January
 10 |    2003.
 11 | 
 12 |    R. Sinha and J. Zobel, "Efficient Trie-based Sorting of Large Sets of
 13 |    Strings", In Proc. Australasian Computer Science Conference, M. Oudshoorn
 14 |    (ed), Adelaide, Australia, February, 2003.
 15 | 
 16 |    The code presented in this file has been tested with care but is not
 17 |    guaranteed for any purpose. The writer does not offer any warranties nor
 18 |    does he accept any liabilities with respect to the code.
 19 | 
 20 |    Ranjan Sinha, 28 July 2003.
 21 | 
 22 |    School of Computer Science and Information Technology, RMIT University,
 23 |    Melbourne, Australia rsinha@cs.rmit.edu.au
 24 | 
 25 |    note:
 26 |        1. It is a work in progress
 27 |        2. Not tuned for number of instructions, use the highest optimizations such as O3
 28 |        3. Any relevant changes to code may please be intimated to me
 29 |        4. It is solely meant for academic use
 30 | */
 31 | 
 32 | #include "routine.h"
 33 | #include "utils.h"
 34 | #include <stdlib.h>
 35 | 
/* Bucket capacity: burstinsertL() bursts a bucket into a new trie node once
   its counter exceeds this value. */
#define THRESHOLD 8192
/* Number of slots per trie node, one for each possible byte value. */
#define ALPHABET 256

/*
 * Trie node. For every byte value c:
 *   counts[c] <  0 : ptrs[c] points to a child trie node
 *   counts[c] >= 0 : ptrs[c] is the head of a LIST bucket (stored through a
 *                    cast to TRIE *), or NULL when the bucket is empty
 * The counter of slot 0 (strings ending at this node) is not incremented by
 * burstinsertL() when it inserts a string, so that bucket is never burst.
 */
typedef struct trierec
{
    struct trierec *ptrs[ALPHABET];
    int counts[ALPHABET];
} TRIE;

/* Singly linked bucket entry: one string and the next entry of its bucket. */
typedef struct strlistrec
{
    unsigned char	*word;
    struct strlistrec *next;
} LIST;
 50 | 
 51 | static void
 52 | burstinsertL(TRIE *root, LIST *list,  size_t scnt)
 53 | {
 54 |     TRIE	*new_;
 55 |     TRIE	*curr;
 56 |     LIST	*node;
 57 |     LIST	*lp, *np;
 58 |     unsigned int	i, p;
 59 |     unsigned char	c, cc;
 60 | 
 61 |     for( i=0 ; i<scnt ; i++ )
 62 |     {
 63 |         curr = root;
 64 |         node = &list[i];
 65 | 
 66 |         for( p=0, c=list[i].word[p] ; curr->counts[c]<0 ; curr=curr->ptrs[c], p++, c=list[i].word[p] )
 67 |             ;
 68 | 
 69 |         node->next = (LIST *) curr->ptrs[c];
 70 |         curr->ptrs[c] = (TRIE *) node;
 71 | 
 72 |         if( c=='\0' )
 73 |         {
 74 |             ;  /* leave counter alone to avoid overflow, no burst */
 75 |         }
 76 |         else
 77 |         {
 78 |             curr->counts[c]++;
 79 |             if( curr->counts[c]>THRESHOLD )  /* burst */
 80 |             {
 81 |                 curr->counts[c] = -1;
 82 |                 p++;
 83 |                 new_ = (TRIE *) calloc(1, sizeof(TRIE));
 84 | 
 85 |                 lp = (LIST *) curr->ptrs[c], cc = lp->word[p], np = lp->next;
 86 |                 while( lp!=NULL )
 87 |                 {
 88 |                     lp->next = (LIST *) new_->ptrs[cc];
 89 |                     new_->ptrs[cc] = (TRIE *) lp;
 90 |                     new_->counts[cc] ++;
 91 |                     lp = np;
 92 |                     if( lp!=NULL )
 93 |                     {
 94 |                         cc = lp->word[p];
 95 |                         np = lp->next;
 96 |                     }
 97 |                 }
 98 |                 curr->ptrs[c] = new_;
 99 |                 curr->counts[c] = -1; /* used to traverse along the trie hierarchy                     */
100 |                 curr = new_;           /* used to burst recursive, so point curr to new                 */
101 |                 c = cc;               /* point to the character that the last string was inserted into */
102 |             }
103 |         }
104 |     }
105 | }
106 | 
107 | static int
108 | bursttraverseL(TRIE *node, unsigned char **strings, int pos, int deep)
109 | {
110 |     LIST	*l;
111 |     unsigned int i, off;
112 |     unsigned int sizeOfContainer = 0;
113 | 
114 |     for( i=0 ; i<ALPHABET ; i++ )
115 |     {
116 |         if( node->counts[i]<0 )
117 |         {
118 |             pos = bursttraverseL(node->ptrs[i], strings, pos, deep+1);
119 | 	    }
120 |         else
121 |         {
122 |             for( off=pos, l=(LIST *) node->ptrs[i] ; l!=NULL ; off++, l=l->next )
123 |             {
124 |                 strings[off] = l->word;
125 |             }
126 |             sizeOfContainer = (off - pos); 
127 | 
128 |             if( i>0 && sizeOfContainer > 1 )
129 |             {
130 |                 if (sizeOfContainer < INSERTBREAK)
131 |                     inssort( strings+pos, off-pos, deep + 1);     
132 |                 else
133 |                     mkqsort( strings+pos, off-pos, deep + 1);
134 |             }
135 |             pos = off;
136 |         }
137 |     }
138 |     free(node);
139 |     return pos;
140 | }
141 | 
142 | void
143 | burstsortL(unsigned char *strings[], size_t scnt)
144 | {
145 |     TRIE	*root;
146 |     LIST	*listnodes;
147 |     unsigned int i;
148 | 
149 |     listnodes = (LIST *) calloc(scnt, sizeof(LIST));
150 | 
151 |     for( i=scnt; i-- ;)
152 |         listnodes[i].word = strings[i];
153 | 
154 |     root = (TRIE *) calloc(1, sizeof(TRIE));
155 | 
156 |     (void) burstinsertL(root, listnodes, scnt);
157 | 
158 |     (void) bursttraverseL(root, strings, 0, 0);
159 | 
160 |     free(listnodes);
161 | 
162 |     return;
163 | }
164 | ROUTINE_REGISTER_SINGLECORE(burstsortL,
165 | 		"Burstsort with List buckets by R. Sinha and J. Zobel")
166 | 


--------------------------------------------------------------------------------
/external/cradix-improved.c:
--------------------------------------------------------------------------------
  1 | /* This source code is from the following article:
  2 |  *
  3 |  * @article{1226858,
  4 |  *     author = {Waihong Ng and Katsuhiko Kakehi},
  5 |  *     title = {Cache Efficient Radix Sort for String Sorting},
  6 |  *     journal = {IEICE Trans. Fundam. Electron. Commun. Comput. Sci.},
  7 |  *     volume = {E90-A},
  8 |  *     number = {2},
  9 |  *     year = {2007},
 10 |  *     issn = {0916-8508},
 11 |  *     pages = {457--466},
 12 |  *     doi = {http://dx.doi.org/10.1093/ietfec/e90-a.2.457},
 13 |  *     publisher = {Oxford University Press},
 14 |  *     address = {Oxford, UK},
 15 |  * }
 16 |  *
 17 |  * Appendix: Source Code of CRadix Sort
 18 |  */
 19 | 
 20 | /*
 21 |   This code is based on the program 3.1 of Engineering radix sort by P.M.
 22 |   McIlroy, K. Bostic and M.D. McIlroy, Comput. Syst. vol.6, 1993.
 23 | 
 24 |   The main improvement is the adoption of the key buffer. Key buffers are
 25 |   filled by the function FillKeyBuffer() while the following code in the main
 26 |   body is responsible for permuting the key buffers in the order as same as the
 27 |   access order as the key pointers
 28 | 
 29 |        memcpy(ta, tk, sizeof(unsigned char)*n*kbsd);
 30 |        for (i=0, kb=(LPBYTE)ta; i<n;
 31 |              i++, *t+=kbsd1) {
 32 |           t=&GrpKB[*kb]; ss=*t; tt=kb+1;
 33 |           for (j=0; j<kbsd1; j++)
 34 |               { *ss=*tt; ss++; tt++; }
 35 |           kb+=kbsd;
 36 |         }
 37 | 
 38 |   Other improvements described in Sect. 6 are as follows.
 39 | 
 40 |   The following statements in the main body implement
 41 |   the improvement described in Sect. 6.1
 42 | 
 43 |         cptr=&count[AL];
 44 | 	while (*cptr<1) cptr++;
 45 | 	if (*cptr<n) gs=n; else gs=0;
 46 | 
 47 |   RDFK() -- the function which read the keys directly as described in Sect. 6.2
 48 | 
 49 |   Other modifications are mainly for assembling the improvements into the
 50 |   original code
 51 | */
 52 | 
 53 | /* Copyright 2008 by Tommi Rantala <tt.rantala@gmail.com>
 54 |  *  - replace original isort() with slightly different insertion_sort()
 55 |  *  - re-implement FillKeyBuffer() to reduce memory stalls
 56 |  *  - use caching in RDFK() to reduce memory stalls
 57 |  */
 58 | 
 59 | #include "routine.h"
 60 | #include <stdlib.h>
 61 | #include <string.h>
 62 | 
 63 | #define AS 256 /* Alphabet size */
 64 | #define BS 4 /* key buffer size */
 65 | #define AL 0 /* Alphabet lower bound */
 66 | #define AH 255 /* Alphabet upper bound */
 67 | #define IC 20 /* Insertion sort cut off */
 68 | #define KBC 128 /* Cache cut off */
 69 | #define SS 4096 /* stack size */
 70 | 
 71 | #define push(a, k, n, b) _sp->sa=a, _sp->sk=k, _sp->sn=n, (_sp++)->sb=b
 72 | #define pop(a, k, n, b) a=(--_sp)->sa, k=_sp->sk, n=_sp->sn, b=_sp->sb
 73 | #define stackempty() (_sp<=stack)
 74 | #define splittable(c) c > 0 && count[c] > IC
 75 | typedef size_t UINT;
 76 | typedef unsigned char BYTE, *LPBYTE, **LPPBYTE;
 77 | typedef unsigned char STR, *LPSTR, **LPPSTR, **STRPARR;
 78 | 
 79 | static struct Stack {
 80 | 	LPSTR* sa; LPBYTE sk;
 81 | 	int sn, sb;
 82 | } stack[SS], *_sp=stack;
 83 | 
 84 | static void
 85 | insertion_sort(unsigned char** strings, int n, size_t depth)
 86 | {
 87 | 	for (unsigned char** i = strings + 1; --n > 0; ++i) {
 88 | 		unsigned char** j = i;
 89 | 		unsigned char* tmp = *i;
 90 | 		while (j > strings) {
 91 | 			unsigned char* s = *(j-1)+depth;
 92 | 			unsigned char* t = tmp+depth;
 93 | 			while (*s == *t && *s) {
 94 | 				++s;
 95 | 				++t;
 96 | 			}
 97 | 			if (*s <= *t) break;
 98 | 			*j = *(j-1);
 99 | 			--j;
100 | 		}
101 | 		*j = tmp;
102 | 	}
103 | }
104 | 
105 | static
106 | void FillKeyBuffer(LPPSTR a, LPBYTE kb, UINT* count, UINT n, UINT d)
107 | {
108 | 	for (size_t i=0; i<n; ++i) {
109 | 		const unsigned char* str = a[i];
110 | 		unsigned j=0;
111 | 		for (; j<BS; ++j) {
112 | 			unsigned char c = str[d+j];
113 | 			kb[BS*i+j] = c;
114 | 			if (c==0) break;
115 | 		}
116 | 		if (j<BS) kb[BS*i+j] = 0;
117 | 	}
118 | 	// Make another sweep through data to calculate counts. Should be very
119 | 	// fast, because we access 'count' linearly from start to finish.
120 | 	for (size_t i=0; i<n; ++i) ++count[kb[BS*i]];
121 | }
122 | 
123 | static
124 | void RDFK(LPPSTR* GrpKP, LPPSTR a, UINT n, LPPSTR ta,
125 | 		UINT* count, UINT d)
126 | { /* Read Directly From Keys */
127 | 	LPPSTR ak, tc; UINT i, *cptr, gs; unsigned char c=0;
128 | 	for (i=0; i<n-n%32; i+=32) {
129 | 		unsigned char cache[32];
130 | 		for (unsigned j=0; j<32; ++j) cache[j] = a[i+j][d];
131 | 		for (unsigned j=0; j<32; ++j) ++count[cache[j]];
132 | 	}
133 | 	for (; i<n; i++) count[a[i][d]]++;
134 | 	cptr=&count[AL]; while (*cptr<1) cptr++;
135 | 	if (*cptr<n) gs=n;
136 | 	else { c=(cptr-&count[AL])+AL; gs=0; }
137 | 	if (!gs) {
138 | 		if (splittable(c)) push(a, 0, n, d+1);
139 | 		else if (n>1 && c>0) insertion_sort(a, n, d);
140 | 		count[c]=0; return;
141 | 	}
142 | 	GrpKP[AL]=a;
143 | 	for (ak=a, i=AL; i<AH; i++) GrpKP[i+1]=ak+=count[i];
144 | 	memcpy(ta, a, sizeof(LPSTR)*n);
145 | 	for (i=0, tc=ta; i<n; i++, tc++) {
146 | 		*GrpKP[ta[i][d]]=*tc; GrpKP[ta[i][d]]++;
147 | 	}
148 | 	for (ak=a, i=AL; i<AH; i++) {
149 | 		if (splittable(i)) push(ak, 0, count[i], d+1);
150 | 		else if (count[i]>1 && i>0) insertion_sort(ak, count[i], d);
151 | 		ak+=count[i]; count[i]=0;
152 | 	}
153 | }
154 | 
155 | void cradix_rantala(LPPSTR a, UINT n)
156 | {
157 | 	UINT kbsd, kbsd1, i, j, stage, d, MEMSIZE;
158 | 	UINT *cptr, gs, count[AS];
159 | 	LPSTR tj, tk, ax, tl, kb, ss, tt, GrpKB[AS];
160 | 	LPPSTR GrpKP[AS], ak, ta, tc, t;
161 | 	if (sizeof(LPPSTR)>sizeof(unsigned char)*BS)
162 | 		MEMSIZE=sizeof(LPPSTR);
163 | 	else
164 | 		MEMSIZE=sizeof(unsigned char)*BS;
165 | 	/* workspace */
166 | 	ta = (LPPSTR)malloc(n * MEMSIZE);
167 | 	/* memory for key buffers */
168 | 	tk = (LPBYTE)malloc(n * sizeof(unsigned char) * BS);
169 | 	tj=tk;
170 | 	push(a, tk, n, 0); for (i=AL; i<AH; i++) count[i]=0;
171 | 	while (!stackempty()) {
172 | 		pop(a, tk, n, stage);
173 | 		if (tk) {
174 | 			/* set the counters and
175 | 			   fill the key buffers if necessary */
176 | 			if ((d=stage%BS)!=0)
177 | 				for (i=0, tl=tk; i<n; i++, tl+=(BS-d))
178 | 					count[*tl]++;
179 | 			else {
180 | 				if (n>KBC)
181 | 					FillKeyBuffer(a, tk, count, n, stage);
182 | 				else {
183 | 					RDFK(GrpKP, a, n, ta, count, stage);
184 | 					continue;
185 | 				}
186 | 			}
187 | 			/* check if there is only 1 group */
188 | 			cptr=&count[AL];
189 | 			while (*cptr<1) cptr++;
190 | 			if (*cptr<n) gs=n; else gs=0;
191 | 			/* calculate both key ptr and
192 | 			   key buffer addresses */
193 | 			kbsd=BS-d, kbsd1=kbsd-1;
194 | 			GrpKP[AL]=a; GrpKB[AL]=tk;
195 | 			for (ak=a, ax=tk, i=AL; i<AH; i++) {
196 | 				GrpKP[i+1]=ak+=count[i];
197 | 				GrpKB[i+1]=ax+=count[i]*kbsd1;
198 | 			}
199 | 
200 | 			/* permute the key ptrs */
201 | 			memcpy(ta, a, sizeof(LPSTR)*gs);
202 | 			for (i=0, ax=tk, tc=ta; i<gs;
203 | 					i++, ax+=kbsd, tc++) {
204 | 				*GrpKP[*ax]=*tc; GrpKP[*ax]++;
205 | 			}
206 | 			/* permute the key buffers */
207 | 			memcpy(ta, tk, sizeof(unsigned char)*n*kbsd);
208 | 			for (i=0, kb=(LPBYTE)ta; i<n; i++, *t+=kbsd1) {
209 | 				t=&GrpKB[*kb]; ss=*t; tt=kb+1;
210 | 				for (j=0; j<kbsd1; j++)
211 | 				{ *ss=*tt; ss++; tt++; }
212 | 				kb+=kbsd;
213 | 			}
214 | 			/* down 1 level */
215 | 			for (ak=a, ax=tk, i=AL; i<AH; i++) {
216 | 				if (splittable(i))
217 | 				{ push(ak, ax, count[i], stage+1); }
218 | 				else if (count[i]>1 && i>0)
219 | 					insertion_sort(ak, count[i], stage);
220 | 				ak+=count[i]; ax+=count[i]*(kbsd1);
221 | 				count[i]=0;
222 | 			}
223 | 		}
224 | 		else RDFK(GrpKP, a, n, ta, count, stage);
225 | 	}
226 | 	free((void*)ta);
227 | 	free((void*)tj);
228 | }
229 | ROUTINE_REGISTER_SINGLECORE(cradix_rantala,
230 | 		"CRadix by Waihong Ng and Katsuhiko Kakehi,"
231 | 		" with modifications by Tommi Rantala")
232 | 


--------------------------------------------------------------------------------
/external/cradix.c:
--------------------------------------------------------------------------------
  1 | /* This source code is from the following article:
  2 |  *
  3 |  * @article{1226858,
  4 |  *     author = {Waihong Ng and Katsuhiko Kakehi},
  5 |  *     title = {Cache Efficient Radix Sort for String Sorting},
  6 |  *     journal = {IEICE Trans. Fundam. Electron. Commun. Comput. Sci.},
  7 |  *     volume = {E90-A},
  8 |  *     number = {2},
  9 |  *     year = {2007},
 10 |  *     issn = {0916-8508},
 11 |  *     pages = {457--466},
 12 |  *     doi = {http://dx.doi.org/10.1093/ietfec/e90-a.2.457},
 13 |  *     publisher = {Oxford University Press},
 14 |  *     address = {Oxford, UK},
 15 |  * }
 16 |  *
 17 |  * Appendix: Source Code of CRadix Sort
 18 |  */
 19 | 
 20 | /*
 21 |   This code is based on the program 3.1 of Engineering radix sort by P.M.
 22 |   McIlroy, K. Bostic and M.D. McIlroy, Comput. Syst. vol.6, 1993.
 23 | 
 24 |   The main improvement is the adoption of the key buffer. Key buffers are
 25 |   filled by the function FillKeyBuffer() while the following code in the main
 26 |   body is responsible for permuting the key buffers in the order as same as the
 27 |   access order as the key pointers
 28 | 
 29 |        memcpy(ta, tk, sizeof(unsigned char)*n*kbsd);
 30 |        for (i=0, kb=(LPBYTE)ta; i<n;
 31 |              i++, *t+=kbsd1) {
 32 |           t=&GrpKB[*kb]; ss=*t; tt=kb+1;
 33 |           for (j=0; j<kbsd1; j++)
 34 |               { *ss=*tt; ss++; tt++; }
 35 |           kb+=kbsd;
 36 |         }
 37 | 
 38 |   Other improvements described in Sect. 6 are as follows.
 39 | 
 40 |   The following statements in the main body implement
 41 |   the improvement described in Sect. 6.1
 42 | 
 43 |         cptr=&count[AL];
 44 | 	while (*cptr<1) cptr++;
 45 | 	if (*cptr<n) gs=n; else gs=0;
 46 | 
 47 |   RDFK() -- the function which read the keys directly as described in Sect. 6.2
 48 | 
 49 |   isort() - the insertion sort routine as described in Sect. 6.3
 50 | 
 51 |   Other modifications are mainly for assembling the improvements into the
 52 |   original code
 53 | */
 54 | 
 55 | #include "routine.h"
 56 | #include <stdlib.h>
 57 | #include <string.h>
 58 | 
 59 | #define AS 256 /* Alphabet size */
 60 | #define BS 4 /* key buffer size */
 61 | #define AL 0 /* Alphabet lower bound */
 62 | #define AH 255 /* Alphabet upper bound */
 63 | #define IC 20 /* Insertion sort cut off */
 64 | #define KBC 128 /* Cache cut off */
 65 | #define SS 4096 /* stack size */
 66 | 
 67 | #define push(a, k, n, b) sp->sa=a, sp->sk=k, sp->sn=n, (sp++)->sb=b
 68 | #define pop(a, k, n, b) a=(--sp)->sa, k=sp->sk, n=sp->sn, b=sp->sb
 69 | #define stackempty() (sp<=stack)
 70 | #define splittable(c) c > 0 && count[c] > IC
 71 | typedef size_t UINT;
 72 | typedef unsigned char BYTE, *LPBYTE, **LPPBYTE;
 73 | typedef unsigned char STR, *LPSTR, **LPPSTR, **STRPARR;
 74 | 
 75 | static struct Stack {
 76 | 	LPSTR* sa; LPBYTE sk;
 77 | 	int sn, sb;
 78 | } stack[SS], *sp=stack;
 79 | 
 80 | static
 81 | void FillKeyBuffer(LPPSTR a, LPBYTE kb, UINT* count, UINT n, UINT d)
 82 | {
 83 | 	UINT i, j; LPSTR c, x;
 84 | 	for (i=0; i<n; i++) {
 85 | 		x=a[i]+d; count[*x]++;
 86 | 		for (j=0, c=x; *c!=0 && j<BS; j++)
 87 | 		{ *kb=*c; kb++; c++; }
 88 | 		if (j<BS) { *kb='\0'; kb+=BS-j; }
 89 | 	}
 90 | }
 91 | static
 92 | void isort(unsigned char **a, int n, int d)
 93 | {
 94 | 	unsigned char **pi, **pj, *s, *t;
 95 | 	for (pi = a + 1; --n > 0; pi++)
 96 | 		for (pj = pi; pj > a; pj--) {
 97 | 			for (s=*(pj-1)+d, t=*pj+d;
 98 | 					*s==*t && *s!=0; s++, t++) ;
 99 | 			if (*s <= *t) break;
100 | 			t = *(pj); *(pj) = *(pj-1);
101 | 			*(pj-1) = t;
102 | 		}
103 | }
104 | static
105 | void RDFK(LPPSTR* GrpKP, LPPSTR a, UINT n, LPPSTR ta,
106 | 		UINT* count, UINT d)
107 | { /* Read Directly From Keys */
108 | 	LPPSTR ak, tc; UINT i, *cptr, gs; unsigned char c=0;
109 | 	for (i=0; i<n; i++) count[a[i][d]]++;
110 | 	cptr=&count[AL]; while (*cptr<1) cptr++;
111 | 	if (*cptr<n) gs=n;
112 | 	else { c=(cptr-&count[AL])+AL; gs=0; }
113 | 	if (!gs) {
114 | 		if (splittable(c)) push(a, 0, n, d+1);
115 | 		else if (n>1 && c>0) isort(a, n, d);
116 | 		count[c]=0; return;
117 | 	}
118 | 	GrpKP[AL]=a;
119 | 	for (ak=a, i=AL; i<AH; i++) GrpKP[i+1]=ak+=count[i];
120 | 	memcpy(ta, a, sizeof(LPSTR)*n);
121 | 	for (i=0, tc=ta; i<n; i++, tc++) {
122 | 		*GrpKP[ta[i][d]]=*tc; GrpKP[ta[i][d]]++;
123 | 	}
124 | 	for (ak=a, i=AL; i<AH; i++) {
125 | 		if (splittable(i)) push(ak, 0, count[i], d+1);
126 | 		else if (count[i]>1 && i>0) isort(ak, count[i], d);
127 | 		ak+=count[i]; count[i]=0;
128 | 	}
129 | }
130 | void CRadix(LPPSTR a, UINT n) /* CRadix driver: works through (a, tk, n, stage) tasks kept on an explicit stack */
131 | {
132 | 	UINT kbsd, kbsd1, i, j, stage, d, MEMSIZE;
133 | 	UINT *cptr, gs, count[AS];
134 | 	LPSTR tj, tk, ax, tl, kb, ss, tt, GrpKB[AS];
135 | 	LPPSTR GrpKP[AS], ak, ta, tc, t;
136 | 	if (sizeof(LPPSTR)>sizeof(unsigned char)*BS) /* MEMSIZE = larger of one pointer and BS bytes */
137 | 		MEMSIZE=sizeof(LPPSTR);
138 | 	else
139 | 		MEMSIZE=sizeof(unsigned char)*BS;
140 | 	/* workspace */
141 | 	ta = (LPPSTR)malloc(n * MEMSIZE); /* NOTE(review): malloc result is used unchecked */
142 | 	/* memory for key buffers */
143 | 	tk = (LPBYTE)malloc(n * sizeof(unsigned char) * BS); /* NOTE(review): unchecked as well */
144 | 	tj=tk; /* keep the allocation base for free(); tk is reused as the per-task buffer */
145 | 	push(a, tk, n, 0); for (i=AL; i<AH; i++) count[i]=0; /* first task: whole array at stage 0; clear counters */
146 | 	while (!stackempty()) {
147 | 		pop(a, tk, n, stage);
148 | 		if (tk) { /* the task comes with a key buffer */
149 | 			/* set the counters and
150 | 			   fill the key buffers if necessary */
151 | 			if ((d=stage%BS)!=0) /* d = stage modulo BS; nonzero means the buffer is already filled */
152 | 				for (i=0, tl=tk; i<n; i++, tl+=(BS-d))
153 | 					count[*tl]++;
154 | 			else {
155 | 				if (n>KBC)
156 | 					FillKeyBuffer(a, tk, count, n, stage);
157 | 				else { /* at most KBC keys: let RDFK handle the task instead */
158 | 					RDFK(GrpKP, a, n, ta, count, stage);
159 | 					continue;
160 | 				}
161 | 			}
162 | 			/* check if there is only 1 group */
163 | 			cptr=&count[AL];
164 | 			while (*cptr<1) cptr++;
165 | 			if (*cptr<n) gs=n; else gs=0; /* gs = n with several groups, 0 with a single one */
166 | 			/* calculate both key ptr and
167 | 			   key buffer addresses */
168 | 			kbsd=BS-d, kbsd1=kbsd-1; /* bytes per buffer entry now, and after dropping the current byte */
169 | 			GrpKP[AL]=a; GrpKB[AL]=tk;
170 | 			for (ak=a, ax=tk, i=AL; i<AH; i++) {
171 | 				GrpKP[i+1]=ak+=count[i];
172 | 				GrpKB[i+1]=ax+=count[i]*kbsd1;
173 | 			}
174 | 
175 | 			/* permute the key ptrs */
176 | 			memcpy(ta, a, sizeof(LPSTR)*gs); /* gs==0 (single group) skips the pointer permutation */
177 | 			for (i=0, ax=tk, tc=ta; i<gs;
178 | 					i++, ax+=kbsd, tc++) {
179 | 				*GrpKP[*ax]=*tc; GrpKP[*ax]++;
180 | 			}
181 | 			/* permute the key buffers */
182 | 			memcpy(ta, tk, sizeof(unsigned char)*n*kbsd);
183 | 			for (i=0, kb=(LPBYTE)ta; i<n; i++, *t+=kbsd1) { /* *t+=kbsd1 advances the group's write position */
184 | 				t=&GrpKB[*kb]; ss=*t; tt=kb+1; /* copy the entry without its first byte */
185 | 				for (j=0; j<kbsd1; j++)
186 | 				{ *ss=*tt; ss++; tt++; }
187 | 				kb+=kbsd;
188 | 			}
189 | 			/* down 1 level */
190 | 			for (ak=a, ax=tk, i=AL; i<AH; i++) {
191 | 				if (splittable(i))
192 | 				{ push(ak, ax, count[i], stage+1); }
193 | 				else if (count[i]>1 && i>0)
194 | 					isort(ak, count[i], stage);
195 | 				ak+=count[i]; ax+=count[i]*(kbsd1);
196 | 				count[i]=0; /* leave the counters cleared for the next task */
197 | 			}
198 | 		}
199 | 		else RDFK(GrpKP, a, n, ta, count, stage); /* task without a key buffer */
200 | 	}
201 | 	free((void*)ta);
202 | 	free((void*)tj);
203 | }
204 | 
205 | /* Registered entry point; forwards to CRadix() as a plain call,
206 |    since a void function may not return an expression in C. */
207 | void cradix(unsigned char **strings, size_t n)
208 | { CRadix(strings, n); }
209 | ROUTINE_REGISTER_SINGLECORE(cradix,
210 | 		"CRadix by Waihong Ng and Katsuhiko Kakehi")
211 | 


--------------------------------------------------------------------------------
/external/forward8.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |    Forward radixsort with a fixed sized alphabet. The algorithm
  3 |    inspects one character at a time. This code will work well for
  4 |    alphabets of small size (8 bits). Larger alphabets (16 bits or
  5 |    more) may, however, require some heuristic to avoid inspecting
  6 |    empty buckets.
  7 | 
  8 |    S. Nilsson. Radix Sorting and Searching. PhD thesis, Department
  9 |    of Computer Science, Lund University, 1990.
 10 | 
 11 |    The code presented in this file has been tested with care but is
 12 |    not guaranteed for any purpose. The writer does not offer any
 13 |    warranties nor does he accept any liabilities with respect to
 14 |    the code.
 15 | 
 16 |    Stefan Nilsson, 8 jan 1997.
 17 | 
 18 |    Laboratory of Information Processing Science
 19 |    Helsinki University of Technology
 20 |    Stefan.Nilsson@hut.fi
 21 | */
 22 | 
 23 | #include "routine.h"
 24 | #include "nilsson.h"
 25 | #include <stdlib.h>
 26 | 
 27 | #define IS_ENDMARK(ch) (ch == '\0') /* true when ch is the string terminator */
 28 | #define CHAR(s, p) s[p] /* character of string s at position p */
 29 | 
 30 | typedef struct grouprec *group; /* groups are handled through pointers */
 31 | typedef struct bucketrec *bucket; /* so are bucket items */
 32 | 
 33 | struct grouprec {
 34 |    list head, tail; /* a list of elements */
 35 |    group next;      /* the next group */
 36 |    group nextunf;   /* the next unfinished group */
 37 |    group insp;      /* insertion point */
 38 |    boolean finis;   /* is the group finished? */
 39 | };
 40 | /* The group structure member insp is used to make splitting of
 41 |    groups possible during the phase where elements are moved from
 42 |    buckets back into their previous groups. The group structure
 43 |    member finis indicates if the elements in the group are sorted;
 44 |    this information makes it easy to skip finished groups during a
 45 |    traversal of the group data structure */
 46 | 
 47 | struct bucketrec {
 48 |    list head, tail; /* a list of elements */
 49 |    int size;        /* list length */
 50 |    group tag;       /* group tag: the group the list was taken from */
 51 |    bucket next;     /* next bucket item */
 52 | };
 53 | 
 54 | static memory groupmem[1]; /* allocator state for group records */
 55 | static memory bucketmem[1]; /* allocator state for bucket items */
 56 | 
 57 | /* Add the sublist head..tail (size elements taken from group g) to
 58 |    the bucket chain *b. If the first item of the chain carries the
 59 |    tag g the sublist is appended to it; otherwise a new item is put
 60 |    in front of the chain. The sublist ends up NULL-terminated. */
 61 | static void intobucket(bucket *b, list head, list tail,
 62 |                        int size, group g)
 63 | {
 64 |    bucket front = *b;
 65 | 
 66 |    if (front && front->tag == g) {   /* same tag: append */
 67 |       front->tail->next = head;
 68 |       front->size += size;
 69 |    } else {   /* start a new bucket item */
 70 |       bucket item = (bucket) allocmem(bucketmem, sizeof(struct bucketrec));
 71 |       item->tag = g;
 72 |       item->size = size;
 73 |       item->head = head;
 74 |       item->next = front;
 75 |       *b = front = item;
 76 |    }
 77 |    front->tail = tail;
 78 |    tail->next = NULL;
 79 | }
 80 | 
 81 | /* Traverse the groups and put the elements into buckets.
 82 |    The parameter pos indicates the current position in the string.
 83 |    To be able to skip groups that are already sorted we keep track
 84 |    of the previous group. Also, the previously read character is
 85 |    recorded. In this way it is possible to move the elements in
 86 |    blocks consisting of strings that have a common character in
 87 |    position pos. Furthermore, a group that is not split during this
 88 |    phase is left behind and not put into a bucket. */
 89 | static void intobuckets(group g, bucket b[], int pos)
 90 | {
 91 |    group prevg;
 92 |    character ch, prevch;
 93 |    boolean split;
 94 |    list tail, tailn;
 95 |    int size;
 96 | 
 97 |    resetmem(bucketmem);
 98 |    for (prevg = g, g = g->nextunf ; g; g = g->nextunf) { /* g starts as the header; walk the unfinished chain */
 99 |       if (g->finis)
100 |          {prevg->nextunf = g->nextunf; continue;} /* unlink a finished group from the unfinished chain */
101 |       tail = g->head; split = FALSE;
102 |       prevch = CHAR(tail->str, pos); size = 1;
103 |       for ( ; (tailn = tail->next); tail = tailn) {
104 |          ch = CHAR(tailn->str, pos); size++;
105 |          if (ch == prevch) continue; /* still inside a run of equal characters */
106 |          intobucket(b+prevch, g->head, tail, size-1, g); /* move the run g->head..tail that just ended */
107 |          g->head = tailn; split = TRUE;
108 |          prevch = ch; size = 1;
109 |       }
110 |       if (split) {
111 |          intobucket(b+prevch, g->head, tail, size, g); /* the last run */
112 |          g->head = NULL; /* group is empty now; intogroup() tests for this */
113 |          prevg = g;
114 |       } else if (IS_ENDMARK(prevch)) /* unsplit and every string ends here */
115 |          prevg->nextunf = g->nextunf; /* drop it from the unfinished chain */
116 |       else
117 |          prevg = g; /* unsplit: stays in place for the next position */
118 |    }
119 | }
120 | 
121 | /* Hand the sublist head..tail back to group g. The first sublist
122 |    to arrive refills g itself; a later one either extends the group
123 |    at the insertion point or gets a new group linked in behind it. */
124 | static void intogroup(group g, list head, list tail, boolean finis)
125 | {
126 |    group at, fresh;
127 | 
128 |    if (!g->head) {   /* first sublist: refill g itself */
129 |       g->head = head; g->tail = tail;
130 |       g->finis = finis;
131 |       g->insp = g;
132 |       return;
133 |    }
134 |    at = g->insp;
135 |    if (finis && at->finis) {   /* both finished: concatenate */
136 |       at->tail->next = head;
137 |       at->tail = tail;
138 |       return;
139 |    }
140 |    /* otherwise link a new group in right behind the insertion point */
141 |    fresh = (group) allocmem(groupmem, sizeof(struct grouprec));
142 |    fresh->head = head; fresh->tail = tail;
143 |    fresh->finis = finis;
144 |    fresh->next = at->next; fresh->nextunf = at->nextunf;
145 |    at->next = fresh; at->nextunf = fresh; g->insp = fresh;
146 | }
147 | 
148 | /* Empty every bucket, in ascending character order, back into the
149 |    groups named by the bucket tags. Short lists are insertion sorted
150 |    and marked finished, as are lists whose strings have ended. */
151 | static void intogroups(bucket b[], int pos)
152 | {
153 |    character ch;
154 |    bucket item;
155 |    boolean ended, small;
156 | 
157 |    for (ch = 0; ch < CHARS; ch++) {
158 |       ended = IS_ENDMARK(ch);
159 |       for (item = b[ch]; item; item = item->next) {
160 |          /* a short list is sorted right away, unless its strings
161 |             have already ended and it is finished as it stands */
162 |          small = !ended && item->size < INSERTBREAK;
163 |          if (small && item->size > 1)
164 |             item->head = ListInsertsort(item->head, &item->tail, pos);
165 |          intogroup(item->tag, item->head, item->tail,
166 |                    ended || small);
167 |       }
168 |       b[ch] = NULL;   /* the bucket chain is consumed */
169 |    }
170 | }
171 | 
172 | /* Chain the lists of all groups behind header g; return the head. */
173 | static list collect(group g)
174 | {
175 |    group first = g->next, cur;   /* g itself holds no elements */
176 |    list last = first->tail;
177 | 
178 |    cur = first->next;
179 |    while (cur) {
180 |       last->next = cur->head;
181 |       last = cur->tail;
182 |       cur = cur->next;
183 |    }
184 |    return first->head;
185 | }
186 | 
187 | /* Sort the n-element list t; return the head of the sorted list. */
188 | static inline list forward1(list t, int n)
189 | {
190 |    static bucket b[CHARS];   /* one bucket chain per character */
191 |    group header, all;
192 |    int pos;
193 | 
194 |    if (n < 2)
195 |       return t;
196 | 
197 |    initmem(groupmem, sizeof(struct grouprec), n/15);
198 |    initmem(bucketmem, sizeof(struct bucketrec), n/5);
199 | 
200 |    /* header is a dummy group without elements; it only anchors the
201 |       group chain and the chain of unfinished groups. Initially
202 |       there is a single unfinished group holding the whole list. */
203 |    header = (group) allocmem(groupmem, sizeof(struct grouprec));
204 |    all = (group) allocmem(groupmem, sizeof(struct grouprec));
205 |    all->head = t;
206 |    all->finis = FALSE;
207 |    all->next = all->nextunf = NULL;
208 |    header->next = header->nextunf = all;
209 | 
210 |    /* one round per string position, until no group is unfinished */
211 |    intobuckets(header, b, 0);
212 |    for (pos = 1; header->nextunf; pos++) {
213 |       intogroups(b, pos);
214 |       intobuckets(header, b, pos);
215 |    }
216 |    t = collect(header);
217 |    freemem(bucketmem);
218 |    freemem(groupmem);
219 |    return t;
220 | }
221 | 
222 | /* Sort the scnt strings of the array in place: the strings are
223 |    threaded onto a linked list, the list is sorted by forward1(),
224 |    and the result is copied back into the array. */
225 | void frssort1(string strings[], size_t scnt)
226 | {
227 |    list nodes, cur;
228 |    size_t k;
229 | 
230 |    /* one zero-initialised list node per string */
231 |    nodes = (list) calloc(scnt, sizeof(struct listrec));
232 | 
233 |    /* thread the nodes in array order; the last one ends the list */
234 |    for (k = 0; k < scnt; k++) {
235 |       nodes[k].str = strings[k];
236 |       nodes[k].next = (k < scnt-1) ? &nodes[k+1] : NULL;
237 |    }
238 | 
239 |    cur = forward1(nodes, scnt);
240 |    /* walk the sorted list and store the strings back */
241 |    k = 0;
242 |    while (k < scnt) {
243 |       strings[k++] = cur->str;
244 |       cur = cur->next;
245 |    }
246 | 
247 |    free(nodes);
248 | }
249 | 
250 | /* Registered entry point; forwards to frssort1() as a plain call,
251 |    since a void function may not return an expression in C. */
252 | void forward8(unsigned char **strings, size_t n)
253 | { frssort1(strings, n); }
254 | ROUTINE_REGISTER_SINGLECORE(forward8,
255 | 		"Forward Radix Sort 8-bit by Stefan Nilsson")
256 | 


--------------------------------------------------------------------------------
/external/lcp-quicksort.cpp:
--------------------------------------------------------------------------------
 1 | #include <stdlib.h>
 2 | #include <algorithm>
 3 | #include "routine.h"
 4 | 
 5 | typedef int Lcp;
 6 | 
 7 | // Advance i while p and q hold the same nonzero character at i; return q[i]-p[i] at the stop.
 8 | inline int lcpstrcmp( unsigned char const * const p, unsigned char const * const q, Lcp &i) {
 9 |   while( p[i] != 0 && p[i] == q[i] ) i++;
10 |   return q[i]-p[i];
11 | }
12 | 
13 | inline void exch( unsigned char  *strings[], Lcp lcps[], int I, int J) { // swap entries I and J of both arrays
14 |   unsigned char *s = strings[I]; strings[I] = strings[J]; strings[J] = s;
15 |   Lcp l = lcps[I]; lcps[I] = lcps[J]; lcps[J] = l;
16 | }
17 | 
18 | void strsort(unsigned char * strings[], Lcp lcps[], int lo, int hi );
19 | 
20 | template <bool ascending>   // 3-way partition on lcp values around lcps[lo], then sort the parts
21 | void lcpsort( unsigned char * strings[], Lcp lcps[], int lo, int hi ) {
22 |   if ( lo >= hi ) return;
23 |   const Lcp pivot = lcps[lo];
24 |   int lt = lo, gt = hi, i = lo + 1;
25 |   while( i <= gt ) {
26 |     int side = (lcps[i] > pivot) - (lcps[i] < pivot);   // sign of lcps[i] - pivot
27 |     if ( !ascending ) side = -side;
28 |     if      ( side > 0 ) exch( strings, lcps, i, gt--);     // goes behind the pivot run
29 |     else if ( side < 0 ) exch( strings, lcps, lt++, i++);   // goes in front of it
30 |     else                 i++;
31 |   }
32 |   strsort( strings, lcps, lt, gt );   // equal lcp values: compare the strings themselves
33 |   lcpsort<ascending>( strings, lcps, lo, lt-1 );
34 |   lcpsort<ascending>( strings, lcps, gt+1, hi );
35 | }
36 | 
37 | // Three-way partition of strings[lo..hi] around strings[lo]; lcpstrcmp() also
38 | // updates lcps[i] for every string compared. Both outer parts go to lcpsort().
39 | void strsort(unsigned char * strings[], Lcp lcps[], int lo, int hi )
40 | {
41 |   if ( lo >= hi ) return;
42 |   unsigned char * const pivotStr = strings[lo];
43 |   int lt = lo, gt = hi, i = lo + 1;
44 |   while( i <= gt ) {
45 |     const int cmpr = lcpstrcmp( pivotStr, strings[i], lcps[i] );
46 |     if      (cmpr > 0) exch( strings, lcps, i, gt--);
47 |     else if (cmpr < 0) exch( strings, lcps, lt++, i++);
48 |     else               i++;
49 |   }
50 | 
51 |   lcpsort<true> ( strings, lcps, lo, lt-1 );
52 |   lcpsort<false>( strings, lcps, gt+1, hi );
53 | }
54 | 
55 | extern "C" void lcpquicksort( unsigned char * strings[], size_t n ) {   // registered entry point
56 |   if ( n < 2 ) return;   // nothing to sort; also keeps n-1 below from wrapping when n == 0
57 |   Lcp *lcps = (Lcp *) calloc( n, sizeof(Lcp));   // NOTE(review): result is not checked for NULL
58 |   strsort( strings, lcps, 0, (int)(n-1) );   // strsort() indexes with int
59 |   free(lcps);
60 | }
61 | 
62 | ROUTINE_REGISTER_SINGLECORE( lcpquicksort, "LCP Quicksort by Kendall Willets")
63 | 


--------------------------------------------------------------------------------
/external/mbmradix.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |    Hybrid American flag sort (with stack control), a radix sort
  3 |    algorithm for arrays of character strings by McIlroy, Bostic,
  4 |    and McIlroy.
  5 | 
  6 |    P. M. McIlroy, K. Bostic, and M. D. McIlroy. Engineering radix
  7 |    sort. Computing Systems, 6(1):5-27, 1993.
  8 | 
  9 |    The code presented in this file has been tested with care but is
 10 |    not guaranteed for any purpose. The writer does not offer any
 11 |    warranties nor does he accept any liabilities with respect to
 12 |    the code.
 13 | 
 14 |    Stefan Nilsson, 2 jan 1997.
 15 | 
 16 |    Laboratory of Information Processing Science
 17 |    Helsinki University of Technology
 18 |    Stefan.Nilsson@hut.fi
 19 | */
 20 | 
 21 | #include "routine.h"
 22 | #include "utils.h"
 23 | 
 24 | enum { SIZE = 1024, THRESHOLD = 10 }; /* stack capacity; below THRESHOLD elements rsorta() uses simplesort() */
 25 | 
 26 | typedef struct { string *sa; int sn, si; } mbmstack_t; /* one pending task: array, element count, character index */
 27 | 
 28 | static void simplesort(string a[], int n, int b)   /* insertion sort, comparing from offset b */
 29 | {
 30 |    int i, j;
 31 |    for (i = 1; i < n; i++) {
 32 |       string key = a[i];   /* shift larger strings up, then drop the key in */
 33 |       for (j = i; j > 0 && scmp(a[j-1]+b, key+b) > 0; j--) a[j] = a[j-1];
 34 |       a[j] = key;
 35 |    }
 36 | }
 37 | 
 38 | static void rsorta(string *a, int n, int b) /* radix sort of a[0..n-1] from character b on, using an explicit stack */
 39 | {
 40 | #define push(a, n, i)   sp->sa = a, sp->sn = n, (sp++)->si = i
 41 | #define pop(a, n, i)    a = (--sp)->sa, n = sp->sn, i = sp->si
 42 | #define stackempty()    (sp <= stack)
 43 | #define swap(p, q, r)   r = p, p = q, q = r
 44 |         mbmstack_t      stack[SIZE], *sp = stack, stmp, *oldsp, *bigsp;
 45 |         string          *pile[256], *ak, *an, r, t;
 46 |         static int      count[256], cmin, nc; /* static: the recursive call below sees the tallies already made */
 47 |         int             *cp, c, cmax;
 48 | 
 49 |         push(a, n, b);
 50 | 
 51 |         while(!stackempty()) {
 52 |                 pop(a, n, b);
 53 |                 if(n < THRESHOLD) {
 54 |                         simplesort(a, n, b);
 55 |                         continue;
 56 |                 }
 57 |                 an = a + n;
 58 |                 if(nc == 0) {                       /* untallied? */
 59 |                         cmin = 255;                 /* tally */
 60 |                         for(ak = a; ak < an; ) {
 61 |                                 c = (*ak++)[b];
 62 |                                 if(++count[c] == 1 && c > 0) { /* first sighting of a nonzero character */
 63 |                                         if(c < cmin) cmin = c;
 64 |                                         nc++;
 65 |                                 }
 66 |                         }
 67 |                         if(sp+nc > stack+SIZE) {     /* stack overflow */
 68 |                                   rsorta(a, n, b); /* recurse to get a fresh stack */
 69 |                                   continue;
 70 |                         }
 71 |                 }
 72 |                 oldsp = bigsp = sp, c = 2;         /* logarithmic stack */
 73 |                 pile[0] = ak = a+count[cmax=0];    /* find places */
 74 |                 for(cp = count+cmin; nc > 0; cp++, nc--) {
 75 |                          while(*cp == 0) cp++;
 76 |                          if (*cp > 1) {
 77 |                                   if(*cp > c) c = *cp, bigsp = sp; /* remember the largest pile pushed */
 78 |                                   push(ak, *cp, b+1);
 79 |                          }
 80 |                          pile[cmax = cp-count] = ak += *cp;
 81 |                 }
 82 |                 swap(*oldsp, *bigsp, stmp); /* largest pile goes lowest, so it is popped last */
 83 |                 an -= count[cmax];                 /* permute home */
 84 |                 count[cmax] = 0;
 85 |                 for(ak = a; ak < an; ak += count[c], count[c] = 0) {
 86 |                         r = *ak;
 87 |                         while(--pile[c = r[b]] > ak)
 88 |                                 swap(*pile[c], r, t);
 89 |                         *ak = r;
 90 |                                 /* here nc = count[...] = 0 */
 91 |                 }
 92 |         }
 93 | }
 94 | 
 95 | void mbmradix(string a[], size_t n) /* registered entry point: runs rsorta() from character 0; n is narrowed to int */
 96 | { rsorta(a, n, 0); }
 97 | 
 98 | ROUTINE_REGISTER_SINGLECORE(mbmradix,
 99 | 	"MSD Radix Sort by P. M. McIlroy, K. Bostic, and M. D. McIlroy")
100 | 


--------------------------------------------------------------------------------
/external/mkqsort.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |    Multikey quicksort, a radix sort algorithm for arrays of character
  3 |    strings by Bentley and Sedgewick.
  4 | 
  5 |    J. Bentley and R. Sedgewick. Fast algorithms for sorting and
  6 |    searching strings. In Proceedings of 8th Annual ACM-SIAM Symposium
  7 |    on Discrete Algorithms, 1997.
  8 | 
  9 |    http://www.CS.Princeton.EDU/~rs/strings/index.html
 10 | 
 11 |    The code presented in this file has been tested with care but is
 12 |    not guaranteed for any purpose. The writer does not offer any
 13 |    warranties nor does he accept any liabilities with respect to
 14 |    the code.
 15 | 
 16 |    Ranjan Sinha, 1 jan 2003.
 17 | 
 18 |    School of Computer Science and Information Technology,
 19 |    RMIT University, Melbourne, Australia
 20 |    rsinha@cs.rmit.edu.au
 21 | 
 22 | */
 23 | 
 24 | #include "utils.h"
 25 | 
 26 | /* MULTIKEY QUICKSORT */
 27 | 
 28 | #ifndef min
 29 | #define min(a, b) ((a)<=(b) ? (a) : (b))
 30 | #endif
 31 | 
 32 | /* ssort2 -- Faster Version of Multikey Quicksort */
 33 | 
 34 | /* Exchange a[0..n-1] with b[0..n-1], one element at a time. */
 35 | void vecswap2(unsigned char **a, unsigned char **b, int n)
 36 | {   int k;
 37 |     for (k = 0; k < n; k++) {
 38 |         unsigned char *held = a[k]; a[k] = b[k]; b[k] = held;
 39 |     }
 40 | }
 41 | 
 42 | #define swap2(a, b) { t = *(a); *(a) = *(b); *(b) = t; }
 43 | #define ptr2char(i) (*(*(i) + depth))
 44 | 
 45 | /* Of a, b, c, return the one whose character at depth is the median. */
 46 | unsigned char **med3func(unsigned char **a, unsigned char **b, unsigned char **c, int depth)
 47 | {   int va = ptr2char(a), vb = ptr2char(b), vc;
 48 |     if (va == vb) return a;
 49 |     vc = ptr2char(c);
 50 |     if (vc == va || vc == vb) return c;
 51 |     if (va < vb)    /* all three differ from here on */
 52 |         return vb < vc ? b : (va < vc ? c : a);
 53 |     return vb > vc ? b : (va < vc ? a : c);
 54 | }
 55 | #define med3(a, b, c) med3func(a, b, c, depth)
 56 | 
 57 | void mkqsort(unsigned char **a, int n, int depth) /* multikey quicksort of a[0..n-1] on the character at depth */
 58 | {   int d, r, partval;
 59 |     unsigned char **pa, **pb, **pc, **pd, **pl, **pm, **pn, *t;
 60 |     if (n < 20) { /* small input is handed to inssort() */
 61 |         inssort(a, n, depth);
 62 |         return;
 63 |     }
 64 |     pl = a;
 65 |     pm = a + (n/2);
 66 |     pn = a + (n-1);
 67 |     if (n > 30) { /* On big arrays, pseudomedian of 9 */
 68 |         d = (n/8);
 69 |         pl = med3(pl, pl+d, pl+2*d);
 70 |         pm = med3(pm-d, pm, pm+d);
 71 |         pn = med3(pn-2*d, pn-d, pn);
 72 |     }
 73 |     pm = med3(pl, pm, pn);
 74 |     swap2(a, pm); /* the pivot string moves to a[0] */
 75 |     partval = ptr2char(a);
 76 |     pa = pb = a + 1;
 77 |     pc = pd = a + n-1;
 78 |     for (;;) { /* elements equal to partval are parked at both ends */
 79 |         while (pb <= pc && (r = ptr2char(pb)-partval) <= 0) {
 80 |             if (r == 0) { swap2(pa, pb); pa++; }
 81 |             pb++;
 82 |         }
 83 |         while (pb <= pc && (r = ptr2char(pc)-partval) >= 0) {
 84 |             if (r == 0) { swap2(pc, pd); pd--; }
 85 |             pc--;
 86 |        }
 87 |         if (pb > pc) break;
 88 |         swap2(pb, pc);
 89 |         pb++;
 90 |         pc--;
 91 |     }
 92 |     pn = a + n;
 93 |     r = min(pa-a, pb-pa);    vecswap2(a,  pb-r, r); /* move the parked equal elements into the middle */
 94 |     r = min(pd-pc, pn-pd-1); vecswap2(pb, pn-r, r);
 95 |     if ((r = pb-pa) > 1) /* smaller part: same depth */
 96 |         mkqsort(a, r, depth);
 97 |     if (ptr2char(a + r) != 0) /* equal part: next character, unless the strings end here */
 98 |         mkqsort(a + r, pa-a + pn-pd-1, depth+1);
 99 |     if ((r = pd-pc) > 1) /* larger part: same depth */
100 |         mkqsort(a + n-r, r, depth);
101 | }
102 | 
103 | void mkqsort_main(unsigned char **a, int n) { mkqsort(a, n, 0); } /* entry point: start at character 0 */
104 | 


--------------------------------------------------------------------------------
/external/msd.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |    MSD radix sort with a fixed sized alphabet.
  3 | 
  4 |    S. Nilsson. Radix Sorting and Searching. PhD thesis, Department
  5 |    of Computer Science, Lund University, 1990.
  6 | 
  7 |    The code presented in this file has been tested with care but is
  8 |    not guaranteed for any purpose. The writer does not offer any
  9 |    warranties nor does he accept any liabilities with respect to
 10 |    the code.
 11 | 
 12 |    Stefan Nilsson, 8 jan 1997.
 13 | 
 14 |    Laboratory of Information Processing Science
 15 |    Helsinki University of Technology
 16 |    Stefan.Nilsson@hut.fi
 17 | */
 18 | 
 19 | #include "routine.h"
 20 | #include "nilsson.h"
 21 | #include <stdlib.h>
 22 | 
 23 | #define CHAR(s, p) s[p] /* character of string s at position p */
 24 | 
 25 | typedef struct bucketrec {
 26 |    list head, tail;
 27 |    int size;   /* size of list, 0 if already sorted */
 28 | } bucket;
 29 | 
 30 | typedef struct stackrec {
 31 |    list head, tail;
 32 |    int size;   /* size of list, 0 if already sorted */
 33 |    int pos;    /* current position in string */
 34 | } stack;
 35 | 
 36 | static memory stackmem[1]; /* allocator state for the stack records */
 37 | static stack *stackp; /* record on top of the stack; stackempty() tests it for NULL */
 38 | 
 39 | /* Allocate a new top-of-stack record and fill it with the given list. */
 40 | static void push(list head, list tail, int size, int pos)
 41 | {
 42 |    stack *rec = (stack *) allocmem(stackmem, sizeof(struct stackrec));
 43 |    rec->head = head; rec->tail = tail;
 44 |    rec->size = size; rec->pos = pos;
 45 |    stackp = rec;
 46 | }
 47 | 
 48 | /* Detach the top record and return it; stackp becomes whatever
 49 |    deallocmem() hands back. (void) makes this a real prototype. */
 50 | static stack *pop(void)
 51 | {
 52 |    stack *temp = stackp;
 53 |    stackp = (stack *) deallocmem(stackmem, sizeof(struct stackrec));
 54 |    return temp;
 55 | }
 56 | 
 57 | static stack *top(void)   /* record on top of the stack, not removed; (void) gives a real prototype */
 58 | {
 59 |    return stackp;
 60 | }
 61 | 
 62 | static boolean stackempty(void)   /* nonzero when stackp is NULL; (void) gives a real prototype */
 63 | {
 64 |    return !stackp;
 65 | }
 66 | 
 67 | /* Add the sublist h..t (size elements) to bucket b. A bucket that
 68 |    is used for the first time also widens the range [chmin, chmax]
 69 |    so that it includes ch, unless ch is the end marker. */
 70 | static void intobucket(bucket *b, list h, list t, int size,
 71 |                        character ch, character *chmin, character *chmax)
 72 | {
 73 |    if (b->head) {   /* bucket in use: append */
 74 |       b->tail->next = h;
 75 |       b->tail = t;
 76 |       b->size += size;
 77 |       return;
 78 |    }
 79 | 
 80 |    b->head = h;
 81 |    b->tail = t;
 82 |    b->size = size;
 83 |    if (ch == '\0') return;   /* the end marker is not tracked */
 84 |    if (*chmin > ch) *chmin = ch;
 85 |    if (*chmax < ch) *chmax = ch;
 86 | }
 87 | 
 88 | /* Move the list held in bucket b onto the stack and empty the
 89 |    bucket. A list of at most INSERTBREAK elements is insertion
 90 |    sorted first and counts as sorted (size 0). A sorted list is
 91 |    appended to the record on top of the stack when that record is
 92 |    sorted as well; in every other case a new stack record is
 93 |    pushed. */
 94 | static void ontostack(bucket *b, int pos)
 95 | {
 96 |    stack *above;
 97 | 
 98 |    b->tail->next = NULL;
 99 |    if (b->size <= INSERTBREAK) {
100 |       if (b->size > 1)
101 |          b->head = ListInsertsort(b->head, &b->tail, pos);
102 |       b->size = 0;   /* sorted */
103 |    }
104 |    above = stackempty() ? NULL : top();
105 |    if (b->size == 0 && above && above->size == 0) {
106 |       above->tail->next = b->head;
107 |       above->tail = b->tail;
108 |    } else
109 |       push(b->head, b->tail, b->size, pos);
110 |    b->head = NULL; b->size = 0;
111 | }
112 | 
113 | /* Traverse a list and put the elements into buckets according
114 |    to the character in position pos. The elements are moved in
115 |    blocks consisting of strings that have a common character in
116 |    position pos. We keep track of the minimum and maximum nonzero
117 |    characters encountered. In this way we may avoid looking at
118 |    some empty buckets when we traverse the buckets in ascending
119 |    order and push the lists onto the stack */
120 | static void bucketing(list a, int pos)
121 | {
122 |    static bucket b[CHARS]; /* static: ontostack() leaves every used bucket empty again */
123 |    bucket *bp;
124 |    character ch, prevch;
125 |    character chmin = CHARS-1, chmax = 0;
126 |    list t = a, tn;
127 |    int size = 1;
128 | 
129 |    prevch = CHAR(t->str, pos);
130 |    for ( ; (tn = t->next); t = tn) {
131 |       ch = CHAR(tn->str, pos); size++;
132 |       if (ch == prevch) continue; /* still inside a run of equal characters */
133 |       intobucket(b+prevch, a, t, size-1, prevch, &chmin, &chmax); /* move the run a..t that just ended */
134 |       a = tn;
135 |       prevch = ch;
136 |       size = 1;
137 |    }
138 |    intobucket(b+prevch, a, t, size, prevch, &chmin, &chmax); /* the last run */
139 | 
140 |    if (b->head) {    /* ch = '\0', end of string */
141 |       b->size = 0;   /* already sorted */
142 |       ontostack(b, pos);
143 |    }
144 |    for (bp = b + chmin; bp <= b + chmax; bp++) /* nonzero characters in ascending order */
145 |       if (bp->head) ontostack(bp, pos+1);
146 | }
147 | 
148 | /* Sort the n-element list a and return the head of the result. */
149 | list MSD1(list a, int n)
150 | {
151 |    list sorted = NULL;
152 |    stack *rec;
153 | 
154 |    if (n < 2)
155 |       return a;
156 |    initmem(stackmem, sizeof(struct stackrec), n/50);
157 |    for (push(a, NULL, n, 0); !stackempty(); ) {
158 |       rec = pop();
159 |       if (rec->size) {   /* unsorted: split it into buckets */
160 |          bucketing(rec->head, rec->pos);
161 |       } else {   /* sorted: put it in front of the result */
162 |          rec->tail->next = sorted;
163 |          sorted = rec->head;
164 |       }
165 |    }
166 | 
167 |    freemem(stackmem);
168 |    return sorted;
169 | }
170 | 
171 | /* Sort the scnt strings of the array in place: the strings are
172 |    threaded onto a linked list, the list is sorted by MSD1(), and
173 |    the result is copied back into the array. */
174 | void MSDsort(string strings[], size_t scnt)
175 | {
176 |    list nodes, cur;
177 |    size_t k;
178 | 
179 |    /* one zero-initialised list node per string */
180 |    nodes = (list) calloc(scnt, sizeof(struct listrec));
181 | 
182 |    /* thread the nodes in array order; the last one ends the list */
183 |    for (k = 0; k < scnt; k++) {
184 |       nodes[k].str = strings[k];
185 |       nodes[k].next = (k < scnt-1) ? &nodes[k+1] : NULL;
186 |    }
187 | 
188 |    cur = MSD1(nodes, scnt);
189 |    /* walk the sorted list and store the strings back */
190 |    k = 0;
191 |    while (k < scnt) {
192 |       strings[k++] = cur->str;
193 |       cur = cur->next;
194 |    }
195 | 
196 |    free(nodes);
197 | }
198 | 
199 | /* Registered entry point; forwards to MSDsort() as a plain call,
200 |    since a void function may not return an expression in C. */
201 | void msd_nilsson(unsigned char **strings, size_t n)
202 | { MSDsort(strings, n); }
203 | ROUTINE_REGISTER_SINGLECORE(msd_nilsson, "MSD Radix Sort by Stefan Nilsson")
204 | 


--------------------------------------------------------------------------------
/external/multikey.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |    Multikey quicksort, a radix sort algorithm for arrays of character
  3 |    strings by Bentley and Sedgewick.
  4 | 
  5 |    J. Bentley and R. Sedgewick. Fast algorithms for sorting and
  6 |    searching strings. In Proceedings of 8th Annual ACM-SIAM Symposium
  7 |    on Discrete Algorithms, 1997.
  8 | 
  9 |    http://www.CS.Princeton.EDU/~rs/strings/index.html
 10 | 
 11 |    The code presented in this file has been tested with care but is
 12 |    not guaranteed for any purpose. The writer does not offer any
 13 |    warranties nor does he accept any liabilities with respect to
 14 |    the code.
 15 | 
 16 |    Stefan Nilsson, 8 jan 1997.
 17 | 
 18 |    Laboratory of Information Processing Science
 19 |    Helsinki University of Technology
 20 |    Stefan.Nilsson@hut.fi
 21 | */
 22 | 
 23 | #include "routine.h"
 24 | #include "utils.h"
 25 | #include <stdlib.h>
 26 | 
 27 | #ifndef min
 28 | #define min(a, b) ((a)<=(b) ? (a) : (b))
 29 | #endif
 30 | 
 31 | #define swap(a, b) { string t=x[a]; \
 32 |                      x[a]=x[b]; x[b]=t; }
 33 | #define i2c(i) x[i][depth]
 34 | 
 35 | /* Exchange x[i..i+n-1] with x[j..j+n-1], one element at a time. */
 36 | static void vecswap(int i, int j, int n, string x[])
 37 | {   int k;
 38 |     for (k = 0; k < n; k++) {
 39 |         swap(i + k, j + k);
 40 |     }
 41 | }
 42 | 
 43 | static void ssort1(string x[], int n, int depth) /* multikey quicksort of x[0..n-1] on the character at depth */
 44 | {   int    a, b, c, d, r, v;
 45 |     if (n <= 1)
 46 |         return;
 47 |     a = rand() % n; /* random pivot */
 48 |     swap(0, a); /* the pivot string moves to x[0] */
 49 |     v = i2c(0);
 50 |     a = b = 1;
 51 |     c = d = n-1;
 52 |     for (;;) { /* elements equal to v are parked at both ends */
 53 |         while (b <= c && (r = i2c(b)-v) <= 0) {
 54 |             if (r == 0) { swap(a, b); a++; }
 55 |             b++;
 56 |         }
 57 |         while (b <= c && (r = i2c(c)-v) >= 0) {
 58 |             if (r == 0) { swap(c, d); d--; }
 59 |             c--;
 60 |         }
 61 |         if (b > c) break;
 62 |         swap(b, c);
 63 |         b++;
 64 |         c--;
 65 |     }
 66 |     r = min(a, b-a);     vecswap(0, b-r, r, x); /* move the parked equal elements into the middle */
 67 |     r = min(d-c, n-d-1); vecswap(b, n-r, r, x);
 68 |     r = b-a; ssort1(x, r, depth); /* smaller part: same depth */
 69 |     if (i2c(r) != 0) /* equal part: next character, unless the strings end here */
 70 |         ssort1(x + r, a + n-d-1, depth+1);
 71 |     r = d-c; ssort1(x + n-r, r, depth); /* larger part: same depth */
 72 | }
 73 | 
 74 | void multikey1(string x[], int n) /* entry point for ssort1(): start at character 0 */
 75 | { ssort1(x, n, 0); }
 76 | 
 77 | 
 78 | /* ssort2 -- Faster Version of Multikey Quicksort */
 79 | 
 80 | /* Exchange a[0..n-1] with b[0..n-1], one element at a time. */
 81 | static void vecswap2(string *a, string *b, int n)
 82 | {   int k;
 83 |     for (k = 0; k < n; k++) {
 84 |         string held = a[k]; a[k] = b[k]; b[k] = held;
 85 |     }
 86 | }
 87 | 
 88 | #define swap2(a, b) { t = *(a); *(a) = *(b); *(b) = t; }
 89 | #define ptr2char(i) (*(*(i) + depth))
 90 | 
 91 | /* Of a, b, c, return the one whose character at depth is the median. */
 92 | static string *med3func(string *a, string *b, string *c, int depth)
 93 | {   int va = ptr2char(a), vb = ptr2char(b), vc;
 94 |     if (va == vb) return a;
 95 |     vc = ptr2char(c);
 96 |     if (vc == va || vc == vb) return c;
 97 |     if (va < vb)    /* all three differ from here on */
 98 |         return vb < vc ? b : (va < vc ? c : a);
 99 |     return vb > vc ? b : (va < vc ? a : c);
100 | }
101 | #define med3(a, b, c) med3func(a, b, c, depth)
102 | 
103 | /* Insertion sort of a[0..n-1]; strings are compared from offset d. */
104 | static void insertsort(string *a, int n, int d)
105 | {   string *cur, *slot, lhs, rhs, t;   /* t is the temporary swap2() expects */
106 |     for (cur = a + 1; n > 1; cur++, n--) {
107 |         for (slot = cur; slot > a; slot--) {
108 |             lhs = slot[-1] + d; rhs = slot[0] + d;
109 |             while (*lhs == *rhs && *lhs != 0) { lhs++; rhs++; }
110 |             if (*lhs <= *rhs) break;   /* already in order */
111 |             swap2(slot, slot-1);
112 |         }
113 |     }
114 | }
115 | 
116 | static void ssort2(string a[], int n, int depth) /* multikey quicksort of a[0..n-1] on the character at depth */
117 | {   int d, r, partval;
118 |     string *pa, *pb, *pc, *pd, *pl, *pm, *pn, t;
119 |     if (n < 10) { /* small input is handed to insertsort() */
120 |         insertsort(a, n, depth);
121 |         return;
122 |     }
123 |     pl = a;
124 |     pm = a + (n/2);
125 |     pn = a + (n-1);
126 |     if (n > 30) { /* On big arrays, pseudomedian of 9 */
127 |         d = (n/8);
128 |         pl = med3(pl, pl+d, pl+2*d);
129 |         pm = med3(pm-d, pm, pm+d);
130 |         pn = med3(pn-2*d, pn-d, pn);
131 |     }
132 |     pm = med3(pl, pm, pn);
133 |     swap2(a, pm); /* the pivot string moves to a[0] */
134 |     partval = ptr2char(a);
135 |     pa = pb = a + 1;
136 |     pc = pd = a + n-1;
137 |     for (;;) { /* elements equal to partval are parked at both ends */
138 |         while (pb <= pc && (r = ptr2char(pb)-partval) <= 0) {
139 |             if (r == 0) { swap2(pa, pb); pa++; }
140 |             pb++;
141 |         }
142 |         while (pb <= pc && (r = ptr2char(pc)-partval) >= 0) {
143 |             if (r == 0) { swap2(pc, pd); pd--; }
144 |             pc--;
145 |         }
146 |         if (pb > pc) break;
147 |         swap2(pb, pc);
148 |         pb++;
149 |         pc--;
150 |     }
151 |     pn = a + n;
152 |     r = min(pa-a, pb-pa);    vecswap2(a,  pb-r, r); /* move the parked equal elements into the middle */
153 |     r = min(pd-pc, pn-pd-1); vecswap2(pb, pn-r, r);
154 |     if ((r = pb-pa) > 1) /* smaller part: same depth */
155 |         ssort2(a, r, depth);
156 |     if (ptr2char(a + r) != 0) /* equal part: next character, unless the strings end here */
157 |         ssort2(a + r, pa-a + pn-pd-1, depth+1);
158 |     if ((r = pd-pc) > 1) /* larger part: same depth */
159 |         ssort2(a + n-r, r, depth);
160 | }
161 | 
162 | void multikey2(string a[], size_t n) { ssort2(a, n, 0); }
163 | 
164 | void mkqsort_bs(unsigned char **strings, size_t n)
165 | {
166 | 	return multikey2(strings, n);
167 | }
168 | ROUTINE_REGISTER_SINGLECORE(mkqsort_bs,
169 | 		"Multi-Key-Quicksort by J. Bentley and R. Sedgewick")
170 | 


--------------------------------------------------------------------------------
/external/nilsson.h:
--------------------------------------------------------------------------------
#ifndef NILSSON_H
#define NILSSON_H

#include "utils.h"

/* Capacity of the block[] table inside `memory`. */
#define MAXBLOCKS 100
#define TRUE 1
#define FALSE 0
/* Larger of a and b; each argument may be evaluated twice. */
#define MAX(a, b) ((a) > (b) ? (a) : (b))

typedef int boolean;
typedef int character;

/* Singly linked list node carrying one string. */
typedef struct listrec *list;
struct listrec {
	string str;
	list next;
	int length;	/* NOTE(review): meaning not visible here - confirm in the users */
};

/* Bookkeeping record used by the initmem/allocmem/deallocmem/resetmem/freemem
   routines declared below. Their implementation is not in this header, so the
   exact role of each field should be confirmed there. */
typedef struct {
	void *block[MAXBLOCKS];
	int allocnr;
	int nr;
	int blocksize;
	void *current, *first, *last;
} memory;

/* Memory-pool interface (defined elsewhere). */
void initmem(memory *m, int elemsize, int blocksize);
void *allocmem(memory *m, int elemsize);
void *deallocmem(memory *m, int elemsize);
void resetmem(memory *m);
void freemem(memory *m);

/* List insertion sort (defined elsewhere); takes the list head, an
   out-parameter for the tail and a length argument. */
list ListInsertsort(list head, list *tail , int length);

#endif //NILSSON_H
38 | 


--------------------------------------------------------------------------------
/external/parallel_string_radix_sort.cpp:
--------------------------------------------------------------------------------
 1 | #include "routine.h"
 2 | #include "parallel_string_radix_sort.h"
 3 | 
 4 | namespace parallel_string_radix_sort {
 5 | namespace internal {
 6 | template<> class Compare<const unsigned char*> {
 7 | public:
 8 | 	explicit Compare(int depth) : depth_(depth) {}
 9 | 	inline bool operator()(const unsigned char* const a,
10 | 			const unsigned char* const b) {
11 | 		return strcmp((char*)a + depth_, (char*)b + depth_) < 0;
12 | 	}
13 | private:
14 | 	int depth_;
15 | };
16 | }
17 | }
18 | 
19 | void parallel_msd_radix_sort(unsigned char **strings, size_t count)
20 | {
21 | 	parallel_string_radix_sort::Sort<const unsigned char *>(
22 | 			(const unsigned char **)strings, count);
23 | }
24 | 
25 | ROUTINE_REGISTER_MULTICORE(parallel_msd_radix_sort,
26 | 		"Parallel MSD radix sort by Takuya Akiba")
27 | 


--------------------------------------------------------------------------------
/external/quicksort.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |    A stripped down version of a quicksort algorithm by Bentley
 3 |    and McIlroy. It sorts an array of pointers to strings.
 4 | 
 5 |    J. L. Bentley and M. D. McIlroy. Engineering a sort function.
 6 |    Software---Practice and Experience, 23(11):1249-1265, 1993.
 7 | 
 8 |    The code presented in this file has been tested with care but is
 9 |    not guaranteed for any purpose. The writer does not offer any
10 |    warranties nor does he accept any liabilities with respect to
11 |    the code.
12 | 
13 |    Stefan Nilsson, 2 jan 1997.
14 | 
15 |    Laboratory of Information Processing Science
16 |    Helsinki University of Technology
17 |    Stefan.Nilsson@hut.fi
18 | */
19 | 
20 | #include "routine.h"
21 | #include <stddef.h>
22 | #include "utils.h"
23 | 
24 | #define swap(a, b)  (t = (a), (a) = (b), (b) = t)
25 | static void vecswap(int pa, int pb, int n, string a[])
26 | {
27 |    string t;
28 |    for( ; n > 0; pa++, pb++, n--)
29 |       swap(a[pa], a[pb]);
30 | }
31 | 
32 | #define min(a, b) ((a) < (b) ? (a) : (b))
33 | 
34 | static int med3(int pa, int pb, int pc, string a[])
35 | {   return scmp(a[pa], a[pb]) < 0 ?
36 |        (scmp(a[pb], a[pc]) < 0 ? pb : scmp(a[pa], a[pc]) < 0 ? pc : pa)
37 |      : (scmp(a[pb], a[pc]) > 0 ? pb : scmp(a[pa], a[pc]) > 0 ? pc : pa);
38 | }
39 | 
40 | void quicksort(string a[], size_t n)
41 | {
42 |    int pa, pb, pc, pd, pl, pm, pn, r, s;
43 |    string t;
44 |    string pv;
45 | 
46 |    if (n < 10) {       /* Insertion sort on smallest arrays */
47 |       for (pm = 1; pm < n; pm++)
48 |          for (pl = pm; pl > 0 && scmp(a[pl-1], a[pl]) > 0; pl--)
49 |             swap(a[pl], a[pl-1]);
50 |       return;
51 |    }
52 |    pm = n/2;               /* Small arrays, middle element */
53 |    if (n > 7) {
54 |       pl = 0;
55 |       pn = n-1;
56 |       if (n > 40) {       /* Big arrays, pseudomedian of 9 */
57 |          s = n/8;
58 |          pl = med3(pl, pl+s, pl+2*s, a);
59 |          pm = med3(pm-s, pm, pm+s, a);
60 |          pn = med3(pn-2*s, pn-s, pn, a);
61 |       }
62 |       pm = med3(pl, pm, pn, a);      /* Mid-size, med of 3 */
63 |    }
64 |    pv = a[pm];
65 |    pa = pb = 0;
66 |    pc = pd = n-1;
67 |    for (;;) {
68 |       while (pb <= pc && (r = scmp(a[pb], pv)) <= 0) {
69 |          if (r == 0) { swap(a[pa], a[pb]); pa++; }
70 |          pb++;
71 |       }
72 |       while (pc >= pb && (r = scmp(a[pc], pv)) >= 0) {
73 |          if (r == 0) { swap(a[pc], a[pd]); pd--; }
74 |          pc--;
75 |       }
76 |       if (pb > pc) break;
77 |       swap(a[pb], a[pc]);
78 |       pb++;
79 |       pc--;
80 |    }
81 |    pn = n;
82 |    s = min(pa,  pb-pa   ); vecswap(0,  pb-s, s, a);
83 |    s = min(pd-pc, pn-pd-1); vecswap(pb, pn-s, s, a);
84 |    if ((s = pb-pa) > 1) quicksort(a,    s);
85 |    if ((s = pd-pc) > 1) quicksort(&a[pn-s], s);
86 | }
87 | 
88 | ROUTINE_REGISTER_SINGLECORE(quicksort,
89 | 		"Quicksort by J. L. Bentley and M. D. McIlroy")
90 | 


--------------------------------------------------------------------------------
/external/utils.c:
--------------------------------------------------------------------------------
 1 | int scmp( unsigned char *s1, unsigned char *s2 )
 2 | {
 3 |     while( *s1 != '\0' && *s1 == *s2 )
 4 |     {
 5 |         s1++;
 6 |         s2++;
 7 |     }
 8 |     return( *s1-*s2 );
 9 | }
10 | 
/* Insertion sort of the n string pointers in a[]. Strings are compared
   from byte offset d onwards; the first d bytes are skipped. */
void
inssort(unsigned char** a, int n, int d)
{
	int i;

	for (i = 1; i < n; i++) {
		unsigned char* key = a[i];
		int hole = i;

		/* Shift larger predecessors up until key fits at `hole`. */
		while (hole > 0) {
			unsigned char* prev = a[hole-1] + d;
			unsigned char* probe = key + d;

			while (*prev == *probe && *prev != 0) {
				++prev;
				++probe;
			}
			if (*prev <= *probe)
				break;
			a[hole] = a[hole-1];
			hole--;
		}
		a[hole] = key;
	}
}
32 | 
33 | 


--------------------------------------------------------------------------------
/external/utils.h:
--------------------------------------------------------------------------------
#ifndef UTILS_H
#define UTILS_H

#include <stddef.h>

/* NOTE(review): presumably the alphabet size for byte strings - confirm in the users. */
#define CHARS 256
/* NOTE(review): looks like a size threshold for switching to insertion sort - confirm in the users. */
#define INSERTBREAK 20
/* Strings are handled as pointers to unsigned bytes throughout. */
typedef unsigned char* string;

/* Sorting helpers taking an array of strings, its length n and a byte
   offset `depth`. */
void mkqsort(unsigned char **, int n, int depth);
void inssort(unsigned char **, int n, int depth);
/* String comparison helper returning an int. */
int  scmp(unsigned char*, unsigned char*);

#endif //UTILS_H
15 | 


--------------------------------------------------------------------------------
/report/Makefile:
--------------------------------------------------------------------------------
 1 | default: report.html
 2 | 
 3 | algs.xml: generate-algs-xml Makefile ./sortstring
 4 | 	./generate-algs-xml
 5 | 
 6 | report.html: Makefile htmlreport.xsl algs.xml sortable.js
 7 | 	xsltproc htmlreport.xsl algs.xml >report.html
 8 | 
 9 | clean:
10 | 	rm -f algs.xml
11 | 	rm -f report.html
12 | 


--------------------------------------------------------------------------------
/report/README:
--------------------------------------------------------------------------------
 1 | In order to generate the HTML based comparison sheet, perform the following steps.
 2 | 
 3 | 0. Obtain input files. I've used the data sets that Sinha & Zobel used in
 4 |    their Burstsort experiments. You can find them via Google. They are about
 5 |    300 megabytes each.
 6 | 
 7 | 1. Run the ''benchmark'' script, takes about 12 hours if you run all algorithms
 8 |    and three input files, each about 300MB. This runs each algorithm seven
 9 |    times for each input file.
10 | 
11 | 2. Run OProfile with the ''collect-oprofile-statistics'' script. Takes about 12
12 |    hours. Requires root privileges.
13 | 
14 | 3. Run memusage with the ''collect-memusage-statistics'' script. Takes about 30
15 |    minutes.
16 | 
17 | 4. Process the OProfile results with ''process-oprofile-statistics'' script.
18 |    Takes about one hour.
19 | 
20 | 5. Process memusage results with the ''process-memusage-statistics'' script.
21 |    Takes a minute or two.
22 | 
23 | 6. Finally, run ''make''. This will generate the HTML file based on the results
24 |    of the previous steps.
25 | 
26 | 
27 | NOTES:
28 | *) You can create results for smaller subsets of available algorithms using
29 |    environmental variables, for example:
30 |       export ALGS="1 2 3"
31 |    OR:
32 |       ALGS="1 2 3" ./collect-oprofile-statistics
33 |       ALGS="1 2 3" ./collect-memusage-statistics
34 |       etc.
35 | 


--------------------------------------------------------------------------------
/report/benchmark:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | ################################################################################
 3 | # Copyright 2008 by Tommi Rantala <tt.rantala@gmail.com>
 4 | #
 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | # of this software and associated documentation files (the "Software"), to
 7 | # deal in the Software without restriction, including without limitation the
 8 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 9 | # sell copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | #
12 | # The above copyright notice and this permission notice shall be included in
13 | # all copies or substantial portions of the Software.
14 | #
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 | # IN THE SOFTWARE.
22 | ################################################################################
# Print "ERROR: <message>" on standard output and abort with exit status 1.
#   $1  message text
die() {
	printf '%s\n' "ERROR: $1"
	exit 1
}
27 | ################################################################################
# --- Configuration: each setting may be preset through the environment ------
[[ -n $BIN ]] || BIN=./sortstring
[[ -x $BIN ]] || die "Sorry, binary not executable"
[[ -n $ALGS ]] || ALGS=$($BIN --alg-nums)
[[ -n $INFILES ]] || INFILES="input/url3 input/nodup3 input/genome3"
[[ -n $OUTDIR ]] || OUTDIR="data"
mkdir -p $OUTDIR
for I in $INFILES ; do
	[[ -r $I ]] || die "Sorry, ''$I'' not readable"
done
# --- Number of algorithms, used to size the progress bar ---------------------
ALGCOUNT=0
for A in $ALGS ; do
	ALGCOUNT=$((ALGCOUNT + 1))
done
# --- Measurements: seven timed runs per (input file, algorithm) pair ---------
echo "ALGS=$ALGS"
echo "INFILES=$INFILES"
echo "Starting measurements ..."
for I in $INFILES ; do
	echo "Input file $I ..."
	KDIALOG=$(which kdialog 2>/dev/null)
	if [[ -x "$KDIALOG" ]] ; then
		# dont spawn kdialog in a subshell
		kdialog --progressbar "-" $((7 * ALGCOUNT)) 2>/dev/null >.tmp
		DCOPREF=$(cat .tmp)
		rm .tmp
	fi
	[[ -z "$DCOPREF" ]] || dcop $DCOPREF setAutoClose 1
	for A in $ALGS ; do
		if [[ -n "$DCOPREF" ]] ; then
			dcop $DCOPREF setLabel "<pre>$I</pre><pre>$($BIN --alg-name=$A)</pre>" 2>/dev/null
		fi
		for ITER in $(seq 1 7) ; do
			# One XML timing file per run, named after input, algorithm and iteration.
			$BIN --xml-stats $A $I \
				>$OUTDIR/timings_$(basename ${I})_${A}_${ITER}.xml \
				|| echo "WARNING: failure with input=$I alg=$A"
			if [[ -n "$DCOPREF" ]] ; then
				PROGRESS=$(dcop $DCOPREF progress)
				dcop $DCOPREF setProgress $((PROGRESS+1)) 2>/dev/null
			fi
		done
	done
done
68 | 


--------------------------------------------------------------------------------
/report/collect-memusage-statistics:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | ################################################################################
 3 | # Copyright 2008 by Tommi Rantala <tt.rantala@gmail.com>
 4 | #
 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | # of this software and associated documentation files (the "Software"), to
 7 | # deal in the Software without restriction, including without limitation the
 8 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 9 | # sell copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | #
12 | # The above copyright notice and this permission notice shall be included in
13 | # all copies or substantial portions of the Software.
14 | #
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 | # IN THE SOFTWARE.
22 | ################################################################################
# Abort the script: write "ERROR: <message>" to standard output, exit with 1.
#   $1  message text
die() {
	local message="ERROR: $1"
	echo "$message"
	exit 1
}
27 | ################################################################################
# --- Configuration: each setting may be preset through the environment ------
[[ -n $BIN ]] || BIN=./sortstring
[[ -x $BIN ]] || die "Sorry, binary not executable"
[[ -n $ALGS ]] || ALGS=$($BIN --alg-nums)
[[ -n $INFILES ]] || INFILES="input/url3 input/nodup3 input/genome3"
[[ -n $OUTDIR ]] || OUTDIR="data"
mkdir -p $OUTDIR
[[ -n $TOOLOUTDIR ]] || TOOLOUTDIR="tool-output"
mkdir -p $TOOLOUTDIR
for I in $INFILES ; do
	[[ -r $I ]] || die "Sorry, ''$I'' not readable"
done
# --- Item counts, used to size the progress bar ------------------------------
ALGCOUNT=0
for A in $ALGS ; do
	ALGCOUNT=$((ALGCOUNT + 1))
done
FILECOUNT=0
for F in $INFILES ; do
	FILECOUNT=$((FILECOUNT + 1))
done
# --- Measurements ------------------------------------------------------------
echo "ALGS=$ALGS"
echo "INFILES=$INFILES"
echo "Starting measurements ..."
KDIALOG=$(which kdialog 2>/dev/null)
if [[ -x "$KDIALOG" ]] ; then
	# dont spawn kdialog in a subshell
	kdialog --caption "memusage" --progressbar "-" $((FILECOUNT * ALGCOUNT)) 2>/dev/null >.tmp
	DCOPREF=$(cat .tmp)
	rm .tmp
fi
[[ -z "$DCOPREF" ]] || dcop $DCOPREF setAutoClose 1
for FILE in $INFILES ; do
	echo "Input file $FILE ..."
	[[ -z "$DCOPREF" ]] || dcop $DCOPREF setLabel "<pre>$FILE</pre><pre>Baseline calculation ...</pre>"
	# Find out baseline values by running with non-existent algorithm. We
	# can then find out how much memory the actual algorithm requires and
	# how many malloc calls it makes.
	TOOLOUT=$TOOLOUTDIR/memusage_$(basename ${FILE})_0
	memusage $BIN 0 $FILE >/dev/null 2>$TOOLOUT
	for ALG in $ALGS ; do
		[[ -z "$DCOPREF" ]] || dcop $DCOPREF setLabel "<pre>$FILE</pre><pre>$($BIN --alg-name=$ALG)</pre>"
		# memusage reports on stderr; keep that report, drop the program output.
		TOOLOUT=$TOOLOUTDIR/memusage_$(basename ${FILE})_${ALG}
		memusage $BIN $ALG $FILE >/dev/null 2>$TOOLOUT \
			|| echo "WARNING: failure with alg=$ALG input=$FILE"
		if [[ -n "$DCOPREF" ]] ; then
			PROGRESS=$(dcop $DCOPREF progress)
			dcop $DCOPREF setProgress $((PROGRESS+1))
		fi
	done
done
71 | 


--------------------------------------------------------------------------------
/report/collect-oprofile-statistics:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | ################################################################################
  3 | # Copyright 2008 by Tommi Rantala <tt.rantala@gmail.com>
  4 | #
  5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
  6 | # of this software and associated documentation files (the "Software"), to
  7 | # deal in the Software without restriction, including without limitation the
  8 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  9 | # sell copies of the Software, and to permit persons to whom the Software is
 10 | # furnished to do so, subject to the following conditions:
 11 | #
 12 | # The above copyright notice and this permission notice shall be included in
 13 | # all copies or substantial portions of the Software.
 14 | #
 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 20 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 21 | # IN THE SOFTWARE.
 22 | ################################################################################
 23 | if [[ `id -u` != 0 ]] ; then
 24 | 	echo "Sorry, profiling requires root priviledges"
 25 | 	exit 1
 26 | fi
 27 | ################################################################################
 28 | function die() {
 29 | 	echo "ERROR: $1"
 30 | 	exit 1
 31 | }
 32 | ################################################################################
 33 | if [[ -z $EVENTS ]] ; then
 34 | 	EVENTS="CPU_CLK_UNHALTED:100000
 35 | 		INST_RETIRED:100000
 36 | 		DTLB_MISSES:10000
 37 | 		L1D_REPL:10000
 38 | 		L2_LINES_IN:10000
 39 | 		LOAD_BLOCK:10000:0x02"
 40 | fi
 41 | ################################################################################
 42 | if [[ -z $BIN  ]] ; then BIN=./sortstring ; fi
 43 | if [[ ! -x $BIN ]] ; then die "Sorry, binary not executable" ; fi
 44 | if [[ -z $ALGS ]] ; then ALGS=`$BIN --alg-nums` ; fi
 45 | if [[ -z $INFILES ]] ; then INFILES="input/url3 input/nodup3 input/genome3" ; fi
 46 | if [[ -z $OUTDIR ]] ; then OUTDIR="data" ; fi
 47 | mkdir -p $OUTDIR
 48 | if [[ -z $TOOLOUTDIR ]] ; then TOOLOUTDIR="tool-output"; fi
 49 | for I in $INFILES ; do
 50 |         if [[ ! -r "$I" ]] ; then die "Sorry, ''$I'' not readable" ; fi
 51 | done
 52 | # Oprofile requires absolute path.
 53 | OPSESSIONDIR=$PWD/$TOOLOUTDIR/oprofile-session-dir
 54 | mkdir -p $OPSESSIONDIR
 55 | ################################################################################
 56 | # 1. event to monitor
 57 | # 2. algorithm
 58 | # 3. input file name
 59 | # 4. oprofile XML output file name
 60 | ################################################################################
 61 | function round() {
 62 | 	echo "  Profiling algorithm ''$2'', input ''$3'', event ''$1'' ..."
 63 | 	local SESSIONNAME=`basename "$3"`_${2}_`echo $1 | sed 's/:/_/g'`
 64 | 	if [[ -e "$OPSESSIONDIR/samples/$SESSIONNAME" ]] ; then
 65 | 		rm -rf "$OPSESSIONDIR/samples/$SESSIONNAME"
 66 | 	fi
 67 | 	opcontrol --session-dir=$OPSESSIONDIR --reset >>.op_log 2>&1 || die "Could not reset OProfile"
 68 | 	# Nuke old configuration file. Only (?) way to _really_ clean up old
 69 | 	# settings.
 70 | 	rm -f /root/.oprofile/daemonrc
 71 | 	opcontrol --session-dir=$OPSESSIONDIR --image=$BIN --separate=kernel --vmlinux=/usr/lib/debug/lib/modules/`uname -r`/vmlinux --event=$1 >>.op_log 2>&1 || die "Could not setup OProfile correctly"
 72 | 	$BIN --oprofile $2 "$3" >>.op_log 2>&1
 73 | 	if [[ $? -ne 0 ]] ; then echo "WARNING: failure was reported" ; fi
 74 | 	opcontrol --session-dir=$OPSESSIONDIR --shutdown >>.op_log 2>&1 || die "Could not shutdown OProfile correctly"
 75 | 	opcontrol --session-dir=$OPSESSIONDIR --save=$SESSIONNAME
 76 | 	if [[ $? -ne 0 ]] ; then die "Failed in --save"; fi
 77 | }
 78 | ################################################################################
 79 | echo "ALGS=$ALGS"
 80 | echo "EVENTS=$EVENTS"
 81 | echo "INFILES=$INFILES"
 82 | if [[ -e $OPSESSIONDIR/lock ]] ; then
 83 | 	echo "WARNING: oprofile lock file exists!"
 84 | 	KILLPID=`cat $OPSESSIONDIR/lock`
 85 | 	echo "killing PID $KILLPID"
 86 | 	kill $KILLPID
 87 | 	sleep 1
 88 | fi
 89 | # kill old daemons
 90 | ps -C oprofiled >/dev/null 2>&1
 91 | if [[ $? -ne 1 ]] ; then
 92 | 	echo "WARNING: oprofiled alive, killing!"
 93 | 	killall oprofiled
 94 | fi
 95 | echo "Starting measurements ..."
 96 | for FILE in $INFILES ; do
 97 | 	echo "Input file $FILE ..."
 98 | 	for ALG in $ALGS ; do
 99 | 		for EVENT in $EVENTS ; do
100 | 			round $EVENT $ALG "$FILE" $XMLOUT
101 | 		done
102 | 	done
103 | done
104 | 


--------------------------------------------------------------------------------
/report/generate-algs-xml:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | OUT=algs.xml
 3 | if [[ -e $OUT ]] ; then
 4 | 	rm $OUT
 5 | fi
 6 | echo "<algs>" >> $OUT
 7 | for ALGNUM in `./sortstring --alg-nums` ; do
 8 | 	# replace normal spaces with non-breaking space &#x00a0;
 9 | 	# replace normal hyphens with non-breaking hyphens &#x2011;
10 | 	ALGNAME="`./sortstring --alg-name=$ALGNUM | sed 's/ /\\&#x00a0;/g' | sed 's/-/\\&#x2011;/g'`"
11 | 	echo "    <alg algnum=\"$ALGNUM\" algname=\"$ALGNAME\"/>" >> $OUT
12 | done
13 | echo "</algs>" >> $OUT
14 | echo "Generated ''$OUT''."
15 | 


--------------------------------------------------------------------------------
/report/oprofile-simplify.xsl:
--------------------------------------------------------------------------------
 1 | <!-- Simplifies the XML file produced by OProfile by giving one line that
 2 |      contains only the event name, mask and total count.
 3 | -->
<stylesheet version="1.0" xmlns="http://www.w3.org/1999/XSL/Transform">
	<!-- Produce plain text output; the <event .../> markup is written out
	     literally by the template below. -->
	<output method="text" omit-xml-declaration="yes" indent="no"/>
	<strip-space elements="*"/>
	<!-- Helper template that emits a single line break. -->
	<template name="newline"><text>
</text></template>
	<!-- For each /profile/binary/count element, write one line of the form
	     <event name="..." mask="..." value="..."/>. The name and mask come
	     from /profile/setup/eventsetup; the value is the count multiplied
	     by the setupcount attribute. -->
	<template match="/profile/binary/count">
		<text>&lt;event name="</text>
		<value-of select="/profile/setup/eventsetup/@eventname"/>
		<text>" mask="</text>
		<value-of select="/profile/setup/eventsetup/@unitmask"/>
		<text>" value="</text>
		<value-of select=". * /profile/setup/eventsetup/@setupcount"/>
		<text>"/&gt;</text>
		<call-template name="newline"/>
	</template>
	<!-- override implicit behaviour -->
	<template match="text()|@*"/>
</stylesheet>
22 | 


--------------------------------------------------------------------------------
/report/process-memusage-statistics:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | ################################################################################
 3 | # Copyright 2008 by Tommi Rantala <tt.rantala@gmail.com>
 4 | #
 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | # of this software and associated documentation files (the "Software"), to
 7 | # deal in the Software without restriction, including without limitation the
 8 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 9 | # sell copies of the Software, and to permit persons to whom the Software is
10 | # furnished to do so, subject to the following conditions:
11 | #
12 | # The above copyright notice and this permission notice shall be included in
13 | # all copies or substantial portions of the Software.
14 | #
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 | # IN THE SOFTWARE.
22 | ################################################################################
23 | #
24 | # Postprocesses the outputs from the tools we have used to gather data to HTML.
25 | #
26 | ################################################################################
# Print "ERROR: <message>" and abort with exit status 1.
# This script calls die() below but previously never defined it, so a failed
# check only produced "die: command not found" and execution carried on.
function die() {
	echo "ERROR: $1"
	exit 1
}
################################################################################
# Settings; each may be preset through the environment.
if [[ -z $BIN  ]] ; then BIN=./sortstring ; fi
if [[ ! -x $BIN ]] ; then die "Sorry, binary not executable" ; fi
if [[ -z $ALGS ]] ; then ALGS=`$BIN --alg-nums` ; fi
if [[ -z $INFILES ]] ; then INFILES="input/url3 input/nodup3 input/genome3" ; fi
if [[ -z $OUTDIR ]] ; then OUTDIR="data" ; fi
mkdir -p $OUTDIR
if [[ -z $TOOLOUTDIR ]] ; then TOOLOUTDIR="tool-output"; fi
for I in $INFILES ; do
        if [[ ! -r $I ]] ; then die "Sorry, ''$I'' not readable" ; fi
done
37 | ################################################################################
echo "ALGS=$ALGS"
echo "INFILES=$INFILES"
echo "Processing XML ..."
for FILE in $INFILES ; do
	# Baseline figures come from the report of the run with algorithm 0.
	TOOLOUT=$TOOLOUTDIR/memusage_$(basename ${FILE})_0
	BLHEAPTOTAL=$(grep "heap total" $TOOLOUT | sed 's/.*heap total: \([0-9]*\),.*/\1/')
	BLHEAPPEAK=$(grep "heap peak" $TOOLOUT   | sed 's/.*heap peak: \([0-9]*\),.*/\1/')
	BLSTACKPEAK=$(grep "stack peak" $TOOLOUT | sed 's/.*stack peak: \([0-9]*\)/\1/')
	BLCALLSMALLOC=$(grep "malloc" $TOOLOUT   | awk '{print $3}')
	BLCALLSREALLOC=$(grep "realloc" $TOOLOUT | awk '{print $2}')
	BLCALLSCALLOC=$(grep "calloc" $TOOLOUT   | awk '{print $3}')
	for ALG in $ALGS ; do
		XMLOUT=$OUTDIR/memusage_$(basename ${FILE})_${ALG}.xml
		TOOLOUT=$TOOLOUTDIR/memusage_$(basename ${FILE})_${ALG}
		# Same six figures, this time for the algorithm under test.
		HEAPTOTAL=$(grep "heap total" $TOOLOUT | sed 's/.*heap total: \([0-9]*\),.*/\1/')
		HEAPPEAK=$(grep "heap peak" $TOOLOUT   | sed 's/.*heap peak: \([0-9]*\),.*/\1/')
		STACKPEAK=$(grep "stack peak" $TOOLOUT | sed 's/.*stack peak: \([0-9]*\)/\1/')
		CALLSMALLOC=$(grep "malloc" $TOOLOUT   | awk '{print $3}')
		CALLSREALLOC=$(grep "realloc" $TOOLOUT | awk '{print $2}')
		CALLSCALLOC=$(grep "calloc" $TOOLOUT   | awk '{print $3}')
		# Report each figure relative to the baseline.
		HEAPTOTAL=$(($HEAPTOTAL-$BLHEAPTOTAL))
		HEAPPEAK=$(($HEAPPEAK-$BLHEAPPEAK))
		STACKPEAK=$(($STACKPEAK-$BLSTACKPEAK))
		CALLSMALLOC=$(($CALLSMALLOC-$BLCALLSMALLOC))
		CALLSREALLOC=$(($CALLSREALLOC-$BLCALLSREALLOC))
		CALLSCALLOC=$(($CALLSCALLOC-$BLCALLSCALLOC))
		# A negative heap total is clamped to 0; the other figures only warn.
		if [[ $HEAPTOTAL -lt 0 ]] ; then
			HEAPTOTAL=0
		fi
		[[ $HEAPPEAK -lt 0 ]]     && echo "WARNING: heap-peak negative, alg=$ALG"
		[[ $STACKPEAK -lt 0 ]]    && echo "WARNING: stack-peak negative, alg=$ALG"
		[[ $CALLSMALLOC -lt 0 ]]  && echo "WARNING: malloc-calls negative, alg=$ALG"
		[[ $CALLSREALLOC -lt 0 ]] && echo "WARNING: realloc-calls negative, alg=$ALG"
		[[ $CALLSCALLOC -lt 0 ]]  && echo "WARNING: calloc-calls negative, alg=$ALG"
		echo "<memusage>" > $XMLOUT || die "Could not create $XMLOUT"
		echo "  <event heap-total=\"$HEAPTOTAL\" heap-peak=\"$HEAPPEAK\" stack-peak=\"$STACKPEAK\" calls-malloc=\"$CALLSMALLOC\" calls-realloc=\"$CALLSREALLOC\" calls-calloc=\"$CALLSCALLOC\"/>" >> $XMLOUT
		echo "</memusage>" >> $XMLOUT
	done
done
echo "Processing HTML ..."
# Pattern that matches the terminal colour escape sequences in the tool output.
STRIPCOLOR='s/\x1b\[[0-9][0-9]*;*[0-9]*m//g'
for FILE in $INFILES ; do
	for ALG in $ALGS ; do
		HTMLOUT=$OUTDIR/memusage_$(basename ${FILE})_${ALG}.html
		TOOLOUT=$TOOLOUTDIR/memusage_$(basename ${FILE})_${ALG}
		# Baseline report: same name with the trailing _<digits> replaced by _0.
		BASELINE=$(echo $TOOLOUT | sed 's/_[0-9]*$//')_0
		echo "<html><body><pre>" > $HTMLOUT
		# Rip ASCII color codes for now, maybe convert to equivalent CSS colors?
		sed "$STRIPCOLOR" $TOOLOUT >> $HTMLOUT
		# Append baseline results
		echo "</pre><hr/>Baseline:<pre>" >> $HTMLOUT
		sed "$STRIPCOLOR" $BASELINE >> $HTMLOUT
		echo "</pre></body></html>" >> $HTMLOUT
	done
done
95 | 


--------------------------------------------------------------------------------
/report/process-oprofile-statistics:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | ################################################################################
  3 | # Copyright 2008 by Tommi Rantala <tt.rantala@gmail.com>
  4 | #
  5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
  6 | # of this software and associated documentation files (the "Software"), to
  7 | # deal in the Software without restriction, including without limitation the
  8 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  9 | # sell copies of the Software, and to permit persons to whom the Software is
 10 | # furnished to do so, subject to the following conditions:
 11 | #
 12 | # The above copyright notice and this permission notice shall be included in
 13 | # all copies or substantial portions of the Software.
 14 | #
 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 20 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 21 | # IN THE SOFTWARE.
 22 | ################################################################################
 23 | function die() {
 24 | 	echo "ERROR: $1"
 25 | 	exit 1
 26 | }
 27 | ################################################################################
 28 | if [[ -z $EVENTS ]] ; then
 29 | 	EVENTS="CPU_CLK_UNHALTED:100000
 30 | 		INST_RETIRED:100000
 31 | 		DTLB_MISSES:10000
 32 | 		L1D_REPL:10000
 33 | 		L2_LINES_IN:10000
 34 | 		LOAD_BLOCK:10000:0x02"
 35 | fi
 36 | ################################################################################
 37 | if [[ -z $BIN  ]] ; then BIN=./sortstring ; fi
 38 | if [[ ! -x $BIN ]] ; then die "Sorry, binary not executable" ; fi
 39 | if [[ -z $ALGS ]] ; then ALGS=`$BIN --alg-nums` ; fi
 40 | if [[ -z $INFILES ]] ; then INFILES="input/url3 input/nodup3 input/genome3" ; fi
 41 | if [[ -z $OUTDIR ]] ; then OUTDIR="data" ; fi
 42 | mkdir -p $OUTDIR
 43 | if [[ -z $TOOLOUTDIR ]] ; then TOOLOUTDIR="tool-output"; fi
 44 | for I in $INFILES ; do
 45 |         if [[ ! -r "$I" ]] ; then die "Sorry, ''$I'' not readable" ; fi
 46 | done
 47 | # Oprofile requires absolute path.
 48 | OPSESSIONDIR=$PWD/$TOOLOUTDIR/oprofile-session-dir
 49 | ################################################################################
 50 | let ALGCOUNT=0; for A in $ALGS ; do let ++ALGCOUNT; done
 51 | let FILECOUNT=0; for F in $INFILES ; do let ++FILECOUNT; done
 52 | let EVENTCOUNT=0; for E in $EVENTS ; do let ++EVENTCOUNT; done
 53 | ################################################################################
 54 | echo "ALGS=$ALGS"
 55 | echo "EVENTS=$EVENTS"
 56 | echo "INFILES=$INFILES"
 57 | echo "Processing XML ..."
 58 | KDIALOG=`which kdialog 2>/dev/null`
 59 | if [[ -x "$KDIALOG" ]] ; then
 60 | 	# dont spawn kdialog in a subshell
 61 | 	kdialog --caption "Process OProfile data" --progressbar "-" $((2*$FILECOUNT*$ALGCOUNT*$EVENTCOUNT)) 2>/dev/null >.tmp
 62 | 	DCOPREF=`cat .tmp`
 63 | 	rm .tmp
 64 | fi
 65 | if [[ -n "$DCOPREF" ]] ; then dcop $DCOPREF setAutoClose 1 ; fi
 66 | for FILE in $INFILES ; do
 67 | 	echo "   Input file $FILE ..."
 68 | 	BFILE=`basename "$FILE"`
 69 | 	for ALG in $ALGS ; do
 70 | 		XMLOUT="$OUTDIR/oprofile_${BFILE}_${ALG}.xml"
 71 | 		echo "<simple>" > $XMLOUT || die "Could not create $XMLOUT"
 72 | 		for EVENT in $EVENTS ; do
 73 | 			if [[ -n "$DCOPREF" ]] ; then dcop $DCOPREF setLabel "<pre>Generating XML ...</pre><pre>$FILE</pre><pre>`$BIN --alg-name=$ALG`</pre><pre>$EVENT</pre>" ; fi
 74 | 			NICE_EVENT="`echo $EVENT | sed 's/:/_/g'`"
 75 | 			SESSIONNAME="${BFILE}_${ALG}_${NICE_EVENT}"
 76 | 			opreport --session-dir=$OPSESSIONDIR --xml \
 77 | 				session:$SESSIONNAME $BIN >.report 2>/dev/null
 78 | 			if [[ $? -ne 0 ]] ; then
 79 | 				echo "WARNING: session $SESSIONNAME failed"
 80 | 			else
 81 | 				xsltproc oprofile-simplify.xsl .report >> $XMLOUT
 82 | 			fi
 83 | 			rm -f .report
 84 | 			if [[ -n "$DCOPREF" ]] ; then PROGRESS=`dcop $DCOPREF progress` ; fi
 85 | 			if [[ -n "$DCOPREF" ]] ; then dcop $DCOPREF setProgress $((PROGRESS+1)) ; fi
 86 | 		done
 87 | 		echo "</simple>" >> $XMLOUT
 88 | 	done
 89 | done
 90 | echo "Processing HTML ..."
 91 | for FILE in $INFILES ; do
 92 | 	echo "   Input file $FILE ..."
 93 | 	BFILE=`basename "$FILE"`
 94 | 	for ALG in $ALGS ; do
 95 | 		for EVENT in $EVENTS ; do
 96 | 			if [[ -n "$DCOPREF" ]] ; then dcop $DCOPREF setLabel "<pre>Generating HTML ...</pre><pre>$FILE</pre><pre>`$BIN --alg-name=$ALG`</pre><pre>$EVENT</pre>" ; fi
 97 | 			# convert : -> _
 98 | 			NICE_EVENT="`echo $EVENT | tr : _`"
 99 | 			SESSIONNAME="${BFILE}_${ALG}_${NICE_EVENT}"
100 | 			# strip event count for HTML file name:
101 | 			#   STORE_BLOCK:10000:0x02  -> STORE_BLOCK_0x02
102 | 			#   DTLB_MISSES:10000       -> DTLB_MISSES
103 | 			HTML_EVENT="`echo $EVENT | awk -F: '{if (\$3) print \$1 "_" \$3; else print \$1;}'`"
104 | 			HTMLOUT="$OUTDIR/opannotate_${BFILE}_${ALG}_${HTML_EVENT}.html"
105 | 			opannotate --session-dir=$OPSESSIONDIR \
106 | 				session:$SESSIONNAME --demangle smart \
107 | 				--image-path=/lib/modules/`uname -r`/kernel \
108 | 				--source --threshold 2 >> .report
109 | 			ST=$?
110 | 			if [[ $ST -eq 0 ]] ; then
111 | 				if [[ -x "./cpp2html" ]] ; then
112 | 					./cpp2html .report > $HTMLOUT
113 | 				else
114 | 					echo "<html><body><pre>" > $HTMLOUT
115 | 					cat .report >> $HTMLOUT
116 | 					echo "</pre></body></html>" >> $HTMLOUT
117 | 				fi
118 | 			fi
119 | 			rm -f .report
120 | 			if [[ -n "$DCOPREF" ]] ; then PROGRESS=`dcop $DCOPREF progress` ; fi
121 | 			if [[ -n "$DCOPREF" ]] ; then dcop $DCOPREF setProgress $((PROGRESS+1)) ; fi
122 | 		done
123 | 	done
124 | done
125 | 


--------------------------------------------------------------------------------
/report/sortable.js:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rantala/string-sorting/56c7d3ff1abcfe253d714992c841ce17e311d567/report/sortable.js


--------------------------------------------------------------------------------
/src/losertree.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2008 by Tommi Rantala <tt.rantala@gmail.com>
  3 |  *
  4 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
  5 |  * of this software and associated documentation files (the "Software"), to
  6 |  * deal in the Software without restriction, including without limitation the
  7 |  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  8 |  * sell copies of the Software, and to permit persons to whom the Software is
  9 |  * furnished to do so, subject to the following conditions:
 10 |  *
 11 |  * The above copyright notice and this permission notice shall be included in
 12 |  * all copies or substantial portions of the Software.
 13 |  *
 14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 17 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 20 |  * IN THE SOFTWARE.
 21 |  */
 22 | 
 23 | /* The loser tree is defined in:
 24 |  *
 25 |  *     Donald Knuth: The Art of Computer Programming,
 26 |  *              Volume III: Sorting and Searching, 1973,
 27 |  *              section 5.4.1, page 253
 28 |  *
 29 |  * It is used to implement multi-way merging.
 30 |  */
 31 | 
 32 | /* Example with 8 streams:
 33 |  *
 34 |  * _nodes:
 35 |  *   Each node contains an index to the _streams array. The winner (smallest
 36 |  *   item) is stored in position 0. Other nodes contain the loser of each
 37 |  *   comparison.
 38 |  *
 39 |  *                   <0>
 40 |  *                    |
 41 |  *                   <1>
 42 |  *                  /   \
 43 |  *                 /     \
 44 |  *                /       \
 45 |  *              <2>       <3>
 46 |  *              / \       / \
 47 |  *             /   \     /   \
 48 |  *           <4>   <5> <6>   <7>
 49 |  *
 50 |  * _streams:
 51 |  *   0:(T*,n), 1:(T*,n), ..., 7:(T*,n)
 52 |  *
 53 |  * Both structures contain exactly 2^k items. Empty streams are inserted if
 54 |  * required.
 55 |  */
 56 | 
 57 | #ifndef LOSERTREE_H
 58 | #define LOSERTREE_H
 59 | 
 60 | #include "util/debug.h"
 61 | #include <cassert>
 62 | #include <cstdlib>
 63 | #include <cstring>
 64 | #include <algorithm>
 65 | 
 66 | static inline unsigned log2(unsigned n)
 67 | { return 8*sizeof(unsigned)-1-__builtin_clz(n); }
 68 | 
 69 | template <typename T>
 70 | struct loser_tree
 71 | {
 72 | 	typedef struct { T* stream; size_t n; } Stream;
 73 | 	unsigned* restrict _nodes;
 74 | 	Stream* restrict _streams;
 75 | 	unsigned _nonempty_streams;
 76 | 	const unsigned _stream_offset;
 77 | 
 78 | 	template <typename Iterator>
 79 | 	loser_tree(Iterator begin, Iterator end)
 80 | 		: _nodes(0), _streams(0), _nonempty_streams(end-begin),
 81 | 		  _stream_offset(1 << (log2(_nonempty_streams-1)+1))
 82 | 	{
 83 | 		assert(_nonempty_streams>1);
 84 | 		void* raw = malloc(_stream_offset*sizeof(unsigned) +
 85 | 		                   _stream_offset*sizeof(Stream));
 86 | 		_nodes = static_cast<unsigned*>(raw);
 87 | 		_streams = reinterpret_cast<Stream*>(
 88 | 				static_cast<char*>(raw) +
 89 | 					_stream_offset*sizeof(unsigned));
 90 | 		for (unsigned i=0; i < _nonempty_streams; ++i) {
 91 | 			_streams[i].stream = begin[i].first;
 92 | 			_streams[i].n      = begin[i].second;
 93 | 		}
 94 | 		(void) memset(_streams+_nonempty_streams, 0,
 95 | 			(_stream_offset-_nonempty_streams)*sizeof(Stream));
 96 | 		_nodes[0] = init_min(1);
 97 | 		//debug()<<*this;
 98 | 	}
 99 | 
100 | 	~loser_tree()
101 | 	{
102 | 		assert(_nodes); assert(_streams);
103 | 		free(static_cast<void*>(_nodes));
104 | 	}
105 | 
106 | 	Stream& node2stream(unsigned pos)
107 | 	{
108 | 		assert(pos < _stream_offset);
109 | 		assert(_nodes[pos] < _stream_offset);
110 | 		return _streams[_nodes[pos]];
111 | 	}
112 | 
113 | 	bool stream_empty(const Stream& pos) const { return pos.n == size_t(0); }
114 | 
115 | 	unsigned init_min(unsigned root)
116 | 	{
117 | 		if (root >= _stream_offset) { return root-_stream_offset; }
118 | 		//debug() << __PRETTY_FUNCTION__ << " root="<<root<<"\n";
119 | 		const unsigned l = init_min(root << 1);
120 | 		const unsigned r = init_min((root << 1) + 1);
121 | 		if (stream_empty(_streams[r])) {
122 | 			_nodes[root] = r;
123 | 			return l;
124 | 		}
125 | 		if (stream_empty(_streams[l])) {
126 | 			_nodes[root] = l;
127 | 			return r;
128 | 		}
129 | 		if (cmp(*(_streams[l].stream), *(_streams[r].stream)) <= 0) {
130 | 			_nodes[root] = r;
131 | 			return l;
132 | 		}
133 | 		_nodes[root] = l;
134 | 		return r;
135 | 	}
136 | 
137 | 	bool empty() const { return _nonempty_streams == 0; }
138 | 
139 | 	void update()
140 | 	{
141 | 		//debug() << __PRETTY_FUNCTION__ << std::endl;
142 | 		unsigned new_min = _nodes[0];
143 | 		for (unsigned i=(_stream_offset+new_min) >> 1; i!=0; i >>= 1) {
144 | 			if (stream_empty(_streams[new_min]) or
145 | 			    (not stream_empty(node2stream(i)) and
146 | 			     cmp(*node2stream(i).stream,
147 | 			         *_streams[new_min].stream) < 0)) {
148 | 				std::swap(new_min, _nodes[i]);
149 | 			}
150 | 		}
151 | 		_nodes[0] = new_min;
152 | 	}
153 | 
154 | 	T min()
155 | 	{
156 | 		//debug() << __PRETTY_FUNCTION__ << std::endl;
157 | 		assert(_nonempty_streams);
158 | 		assert(not stream_empty(node2stream(0)));
159 | 		T ret = *(node2stream(0).stream++);
160 | 		if (--node2stream(0).n == size_t(0)) { --_nonempty_streams; }
161 | 		update();
162 | 		//debug() << "\t -> " << ret << std::endl;
163 | 		return ret;
164 | 	}
165 | };
166 | 
167 | #ifndef NDEBUG
168 | #include <ostream>
169 | template <typename T>
170 | std::ostream& operator<<(std::ostream& strm, const loser_tree<T>& tree)
171 | {
172 | 	strm<<"/-------------------\n";
173 | 	for(unsigned i=0;i<tree._stream_offset;++i){
174 | 		if(i==1)strm<<"--------------------\n";
175 | 		strm<<i<<": "<<tree._nodes[i]<<"\n";
176 | 	}
177 | 	strm<<"--------------------\n";
178 | 	for(unsigned i=0;i<tree._stream_offset;++i)
179 | 		strm<<i<<": "<<tree._streams[i].stream
180 | 		    <<", n="<<tree._streams[i].n<<"\n";
181 | 	strm<<"-------------------/\n";
182 | 	return strm;
183 | }
184 | #endif //NDEBUG
185 | #endif //LOSERTREE_H
186 | 


--------------------------------------------------------------------------------
/src/mergesort_losertree.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2008 by Tommi Rantala <tt.rantala@gmail.com>
  3 |  *
  4 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
  5 |  * of this software and associated documentation files (the "Software"), to
  6 |  * deal in the Software without restriction, including without limitation the
  7 |  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  8 |  * sell copies of the Software, and to permit persons to whom the Software is
  9 |  * furnished to do so, subject to the following conditions:
 10 |  *
 11 |  * The above copyright notice and this permission notice shall be included in
 12 |  * all copies or substantial portions of the Software.
 13 |  *
 14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 17 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 20 |  * IN THE SOFTWARE.
 21 |  */
 22 | 
 23 | /* Implements a multi-way mergesort based on the loser tree.
 24 |  */
 25 | 
 26 | #include "routine.h"
 27 | #include "util/debug.h"
 28 | #include <cassert>
 29 | #include <cstring>
 30 | #include <array>
 31 | 
 32 | static inline int
 33 | cmp(const unsigned char* a, const unsigned char* b)
 34 | {
 35 | 	assert(a != 0);
 36 | 	assert(b != 0);
 37 | 	return strcmp(reinterpret_cast<const char*>(a),
 38 | 	              reinterpret_cast<const char*>(b));
 39 | }
 40 | 
 41 | #include "losertree.h"
 42 | 
 43 | void mergesort_4way(unsigned char**, size_t, unsigned char**);
 44 | 
 45 | template <unsigned K>
 46 | static void
 47 | mergesort_losertree(unsigned char** strings, size_t n, unsigned char** tmp)
 48 | {
 49 | 	if (n < 0x10000) {
 50 | 		mergesort_4way(strings, n, tmp);
 51 | 		return;
 52 | 	}
 53 | 	debug() << __func__ << "(), n="<<n<<"\n";
 54 | 	const size_t split = size_t(double(n) / double(K));
 55 | 	std::array<std::pair<unsigned char**, size_t>, K> ranges;
 56 | 	for (unsigned i=0; i < K-1; ++i) {
 57 | 		ranges[i] = std::make_pair(strings+i*split, split);
 58 | 	}
 59 | 	ranges[K-1] = std::make_pair(strings+(K-1)*split, n-(K-1)*split);
 60 | 	for (unsigned i=0; i < K; ++i) {
 61 | 		mergesort_losertree<K>(ranges[i].first, ranges[i].second,
 62 | 				tmp+(ranges[i].first-strings));
 63 | 	}
 64 | 	unsigned char** result = tmp;
 65 | 	loser_tree<unsigned char*> tree(ranges.begin(), ranges.end());
 66 | 	while (tree._nonempty_streams) { *result++ = tree.min(); }
 67 | 	(void) memcpy(strings, tmp, n*sizeof(unsigned char*));
 68 | }
 69 | 
 70 | void mergesort_losertree_64way(unsigned char** strings, size_t n)
 71 | {
 72 | 	unsigned char** tmp = static_cast<unsigned char**>(
 73 | 			malloc(n*sizeof(unsigned char*)));
 74 | 	mergesort_losertree<64>(strings, n, tmp);
 75 | 	free(tmp);
 76 | }
 77 | void mergesort_losertree_128way(unsigned char** strings, size_t n)
 78 | {
 79 | 	unsigned char** tmp = static_cast<unsigned char**>(
 80 | 			malloc(n*sizeof(unsigned char*)));
 81 | 	mergesort_losertree<128>(strings, n, tmp);
 82 | 	free(tmp);
 83 | }
 84 | void mergesort_losertree_256way(unsigned char** strings, size_t n)
 85 | {
 86 | 	unsigned char** tmp = static_cast<unsigned char**>(
 87 | 			malloc(n*sizeof(unsigned char*)));
 88 | 	mergesort_losertree<256>(strings, n, tmp);
 89 | 	free(tmp);
 90 | }
 91 | void mergesort_losertree_512way(unsigned char** strings, size_t n)
 92 | {
 93 | 	unsigned char** tmp = static_cast<unsigned char**>(
 94 | 			malloc(n*sizeof(unsigned char*)));
 95 | 	mergesort_losertree<512>(strings, n, tmp);
 96 | 	free(tmp);
 97 | }
 98 | void mergesort_losertree_1024way(unsigned char** strings, size_t n)
 99 | {
100 | 	unsigned char** tmp = static_cast<unsigned char**>(
101 | 			malloc(n*sizeof(unsigned char*)));
102 | 	mergesort_losertree<1024>(strings, n, tmp);
103 | 	free(tmp);
104 | }
105 | 
/* Registration of the single-core loser tree mergesort entry points, each
 * paired with a short description string.
 * NOTE(review): ROUTINE_REGISTER_SINGLECORE comes from routine.h, which is not
 * visible here -- presumably it adds the function to the list of selectable
 * sorting routines; confirm.
 */
ROUTINE_REGISTER_SINGLECORE(mergesort_losertree_64way,
		"64way loser tree based mergesort")
ROUTINE_REGISTER_SINGLECORE(mergesort_losertree_128way,
		"128way loser tree based mergesort")
ROUTINE_REGISTER_SINGLECORE(mergesort_losertree_256way,
		"256way loser tree based mergesort")
ROUTINE_REGISTER_SINGLECORE(mergesort_losertree_512way,
		"512way loser tree based mergesort")
ROUTINE_REGISTER_SINGLECORE(mergesort_losertree_1024way,
		"1024way loser tree based mergesort")
116 | 
117 | void mergesort_4way_parallel(unsigned char**, size_t, unsigned char**);
118 | 
119 | template <unsigned K>
120 | static void
121 | mergesort_losertree_parallel(unsigned char** strings, size_t n, unsigned char** tmp)
122 | {
123 | 	if (n < 0x10000) {
124 | 		mergesort_4way_parallel(strings, n, tmp);
125 | 		return;
126 | 	}
127 | 	debug() << __func__ << "(), n="<<n<<"\n";
128 | 	const size_t split = size_t(double(n) / double(K));
129 | 	std::array<std::pair<unsigned char**, size_t>, K> ranges;
130 | 	for (unsigned i=0; i < K-1; ++i) {
131 | 		ranges[i] = std::make_pair(strings+i*split, split);
132 | 	}
133 | 	ranges[K-1] = std::make_pair(strings+(K-1)*split, n-(K-1)*split);
134 | #pragma omp parallel for
135 | 	for (unsigned i=0; i < K; ++i) {
136 | 		mergesort_losertree_parallel<K>(ranges[i].first, ranges[i].second,
137 | 				tmp+(ranges[i].first-strings));
138 | 	}
139 | 	unsigned char** result = tmp;
140 | 	loser_tree<unsigned char*> tree(ranges.begin(), ranges.end());
141 | 	while (tree._nonempty_streams) { *result++ = tree.min(); }
142 | 	(void) memcpy(strings, tmp, n*sizeof(unsigned char*));
143 | }
144 | 
145 | void mergesort_losertree_64way_parallel(unsigned char** strings, size_t n)
146 | {
147 | 	unsigned char** tmp = static_cast<unsigned char**>(
148 | 			malloc(n*sizeof(unsigned char*)));
149 | 	mergesort_losertree_parallel<64>(strings, n, tmp);
150 | 	free(tmp);
151 | }
152 | void mergesort_losertree_128way_parallel(unsigned char** strings, size_t n)
153 | {
154 | 	unsigned char** tmp = static_cast<unsigned char**>(
155 | 			malloc(n*sizeof(unsigned char*)));
156 | 	mergesort_losertree_parallel<128>(strings, n, tmp);
157 | 	free(tmp);
158 | }
159 | void mergesort_losertree_256way_parallel(unsigned char** strings, size_t n)
160 | {
161 | 	unsigned char** tmp = static_cast<unsigned char**>(
162 | 			malloc(n*sizeof(unsigned char*)));
163 | 	mergesort_losertree_parallel<256>(strings, n, tmp);
164 | 	free(tmp);
165 | }
166 | void mergesort_losertree_512way_parallel(unsigned char** strings, size_t n)
167 | {
168 | 	unsigned char** tmp = static_cast<unsigned char**>(
169 | 			malloc(n*sizeof(unsigned char*)));
170 | 	mergesort_losertree_parallel<512>(strings, n, tmp);
171 | 	free(tmp);
172 | }
173 | void mergesort_losertree_1024way_parallel(unsigned char** strings, size_t n)
174 | {
175 | 	unsigned char** tmp = static_cast<unsigned char**>(
176 | 			malloc(n*sizeof(unsigned char*)));
177 | 	mergesort_losertree_parallel<1024>(strings, n, tmp);
178 | 	free(tmp);
179 | }
180 | 
/* Registration of the parallel loser tree mergesort entry points, each paired
 * with a short description string.
 * NOTE(review): ROUTINE_REGISTER_MULTICORE comes from routine.h, which is not
 * visible here -- presumably it adds the function to the list of selectable
 * multi-core sorting routines; confirm.
 */
ROUTINE_REGISTER_MULTICORE(mergesort_losertree_64way_parallel,
		"Parallel 64way loser tree based mergesort")
ROUTINE_REGISTER_MULTICORE(mergesort_losertree_128way_parallel,
		"Parallel 128way loser tree based mergesort")
ROUTINE_REGISTER_MULTICORE(mergesort_losertree_256way_parallel,
		"Parallel 256way loser tree based mergesort")
ROUTINE_REGISTER_MULTICORE(mergesort_losertree_512way_parallel,
		"Parallel 512way loser tree based mergesort")
ROUTINE_REGISTER_MULTICORE(mergesort_losertree_1024way_parallel,
		"Parallel 1024way loser tree based mergesort")
191 | 


--------------------------------------------------------------------------------
/src/msd_a.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2007-2008 by Tommi Rantala <tt.rantala@gmail.com>
  3 |  *
  4 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
  5 |  * of this software and associated documentation files (the "Software"), to
  6 |  * deal in the Software without restriction, including without limitation the
  7 |  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  8 |  * sell copies of the Software, and to permit persons to whom the Software is
  9 |  * furnished to do so, subject to the following conditions:
 10 |  *
 11 |  * The above copyright notice and this permission notice shall be included in
 12 |  * all copies or substantial portions of the Software.
 13 |  *
 14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 17 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 20 |  * IN THE SOFTWARE.
 21 |  */
 22 | 
 23 | /*
 24 |  * msd_A() is an implementation of the MSD radix sort that maintains a manual
 25 |  * cache of a few bytes from each string to be sorted. This cache is updated
 26 |  * along the execution of the algorithm.
 27 |  *
 28 |  * The idea can be found in the CRadix algorithm by Ng and Kakehi.
 29 |  *    http://dx.doi.org/10.1093/ietfec/e90-a.2.457
 30 |  */
 31 | 
 32 | #include "routine.h"
 33 | #include <stdlib.h>
 34 | #include <string.h>
 35 | #include <stdio.h>
 36 | #include <unistd.h>
 37 | #include <inttypes.h>
 38 | 
/* Number of string bytes that are mirrored in every cache block. */
#define CACHED_BYTES 4

/* One element of the array that is actually sorted: a string pointer plus a
 * small window of bytes of that string. fill_cache() loads the window.
 */
typedef struct {
	unsigned char bytes[CACHED_BYTES]; /* cached bytes, zero-filled from the end of the string on */
	unsigned char* ptr;                /* the string this block stands for */
} cacheblock_t;
 45 | 
 46 | static inline void
 47 | inssort_cache(cacheblock_t* cache, int n, size_t depth)
 48 | {
 49 | 	cacheblock_t *pi, *pj;
 50 | 	unsigned char *s, *t;
 51 | 	for (pi = cache + 1; --n > 0; ++pi) {
 52 | 		unsigned char* tmp = pi->ptr;
 53 | 		for (pj = pi; pj > cache; --pj) {
 54 | 			t = tmp + depth;
 55 | 			for (s=(pj-1)->ptr+depth; *s==*t && *s!=0; ++s, ++t)
 56 | 				;
 57 | 			if (*s <= *t)
 58 | 				break;
 59 | 			pj->ptr = (pj-1)->ptr;
 60 | 		}
 61 | 		pj->ptr = tmp;
 62 | 	}
 63 | }
 64 | 
 65 | static void
 66 | fill_cache(cacheblock_t* cache, size_t N, size_t depth)
 67 | {
 68 | 	for (size_t i=0; i < N; ++i) {
 69 | 		unsigned int j=0;
 70 | 		while (j < CACHED_BYTES && cache[i].ptr[depth+j]) {
 71 | 			cache[i].bytes[j] = cache[i].ptr[depth+j];
 72 | 			++j;
 73 | 		}
 74 | 		while (j < CACHED_BYTES) {
 75 | 			cache[i].bytes[j] = 0;
 76 | 			++j;
 77 | 		}
 78 | 	}
 79 | }
 80 | 
 81 | static void
 82 | msd_A(cacheblock_t* cache, size_t N, size_t cache_depth, size_t true_depth)
 83 | {
 84 | 	if (N < 32) {
 85 | 		inssort_cache(cache, N, true_depth);
 86 | 		return;
 87 | 	}
 88 | 	if (cache_depth >= CACHED_BYTES) {
 89 | 		fill_cache(cache, N, true_depth);
 90 | 		cache_depth = 0;
 91 | 	}
 92 | 	size_t bucketsize[256] = {0};
 93 | 	for (size_t i=0; i < N; ++i)
 94 | 		++bucketsize[cache[i].bytes[cache_depth]];
 95 | 	cacheblock_t* sorted = (cacheblock_t*)
 96 | 		malloc(N*sizeof(cacheblock_t));
 97 | 	static size_t bucketindex[256];
 98 | 	bucketindex[0] = 0;
 99 | 	for (unsigned i=1; i < 256; ++i)
100 | 		bucketindex[i] = bucketindex[i-1] + bucketsize[i-1];
101 | 	for (size_t i=0; i < N; ++i)
102 | 		memcpy(&sorted[bucketindex[cache[i].bytes[cache_depth]]++],
103 | 				cache+i, sizeof(cacheblock_t));
104 | 	memcpy(cache, sorted, N*sizeof(cacheblock_t));
105 | 	free(sorted);
106 | 	size_t bsum = bucketsize[0];
107 | 	for (unsigned i=1; i < 256; ++i) {
108 | 		if (bucketsize[i] == 0) continue;
109 | 		msd_A(cache+bsum, bucketsize[i],
110 | 			  cache_depth+1, true_depth+1);
111 | 		bsum += bucketsize[i];
112 | 	}
113 | }
114 | 
115 | static void
116 | msd_A_adaptive(cacheblock_t* cache,
117 |                size_t N,
118 |                size_t cache_depth,
119 |                size_t true_depth)
120 | {
121 | 	if (N < 0x10000) {
122 | 		msd_A(cache, N, cache_depth, true_depth);
123 | 		return;
124 | 	}
125 | 	if (cache_depth >= CACHED_BYTES) {
126 | 		fill_cache(cache, N, true_depth);
127 | 		cache_depth = 0;
128 | 	}
129 | 	size_t* bucketsize = (size_t*) calloc(0x10000, sizeof(size_t));
130 | 	for (size_t i=0; i < N; ++i) {
131 | 		uint16_t bucket =
132 | 			(cache[i].bytes[cache_depth] << 8) |
133 | 			cache[i].bytes[cache_depth+1];
134 | 		++bucketsize[bucket];
135 | 	}
136 | 	cacheblock_t* sorted = (cacheblock_t*)
137 | 		malloc(N*sizeof(cacheblock_t));
138 | 	static size_t bucketindex[0x10000];
139 | 	bucketindex[0] = 0;
140 | 	for (unsigned i=1; i < 0x10000; ++i)
141 | 		bucketindex[i] = bucketindex[i-1] + bucketsize[i-1];
142 | 	for (size_t i=0; i < N; ++i) {
143 | 		uint16_t bucket = (cache[i].bytes[cache_depth] << 8)
144 | 			| cache[i].bytes[cache_depth+1];
145 | 		memcpy(&sorted[bucketindex[bucket]++],
146 | 				cache+i, sizeof(cacheblock_t));
147 | 	}
148 | 	memcpy(cache, sorted, N*sizeof(cacheblock_t));
149 | 	free(sorted);
150 | 	size_t bsum = bucketsize[0];
151 | 	for (unsigned i=1; i < 0x10000; ++i) {
152 | 		if (bucketsize[i] == 0) continue;
153 | 		if (i & 0xFF) msd_A_adaptive(cache+bsum, bucketsize[i],
154 | 				cache_depth+2, true_depth+2);
155 | 		bsum += bucketsize[i];
156 | 	}
157 | 	free(bucketsize);
158 | }
159 | 
160 | void
161 | msd_A(unsigned char** strings, size_t N)
162 | {
163 | 	cacheblock_t* cache = (cacheblock_t*) malloc(N*sizeof(cacheblock_t));
164 | 	for (size_t i=0; i < N; ++i) cache[i].ptr = strings[i];
165 | 	fill_cache(cache, N, 0);
166 | 	msd_A(cache, N, 0, 0);
167 | 	for (size_t i=0; i < N; ++i) strings[i] = cache[i].ptr;
168 | 	free(cache);
169 | }
170 | ROUTINE_REGISTER_SINGLECORE(msd_A, "msd_A")
171 | 
172 | void
173 | msd_A_adaptive(unsigned char** strings, size_t N)
174 | {
175 | 	cacheblock_t* cache = (cacheblock_t*) malloc(N*sizeof(cacheblock_t));
176 | 	for (size_t i=0; i < N; ++i) cache[i].ptr = strings[i];
177 | 	fill_cache(cache, N, 0);
178 | 	msd_A_adaptive(cache, N, 0, 0);
179 | 	for (size_t i=0; i < N; ++i) strings[i] = cache[i].ptr;
180 | 	free(cache);
181 | }
182 | ROUTINE_REGISTER_SINGLECORE(msd_A_adaptive, "msd_A_adaptive")
183 | 


--------------------------------------------------------------------------------
/src/msd_a2.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2008 by Tommi Rantala <tt.rantala@gmail.com>
  3 |  *
  4 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
  5 |  * of this software and associated documentation files (the "Software"), to
  6 |  * deal in the Software without restriction, including without limitation the
  7 |  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  8 |  * sell copies of the Software, and to permit persons to whom the Software is
  9 |  * furnished to do so, subject to the following conditions:
 10 |  *
 11 |  * The above copyright notice and this permission notice shall be included in
 12 |  * all copies or substantial portions of the Software.
 13 |  *
 14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 17 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 20 |  * IN THE SOFTWARE.
 21 |  */
 22 | 
 23 | /*
 24 |  * msd_A2 is identical to msd_A, with one exception: we now use the original
 25 |  * input array as temporary space. msd_A is memory hungry, because it uses the
 26 |  * external array distribution method.
 27 |  */
 28 | 
 29 | #include "routine.h"
 30 | #include "util/debug.h"
 31 | #include <cassert>
 32 | #include <cstdlib>
 33 | #include <cstring>
 34 | #include <inttypes.h>
 35 | #include <array>
 36 | 
/* Number of string bytes that are mirrored in every cache block. */
#define CACHED_BYTES 4

/* One element of the array that is actually sorted: a string pointer plus a
 * small window of bytes of that string. fill_cache() loads the window.
 */
typedef struct {
	unsigned char bytes[CACHED_BYTES]; /* cached bytes, zero-filled from the end of the string on */
	unsigned char* ptr;                /* the string this block stands for */
} cacheblock_t;
 43 | 
 44 | static inline void
 45 | inssort_cache(cacheblock_t* cache, int n, size_t depth)
 46 | {
 47 | 	cacheblock_t *pi, *pj;
 48 | 	unsigned char *s, *t;
 49 | 	for (pi = cache + 1; --n > 0; ++pi) {
 50 | 		unsigned char* tmp = pi->ptr;
 51 | 		for (pj = pi; pj > cache; --pj) {
 52 | 			t = tmp + depth;
 53 | 			for (s=(pj-1)->ptr+depth; *s==*t && *s!=0; ++s, ++t)
 54 | 				;
 55 | 			if (*s <= *t)
 56 | 				break;
 57 | 			pj->ptr = (pj-1)->ptr;
 58 | 		}
 59 | 		pj->ptr = tmp;
 60 | 	}
 61 | }
 62 | 
 63 | static void
 64 | fill_cache(cacheblock_t* cache, size_t N, size_t depth)
 65 | {
 66 | 	for (size_t i=0; i < N; ++i) {
 67 | 		unsigned int j=0;
 68 | 		while (j < CACHED_BYTES && cache[i].ptr[depth+j]) {
 69 | 			cache[i].bytes[j] = cache[i].ptr[depth+j];
 70 | 			++j;
 71 | 		}
 72 | 		while (j < CACHED_BYTES) {
 73 | 			cache[i].bytes[j] = 0;
 74 | 			++j;
 75 | 		}
 76 | 	}
 77 | }
 78 | 
 79 | struct TempSpace
 80 | {
 81 | 	cacheblock_t* strings;
 82 | 	cacheblock_t* allocated;
 83 | 	size_t elements_in_strings;
 84 | 	TempSpace(unsigned char** strs, size_t n)
 85 | 		: strings(0), allocated(0), elements_in_strings(0)
 86 | 	{
 87 | 		debug()<<__PRETTY_FUNCTION__<<"\n";
 88 | 		char* raw = reinterpret_cast<char*>(strs);
 89 | 		size_t rawbytes = n*sizeof(unsigned char*);
 90 | 		if (std::ptrdiff_t(raw) % sizeof(cacheblock_t)) {
 91 | 			unsigned diff = std::ptrdiff_t(raw) % sizeof(cacheblock_t);
 92 | 			debug()<<"\t: alignment mismatch by "<<diff<<"bytes\n";
 93 | 			raw      += diff;
 94 | 			rawbytes -= diff;
 95 | 		}
 96 | 		if (rawbytes % sizeof(cacheblock_t)) {
 97 | 			unsigned diff = rawbytes % sizeof(cacheblock_t);
 98 | 			debug()<<"\t: truncate by "<<diff<<"bytes\n";
 99 | 			rawbytes -= diff;
100 | 		}
101 | 		strings = reinterpret_cast<cacheblock_t*>(raw);
102 | 		elements_in_strings = rawbytes / sizeof(cacheblock_t);
103 | 	}
104 | 	cacheblock_t& operator[](size_t index)
105 | 	{
106 | 		if (index < elements_in_strings) {
107 | 			return strings[index];
108 | 		} else {
109 | 			assert(allocated);
110 | 			return allocated[index-elements_in_strings];
111 | 		}
112 | 	}
113 | 	void allocate(size_t elems)
114 | 	{
115 | 		assert(allocated==0);
116 | 		if (elems > elements_in_strings) {
117 | 			allocated = static_cast<cacheblock_t*>(
118 | 					malloc((elems-elements_in_strings) *
119 | 						sizeof(cacheblock_t)));
120 | 		}
121 | 	}
122 | 	void deallocate()
123 | 	{
124 | 		if (allocated) {
125 | 			::free(allocated);
126 | 			allocated = 0;
127 | 		}
128 | 	}
129 | };
130 | 
131 | static inline void
132 | copy(TempSpace& src, cacheblock_t* dst, size_t n)
133 | {
134 | 	if (n > src.elements_in_strings) {
135 | 		(void) memcpy(dst, src.strings,
136 | 			src.elements_in_strings*sizeof(cacheblock_t));
137 | 		(void) memcpy(dst+src.elements_in_strings, src.allocated,
138 | 			(n-src.elements_in_strings)*sizeof(cacheblock_t));
139 | 	} else {
140 | 		(void) memcpy(dst, src.strings, n*sizeof(cacheblock_t));
141 | 	}
142 | }
143 | 
144 | static void
145 | msd_A2(cacheblock_t* cache,
146 |        size_t N,
147 |        size_t cache_depth,
148 |        size_t true_depth,
149 |        TempSpace& tmp)
150 | {
151 | 	if (N < 32) {
152 | 		inssort_cache(cache, N, true_depth);
153 | 		return;
154 | 	}
155 | 	if (cache_depth >= CACHED_BYTES) {
156 | 		fill_cache(cache, N, true_depth);
157 | 		cache_depth = 0;
158 | 	}
159 | 	std::array<size_t, 256> bucketsize;
160 | 	bucketsize.fill(0);
161 | 	for (size_t i=0; i < N; ++i)
162 | 		++bucketsize[cache[i].bytes[cache_depth]];
163 | 	tmp.allocate(N);
164 | 	static std::array<size_t, 256> bucketindex;
165 | 	bucketindex[0] = 0;
166 | 	for (unsigned i=1; i < 256; ++i)
167 | 		bucketindex[i] = bucketindex[i-1] + bucketsize[i-1];
168 | 	for (size_t i=0; i < N; ++i)
169 | 		tmp[bucketindex[cache[i].bytes[cache_depth]]++] = cache[i];
170 | 	copy(tmp, cache, N);
171 | 	tmp.deallocate();
172 | 	size_t bsum = bucketsize[0];
173 | 	for (unsigned i=1; i < 256; ++i) {
174 | 		if (bucketsize[i] == 0) continue;
175 | 		msd_A2(cache+bsum, bucketsize[i],
176 | 			  cache_depth+1, true_depth+1, tmp);
177 | 		bsum += bucketsize[i];
178 | 	}
179 | }
180 | 
181 | static void
182 | msd_A2_adaptive(cacheblock_t* cache,
183 |                 size_t N,
184 |                 size_t cache_depth,
185 |                 size_t true_depth,
186 |                 TempSpace& tmp)
187 | {
188 | 	if (N < 0x10000) {
189 | 		msd_A2(cache, N, cache_depth, true_depth, tmp);
190 | 		return;
191 | 	}
192 | 	if (cache_depth >= CACHED_BYTES) {
193 | 		fill_cache(cache, N, true_depth);
194 | 		cache_depth = 0;
195 | 	}
196 | 	tmp.allocate(N);
197 | 	size_t* bucketsize = static_cast<size_t*>(calloc(0x10000,
198 | 				sizeof(size_t)));
199 | 	for (size_t i=0; i < N; ++i) {
200 | 		uint16_t bucket =
201 | 			(cache[i].bytes[cache_depth] << 8) |
202 | 			 cache[i].bytes[cache_depth+1];
203 | 		++bucketsize[bucket];
204 | 	}
205 | 	static std::array<size_t, 0x10000> bucketindex;
206 | 	bucketindex[0] = 0;
207 | 	for (unsigned i=1; i < 0x10000; ++i)
208 | 		bucketindex[i] = bucketindex[i-1] + bucketsize[i-1];
209 | 	for (size_t i=0; i < N; ++i) {
210 | 		uint16_t bucket = (cache[i].bytes[cache_depth] << 8)
211 | 			| cache[i].bytes[cache_depth+1];
212 | 		tmp[bucketindex[bucket]++] = cache[i];
213 | 	}
214 | 	copy(tmp, cache, N);
215 | 	tmp.deallocate();
216 | 	size_t bsum = bucketsize[0];
217 | 	for (unsigned i=1; i < 0x10000; ++i) {
218 | 		if (bucketsize[i] == 0) continue;
219 | 		if (i & 0xFF) msd_A2_adaptive(cache+bsum, bucketsize[i],
220 | 				cache_depth+2, true_depth+2, tmp);
221 | 		bsum += bucketsize[i];
222 | 	}
223 | 	free(bucketsize);
224 | }
225 | 
226 | void
227 | msd_A2(unsigned char** strings, size_t N)
228 | {
229 | 	cacheblock_t* cache =
230 | 		static_cast<cacheblock_t*>(malloc(N*sizeof(cacheblock_t)));
231 | 	for (size_t i=0; i < N; ++i) cache[i].ptr = strings[i];
232 | 	TempSpace tmp(strings, N);
233 | 	fill_cache(cache, N, 0);
234 | 	msd_A2(cache, N, 0, 0, tmp);
235 | 	for (size_t i=0; i < N; ++i) strings[i] = cache[i].ptr;
236 | 	free(cache);
237 | }
238 | ROUTINE_REGISTER_SINGLECORE(msd_A2, "msd_A2")
239 | 
240 | void
241 | msd_A2_adaptive(unsigned char** strings, size_t N)
242 | {
243 | 	cacheblock_t* cache =
244 | 		static_cast<cacheblock_t*>(malloc(N*sizeof(cacheblock_t)));
245 | 	for (size_t i=0; i < N; ++i) cache[i].ptr = strings[i];
246 | 	TempSpace tmp(strings, N);
247 | 	fill_cache(cache, N, 0);
248 | 	msd_A2_adaptive(cache, N, 0, 0, tmp);
249 | 	for (size_t i=0; i < N; ++i) strings[i] = cache[i].ptr;
250 | 	free(cache);
251 | }
252 | ROUTINE_REGISTER_SINGLECORE(msd_A2_adaptive, "msd_A2_adaptive")
253 | 


--------------------------------------------------------------------------------
/src/msd_ci.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2007-2008 by Tommi Rantala <tt.rantala@gmail.com>
  3 |  *
  4 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
  5 |  * of this software and associated documentation files (the "Software"), to
  6 |  * deal in the Software without restriction, including without limitation the
  7 |  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  8 |  * sell copies of the Software, and to permit persons to whom the Software is
  9 |  * furnished to do so, subject to the following conditions:
 10 |  *
 11 |  * The above copyright notice and this permission notice shall be included in
 12 |  * all copies or substantial portions of the Software.
 13 |  *
 14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 17 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 20 |  * IN THE SOFTWARE.
 21 |  */
 22 | 
 23 | /* 
 24 |  * msd_ci() is an implementation of the MSD radix sort using
 25 |  *  - double sweep counting sort
 26 |  *  - O(n) oracle to reduce cache misses and memory stalls
 27 |  *  - the in-place distribution method described by McIlroy, Bostic & McIlroy
 28 |  *
 29 |  * The adaptive variant msd_ci_adaptive() uses superalphabet when size of the
 30 |  * subinput is large, as described by Andersson & Nilsson.
 31 |  */
 32 | 
 33 | #include "routine.h"
 34 | #include "util/insertion_sort.h"
 35 | #include "util/get_char.h"
 36 | #include <cstddef>
 37 | #include <cstdlib>
 38 | #include <sys/types.h>
 39 | #include <iostream>
 40 | #include <limits>
 41 | 
/*
 * A string pointer together with the bucket it belongs to. Used as the
 * "element in hand" while permuting strings in place. BucketType is the
 * integer type of the bucket number.
 *
 * The struct is initialised positionally ({ptr, bucket}), so the member
 * order matters.
 */
template <typename BucketType>
struct distblock {
	unsigned char* ptr;   // the string
	BucketType bucket;    // its bucket number (oracle value)
};
 47 | 
 48 | template <typename BucketsizeType>
 49 | static void
 50 | msd_ci(unsigned char** strings, size_t n, size_t depth)
 51 | {
 52 | 	if (n < 32) {
 53 | 		insertion_sort(strings, n, depth);
 54 | 		return;
 55 | 	}
 56 | 	BucketsizeType bucketsize[256] = {0};
 57 | 	unsigned char* restrict oracle =
 58 | 		(unsigned char*) malloc(n);
 59 | 	for (size_t i=0; i < n; ++i)
 60 | 		oracle[i] = strings[i][depth];
 61 | 	for (size_t i=0; i < n; ++i)
 62 | 		++bucketsize[oracle[i]];
 63 | 	static ssize_t bucketindex[256];
 64 | 	bucketindex[0] = bucketsize[0];
 65 | 	BucketsizeType last_bucket_size = bucketsize[0];
 66 | 	for (unsigned i=1; i < 256; ++i) {
 67 | 		bucketindex[i] = bucketindex[i-1] + bucketsize[i];
 68 | 		if (bucketsize[i]) last_bucket_size = bucketsize[i];
 69 | 	}
 70 | 	for (size_t i=0; i < n-last_bucket_size; ) {
 71 | 		distblock<uint8_t> tmp = { strings[i], oracle[i] };
 72 | 		while (1) {
 73 | 			// Continue until the current bucket is completely in
 74 | 			// place
 75 | 			if (--bucketindex[tmp.bucket] <= ssize_t(i))
 76 | 				break;
 77 | 			// backup all information of the position we are about
 78 | 			// to overwrite
 79 | 			size_t backup_idx = bucketindex[tmp.bucket];
 80 | 			distblock<uint8_t> tmp2 = { strings[backup_idx], oracle[backup_idx] };
 81 | 			// overwrite everything, ie. move the string to correct
 82 | 			// position
 83 | 			strings[backup_idx] = tmp.ptr;
 84 | 			oracle[backup_idx]  = tmp.bucket;
 85 | 			tmp = tmp2;
 86 | 		}
 87 | 		// Commit last pointer to place. We don't need to copy the
 88 | 		// oracle entry, it's not read after this.
 89 | 		strings[i] = tmp.ptr;
 90 | 		i += bucketsize[tmp.bucket];
 91 | 	}
 92 | 	free(oracle);
 93 | 	size_t bsum = bucketsize[0];
 94 | 	for (size_t i=1; i < 256; ++i) {
 95 | 		if (bucketsize[i] == 0) continue;
 96 | 		msd_ci<BucketsizeType>(strings+bsum, bucketsize[i], depth+1);
 97 | 		bsum += bucketsize[i];
 98 | 	}
 99 | }
100 | 
101 | static void
102 | msd_ci_adaptive(unsigned char** strings, size_t n, size_t depth)
103 | {
104 | 	if (n < 0x10000) {
105 | 		msd_ci<uint16_t>(strings, n, depth);
106 | 		return;
107 | 	}
108 | 	uint16_t* restrict oracle =
109 | 		(uint16_t*) malloc(n*sizeof(uint16_t));
110 | 	for (size_t i=0; i < n; ++i)
111 | 		oracle[i] = get_char<uint16_t>(strings[i], depth);
112 | 	size_t* restrict bucketsize = (size_t*)
113 | 		calloc(0x10000, sizeof(size_t));
114 | 	for (size_t i=0; i < n; ++i)
115 | 		++bucketsize[oracle[i]];
116 | 	static ssize_t bucketindex[0x10000];
117 | 	bucketindex[0] = bucketsize[0];
118 | 	size_t last_bucket_size = bucketsize[0];
119 | 	for (unsigned i=1; i < 0x10000; ++i) {
120 | 		bucketindex[i] = bucketindex[i-1] + bucketsize[i];
121 | 		if (bucketsize[i]) last_bucket_size = bucketsize[i];
122 | 	}
123 | 	for (size_t i=0; i < n-last_bucket_size; ) {
124 | 		distblock<uint16_t> tmp = { strings[i], oracle[i] };
125 | 		while (1) {
126 | 			// Continue until the current bucket is completely in
127 | 			// place
128 | 			if (--bucketindex[tmp.bucket] <= ssize_t(i))
129 | 				break;
130 | 			// backup all information of the position we are about
131 | 			// to overwrite
132 | 			size_t backup_idx = bucketindex[tmp.bucket];
133 | 			distblock<uint16_t> tmp2 = { strings[backup_idx], oracle[backup_idx] };
134 | 			// overwrite everything, ie. move the string to correct
135 | 			// position
136 | 			strings[backup_idx] = tmp.ptr;
137 | 			oracle[backup_idx]  = tmp.bucket;
138 | 			tmp = tmp2;
139 | 		}
140 | 		// Commit last pointer to place. We don't need to copy the
141 | 		// oracle entry, it's not read after this.
142 | 		strings[i] = tmp.ptr;
143 | 		i += bucketsize[tmp.bucket];
144 | 	}
145 | 	free(oracle);
146 | 	size_t bsum = bucketsize[0];
147 | 	for (size_t i=1; i < 0x10000; ++i) {
148 | 		if (bucketsize[i] == 0) continue;
149 | 		if (i & 0xFF) msd_ci_adaptive(strings+bsum,
150 | 				bucketsize[i], depth+2);
151 | 		bsum += bucketsize[i];
152 | 	}
153 | 	free(bucketsize);
154 | }
155 | 
156 | void msd_ci(unsigned char** strings, size_t n)
157 | {
158 | 	if (n > size_t(std::numeric_limits<ssize_t>::max())) {
159 | 		std::cerr << "ERROR: "
160 | 			<< __func__ << "(): too many input strings: "
161 | 			<< n << " > " << std::numeric_limits<ssize_t>::max()
162 | 			<< std::endl;
163 | 		abort();
164 | 	}
165 | 	msd_ci<size_t>(strings, n, 0);
166 | }
167 | void msd_ci_adaptive(unsigned char** strings, size_t n)
168 | {
169 | 	if (n > size_t(std::numeric_limits<ssize_t>::max())) {
170 | 		std::cerr << "ERROR: "
171 | 			<< __func__ << "(): too many input strings: "
172 | 			<< n << " > " << std::numeric_limits<ssize_t>::max()
173 | 			<< std::endl;
174 | 		abort();
175 | 	}
176 | 	msd_ci_adaptive(strings, n, 0);
177 | }
178 | 
179 | ROUTINE_REGISTER_SINGLECORE(msd_ci, "msd_CI")
180 | ROUTINE_REGISTER_SINGLECORE(msd_ci_adaptive, "msd_CI: adaptive")
181 | 


--------------------------------------------------------------------------------
/src/msd_dyn_block.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2007-2008,2012 by Tommi Rantala <tt.rantala@gmail.com>
  3 |  *
  4 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
  5 |  * of this software and associated documentation files (the "Software"), to
  6 |  * deal in the Software without restriction, including without limitation the
  7 |  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  8 |  * sell copies of the Software, and to permit persons to whom the Software is
  9 |  * furnished to do so, subject to the following conditions:
 10 |  *
 11 |  * The above copyright notice and this permission notice shall be included in
 12 |  * all copies or substantial portions of the Software.
 13 |  *
 14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 17 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 20 |  * IN THE SOFTWARE.
 21 |  */
 22 | 
 23 | /*
 24 |  * msd_DB() is an implementation of the MSD radix sort
 25 |  *  - uses dynamic bucket sizes, ie. just one sweep over data
 26 |  *  - uses the almost in place distribution method described in:
 27 |  *
 28 |  *        @article{1217858,
 29 |  *           author = {Juha K\"{a}rkk\"{a}inen and Peter Sanders and Stefan Burkhardt},
 30 |  *           title = {Linear work suffix array construction},
 31 |  *           journal = {J. ACM},
 32 |  *           volume = {53},
 33 |  *           number = {6},
 34 |  *           year = {2006},
 35 |  *           issn = {0004-5411},
 36 |  *           pages = {918--936},
 37 |  *           doi = {http://doi.acm.org/10.1145/1217856.1217858},
 38 |  *           publisher = {ACM},
 39 |  *           address = {New York, NY, USA},
 40 |  *        }
 41 |  *
 42 |  *    See appendix B.
 43 |  *
 44 |  *  - The idea is to save some memory when the size of the subinput is large.
 45 |  *    For small n, we switch to more efficient MSD radix sort variants.
 46 |  */
 47 | 
 48 | #include "routine.h"
 49 | #include <cstddef>
 50 | #include <vector>
 51 | #include <list>
 52 | #include <algorithm>
 53 | #include <iostream>
 54 | //#include <deque>
 55 | #include <array>
 56 | #include <cassert>
 57 | #include <cstring>
 58 | 
 59 | void msd_CE2_16bit(unsigned char** strings, size_t n, size_t depth);
 60 | 
 61 | typedef unsigned char** Block;
 62 | //typedef std::deque<Block> FreeBlocks;
 63 | typedef std::list<Block> FreeBlocks;
 64 | typedef std::list<Block> Bucket;
 65 | typedef std::array<Bucket, 256> Buckets;
 66 | typedef std::vector<Block*> BackLinks;
 67 | 
 68 | static inline Block
 69 | take_free_block(FreeBlocks& fb)
 70 | {
 71 | 	assert(not fb.empty());
 72 | 	Block b(fb.front());
 73 | 	fb.pop_front();
 74 | 	return b;
 75 | }
 76 | 
/*
 * MSD radix sort step with dynamically growing buckets made of fixed size
 * blocks of B string pointers.
 *
 * The strings are distributed in a single sweep. The blocks that make up
 * the buckets are taken from a free list that is seeded with (256+6)
 * blocks of static temporary space followed by blocks of the input array
 * itself. Afterwards the buckets are copied back into `strings` in bucket
 * order; blocks of the input array that still hold bucket data and are in
 * the way are first moved to a free block. `backlinks` records, for each
 * block of the input array, which bucket list entry currently refers to
 * it, so that entry can be redirected when the block is moved.
 *
 * Subinputs with n < 0x10000 are delegated to msd_CE2_16bit(). Bucket 0 is
 * never recursed into.
 *
 * `buckets`, `temp_space` and `freeblocks` are static and therefore shared
 * by all recursion levels; `buckets` is cleared on entry and `freeblocks`
 * is cleared before the recursive calls.
 */
template <unsigned B>
static void
msd_D(unsigned char** strings, size_t n, size_t depth)
{
	if (n < 0x10000) {
		msd_CE2_16bit(strings, n, depth);
		return;
	}
	assert(n > B);
	static Buckets buckets;
	static std::array<unsigned char*, (256+6)*B> temp_space;
	static FreeBlocks freeblocks;
	// One slot per block of the input array; the vector starts out
	// filled with null pointers.
	BackLinks backlinks(n/B+1);
	std::array<size_t, 256> bucketsize;

	for (unsigned i=0; i < 256; ++i) {
		bucketsize[i] = 0;
		buckets[i].clear();
	}

	// Initialize our list of free blocks.
	for (size_t i=0; i < 256+6; ++i)
		freeblocks.push_back(&temp_space[i*B]);
	for (size_t i=0; i < n-B; i+=B)
		freeblocks.push_back(&strings[i]);

	// Distribute strings to buckets. Use a small cache to reduce memory
	// stalls. The exact size of the cache is not very important.
	size_t i=0;
	for (; i < n-n%32; i+=32) {
		unsigned char cache[32];
		for (unsigned j=0; j < 32; ++j) {
			cache[j] = strings[i+j][depth];
		}
		for (unsigned j=0; j < 32; ++j) {
			const unsigned char c = cache[j];
			// The bucket is empty or its last block is full:
			// append a fresh block.
			if (bucketsize[c] % B == 0) {
				Block b = take_free_block(freeblocks);
				buckets[c].push_back(b);
				// Backlinks must be set for those blocks, that
				// use the original string array space.
				if (b >= strings && b < strings+n) {
					backlinks[(b-strings)/B] = &(buckets[c].back());
				}
			}
			assert(not buckets[c].empty());
			buckets[c].back()[bucketsize[c] % B] = strings[i+j];
			++bucketsize[c];
		}
	}
	// Remaining n%32 strings, handled without the cache.
	for (; i < n; ++i) {
		const unsigned char c = strings[i][depth];
		if (bucketsize[c] % B == 0) {
			Block b = take_free_block(freeblocks);
			buckets[c].push_back(b);
			if (b >= strings && b < strings+n) {
				backlinks[(b-strings)/B] = &(buckets[c].back());
			}
		}
		assert(not buckets[c].empty());
		buckets[c].back()[bucketsize[c] % B] = strings[i];
		++bucketsize[c];
	}

	// Process each bucket, and copy all strings in that bucket to proper
	// place in the original string pointer array. This means that those
	// positions that are occupied by other blocks must be moved to free
	// space etc.
	size_t pos = 0;
	for (unsigned i=0; i < 256; ++i) {
		if (bucketsize[i] == 0) continue;

		Bucket::const_iterator it = buckets[i].begin();
		for (size_t bucket_pos=0; bucket_pos < bucketsize[i]; ++it, bucket_pos+=B) {
			// The last block of a bucket may be only partly used.
			const size_t block_items = std::min(size_t(B), bucketsize[i]-bucket_pos);
			// Index of the input array block that contains the
			// last position we are about to write.
			const size_t block_overlap = (pos+block_items-1)/B;

			// The block already sits at its final position.
			if (*it == (strings+pos)) {
				assert(pos%B==0);
				backlinks[pos/B] = 0;
				pos += block_items;
				continue;
			}

			// Don't overwrite the block in the position we are
			// about to write to, but copy it into safety.
			if (backlinks[block_overlap]) {
				// Take a free block. The block can be 'stale',
				// i.e. it can point to positions we have
				// already copied new strings into. Take free
				// blocks until we have non-stale block.
				Block tmp = take_free_block(freeblocks);
				while (tmp >= strings && tmp < strings+pos)
					tmp = take_free_block(freeblocks);
				if (tmp >= strings && tmp < strings+n) {
					assert(backlinks[(tmp-strings)/B]==0);
					backlinks[(tmp-strings)/B] = backlinks[block_overlap];
				}
				memcpy(tmp, *(backlinks[block_overlap]), B*sizeof(unsigned char*));
				// Redirect the bucket list entry to the new
				// location of the moved block.
				*(backlinks[block_overlap]) = tmp;
				backlinks[block_overlap] = 0;
			}

			// The source block is about to be emptied, so it no
			// longer needs a backlink.
			if (*it >= strings && *it < strings+n) {
				assert(*it > strings+pos);
				backlinks[(*it-strings)/B] = 0;
			}

			// Copy string pointers to correct position.
			memcpy(strings+pos, *it, block_items*sizeof(unsigned char*));

			// Return block for later use. Favor those in the
			// temporary space.
			if (*it >= strings && *it < strings+n) {
				freeblocks.push_back(*it);
			} else {
				freeblocks.push_front(*it);
			}

			pos += block_items;
		}
	}
	// The static free list must be empty before recursing, because the
	// recursive call seeds it again.
	freeblocks.clear();
	backlinks.clear();
	pos = bucketsize[0];
	for (unsigned i=1; i < 256; ++i) {
		if (bucketsize[i] == 0) continue;
		msd_D<B>(strings+pos, bucketsize[i], depth+1);
		pos += bucketsize[i];
	}
}
208 | 
209 | void msd_DB(unsigned char** strings, size_t n)
210 | { msd_D<1024>(strings, n, 0); }
211 | ROUTINE_REGISTER_SINGLECORE(msd_DB, "msd_DB")
212 | 


--------------------------------------------------------------------------------
/src/msd_dyn_vector.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2007-2008 by Tommi Rantala <tt.rantala@gmail.com>
  3 |  *
  4 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
  5 |  * of this software and associated documentation files (the "Software"), to
  6 |  * deal in the Software without restriction, including without limitation the
  7 |  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  8 |  * sell copies of the Software, and to permit persons to whom the Software is
  9 |  * furnished to do so, subject to the following conditions:
 10 |  *
 11 |  * The above copyright notice and this permission notice shall be included in
 12 |  * all copies or substantial portions of the Software.
 13 |  *
 14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 17 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 20 |  * IN THE SOFTWARE.
 21 |  */
 22 | 
 23 | /*
 24 |  * This file contains several variants of MSD radix sort that uses dynamic
 25 |  * buckets instead of first making the extra sweep over the input to calculate
 26 |  * each size.
 27 |  *
 28 |  * Because strings can be expensive to access (indirect addressing, cache
 29 |  * misses, memory stalls), these variants are actually rather efficient.
 30 |  *
 31 |  * There are several variants that use different choice of the actual dynamic
 32 |  * memory structure. For each implementation, there is also an adaptive
 33 |  * version, that uses two byte superalphabet when the size of the subinput is
 34 |  * large, and normal alphabet otherwise.
 35 |  */
 36 | 
 37 | #include "routine.h"
 38 | #include "util/insertion_sort.h"
 39 | #include "util/get_char.h"
 40 | #include <cstring>
 41 | #include <cstddef>
 42 | #include <vector>
 43 | #include <list>
 44 | #include <deque>
 45 | #include <algorithm>
 46 | #include "vector_realloc.h"
 47 | #include "vector_malloc.h"
 48 | #include "vector_block.h"
 49 | #include "vector_brodnik.h"
 50 | #include "vector_bagwell.h"
 51 | #include <array>
 52 | 
 53 | // std::list::size() is O(n), so keep track of size manually.
 54 | template <typename T>
 55 | class counting_list : public std::list<T>
 56 | {
 57 | public:
 58 | 	counting_list() : _size(0) {}
 59 | 
 60 | 	void push_back(const T& x)
 61 | 	{ 
 62 | 		++_size;
 63 | 		std::list<T>::push_back(x);
 64 | 	}
 65 | 
 66 | 	size_t size() const
 67 | 	{
 68 | 		return _size;
 69 | 	}
 70 | 
 71 | 	void clear()
 72 | 	{
 73 | 		_size = 0;
 74 | 		std::list<T>::clear();
 75 | 	}
 76 | 
 77 | private:
 78 | 	size_t _size;
 79 | };
 80 | 
 81 | template <typename BucketT, typename OutputIterator>
 82 | static inline void
 83 | copy(const BucketT& bucket, OutputIterator dst)
 84 | {
 85 | 	std::copy(bucket.begin(), bucket.end(), dst);
 86 | }
 87 | 
 88 | template <typename Bucket, typename BucketsizeType>
 89 | static void
 90 | msd_D(unsigned char** strings, size_t n, size_t depth, Bucket* buckets)
 91 | {
 92 | 	if (n < 32) {
 93 | 		insertion_sort(strings, n, depth);
 94 | 		return;
 95 | 	}
 96 | 	// Use a small cache to reduce memory stalls.
 97 | 	size_t i=0;
 98 | 	for (; i < n-n%32; i+=32) {
 99 | 		unsigned char cache[32];
100 | 		for (unsigned j=0; j < 32; ++j) {
101 | 			cache[j] = strings[i+j][depth];
102 | 		}
103 | 		for (unsigned j=0; j < 32; ++j) {
104 | 			buckets[cache[j]].push_back(strings[i+j]);
105 | 		}
106 | 	}
107 | 	for (; i < n; ++i) {
108 | 		buckets[strings[i][depth]].push_back(strings[i]);
109 | 	}
110 | 	std::array<BucketsizeType, 256> bucketsize;
111 | 	for (unsigned i=0; i < 256; ++i) {
112 | 		bucketsize[i] = buckets[i].size();
113 | 	}
114 | 	size_t pos = 0;
115 | 	for (unsigned i=0; i < 256; ++i) {
116 | 		if (bucketsize[i] == 0) continue;
117 | 		copy(buckets[i], strings+pos);
118 | 		pos += bucketsize[i];
119 | 	}
120 | 	for (unsigned i=0; i < 256; ++i) {
121 | 		buckets[i].clear();
122 | 	}
123 | 	pos = bucketsize[0];
124 | 	for (unsigned i=1; i < 256; ++i) {
125 | 		if (bucketsize[i] == 0) continue;
126 | 		msd_D<Bucket, BucketsizeType>(strings+pos, bucketsize[i], depth+1, buckets);
127 | 		pos += bucketsize[i];
128 | 	}
129 | }
130 | 
131 | template <typename Bucket>
132 | static void
133 | msd_D_adaptive(unsigned char** strings, size_t n, size_t depth, Bucket* buckets)
134 | {
135 | 	if (n < 0x10000) {
136 | 		msd_D<Bucket, uint16_t>(strings, n, depth, buckets);
137 | 		return;
138 | 	}
139 | 	size_t* bucketsize = (size_t*) malloc(0x10000 * sizeof(size_t));
140 | 	size_t i=0;
141 | 	for (; i < n-n%16; i+=16) {
142 | 		uint16_t cache[16];
143 | 		for (size_t j=0; j < 16; ++j) {
144 | 			cache[j] = get_char<uint16_t>(strings[i+j], depth);
145 | 		}
146 | 		for (size_t j=0; j < 16; ++j) {
147 | 			buckets[cache[j]].push_back(strings[i+j]);
148 | 		}
149 | 	}
150 | 	for (; i < n; ++i) {
151 | 		const uint16_t ch = get_char<uint16_t>(strings[i], depth);
152 | 		buckets[ch].push_back(strings[i]);
153 | 	}
154 | 	for (unsigned i=0; i < 0x10000; ++i) {
155 | 		bucketsize[i] = buckets[i].size();
156 | 	}
157 | 	size_t pos = 0;
158 | 	for (unsigned i=0; i < 0x10000; ++i) {
159 | 		if (bucketsize[i] == 0) continue;
160 | 		copy(buckets[i], strings+pos);
161 | 		pos += bucketsize[i];
162 | 	}
163 | 	for (unsigned i=0; i < 0x10000; ++i) {
164 | 		buckets[i].clear();
165 | 	}
166 | 	pos = bucketsize[0];
167 | 	for (unsigned i=1; i < 0x10000; ++i) {
168 | 		if (bucketsize[i] == 0) continue;
169 | 		if (i & 0xFF) msd_D_adaptive(
170 | 				strings+pos, bucketsize[i],
171 | 				depth+2, buckets);
172 | 		pos += bucketsize[i];
173 | 	}
174 | 	free(bucketsize);
175 | }
176 | 
/*
 * MAKE_ALG2(name, vec) defines and registers two public sorting routines
 * for the container template `vec`:
 *
 *   msd_D_<name>()          - msd_D() with 256 buckets of vec<unsigned char*>
 *   msd_D_<name>_adaptive() - msd_D_adaptive() with 0x10000 such buckets,
 *                             allocated with new[] for the duration of the
 *                             call
 *
 * MAKE_ALG1(vec) is a shorthand for the case where the routine name and
 * the container template have the same name.
 *
 * No comments are placed inside the macro body, because a line comment
 * would swallow the line continuation.
 */
#define MAKE_ALG2(name, vec)                                                   \
void msd_D_##name(unsigned char** strings, size_t n)                           \
{                                                                              \
        vec<unsigned char*> buckets[256];                                      \
        msd_D<vec<unsigned char*>, size_t>(strings, n, 0, buckets);            \
}                                                                              \
ROUTINE_REGISTER_SINGLECORE(msd_D_##name, "msd_D_"#name)                       \
void msd_D_##name##_adaptive(unsigned char** strings, size_t n)                \
{                                                                              \
        vec<unsigned char*>* buckets = new vec<unsigned char*>[0x10000];       \
        msd_D_adaptive(strings, n, 0, buckets);                                \
        delete [] buckets;                                                     \
}                                                                              \
ROUTINE_REGISTER_SINGLECORE(msd_D_##name##_adaptive, "msd_D_"#name"_adaptive")

#define MAKE_ALG1(vec) MAKE_ALG2(vec, vec)

// Standard library containers.
MAKE_ALG2(std_vector, std::vector)
MAKE_ALG2(std_deque, std::deque)
MAKE_ALG2(std_list, counting_list)
// Containers from the vector_*.h headers included above.
MAKE_ALG1(vector_realloc)
MAKE_ALG1(vector_malloc)
MAKE_ALG1(vector_realloc_counter_clear)
MAKE_ALG1(vector_malloc_counter_clear)
MAKE_ALG1(vector_realloc_shrink_clear)
MAKE_ALG1(vector_block)
MAKE_ALG1(vector_brodnik)
MAKE_ALG1(vector_bagwell)
205 | 


--------------------------------------------------------------------------------
/src/multikey_block.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2008 by Tommi Rantala <tt.rantala@gmail.com>
  3 |  *
  4 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
  5 |  * of this software and associated documentation files (the "Software"), to
  6 |  * deal in the Software without restriction, including without limitation the
  7 |  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  8 |  * sell copies of the Software, and to permit persons to whom the Software is
  9 |  * furnished to do so, subject to the following conditions:
 10 |  *
 11 |  * The above copyright notice and this permission notice shall be included in
 12 |  * all copies or substantial portions of the Software.
 13 |  *
 14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 17 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 20 |  * IN THE SOFTWARE.
 21 |  */
 22 | 
 23 | /*
 24 |  * Implements the Multi-Key-Quicksort using the input array as temporary space.
 25 |  * See also msd_DB.
 26 |  */
 27 | 
 28 | #include "routine.h"
 29 | #include "util/insertion_sort.h"
 30 | #include "util/get_char.h"
 31 | #include "util/median.h"
 32 | #include <inttypes.h>
 33 | #include <cassert>
 34 | #include <array>
 35 | #include <list>
 36 | #include <cstring>
 37 | #include <vector>
 38 | 
 39 | template <typename CharT>
 40 | static inline unsigned
 41 | get_bucket(CharT c, CharT pivot)
 42 | {
 43 |         return ((c > pivot) << 1) | (c == pivot);
 44 | }
 45 | 
 46 | extern "C" void mkqsort(unsigned char**, int, int);
 47 | 
 48 | typedef unsigned char** Block;
 49 | //typedef std::deque<Block> FreeBlocks;
 50 | typedef std::list<Block> FreeBlocks;
 51 | typedef std::list<Block> Bucket;
 52 | typedef std::array<Bucket, 3> Buckets;
 53 | typedef std::vector<Block*> BackLinks;
 54 | 
 55 | static inline Block
 56 | take_free_block(FreeBlocks& fb)
 57 | {
 58 | 	assert(not fb.empty());
 59 | 	Block b(fb.front());
 60 | 	fb.pop_front();
 61 | 	return b;
 62 | }
 63 | 
/*
 * Multikey quicksort step that partitions into three buckets made of fixed
 * size blocks of B string pointers, using the input array itself as most
 * of the temporary space.
 *
 * B     - number of string pointers per block
 * CharT - character type fetched with get_char<CharT>() and compared
 *         against the pivot
 *
 * Every string is put into bucket get_bucket(c, partval): 0 for characters
 * smaller than the pivot, 1 for equal and 2 for larger ones. The blocks
 * come from a free list seeded with 32 blocks of static temporary space
 * followed by the blocks of the input array. Afterwards the buckets are
 * copied back into `strings` in bucket order; blocks of the input array
 * that still hold bucket data and are in the way are first moved to a free
 * block. `backlinks` records, for each block of the input array, which
 * bucket list entry currently refers to it, so that entry can be
 * redirected when the block is moved.
 *
 * Subinputs with n < 10000 are delegated to mkqsort().
 *
 * `buckets`, `temp_space` and `freeblocks` are static and therefore shared
 * by all recursion levels; `buckets` is cleared on entry and `freeblocks`
 * is cleared before the recursive calls.
 */
template <unsigned B, typename CharT>
static void
multikey_block(unsigned char** strings, size_t n, size_t depth)
{
	if (n < 10000) {
		mkqsort(strings, n, depth);
		return;
	}
	assert(n > B);
	static Buckets buckets;
	static std::array<unsigned char*, 32*B> temp_space;
	static FreeBlocks freeblocks;
	const CharT partval = pseudo_median<CharT>(strings, n, depth);
	// One slot per block of the input array; the vector starts out
	// filled with null pointers.
	BackLinks backlinks(n/B+1);
	std::array<size_t, 3> bucketsize;
	bucketsize.fill(0);
	buckets[0].clear();
	buckets[1].clear();
	buckets[2].clear();
	// Initialize our list of free blocks.
	assert(freeblocks.empty());
	for (size_t i=0; i < 32; ++i)
		freeblocks.push_back(&temp_space[i*B]);
	for (size_t i=0; i < n-n%B; i+=B)
		freeblocks.push_back(strings+i);
	// Distribute strings to buckets. Use a small cache to reduce memory
	// stalls. The exact size of the cache is not very important.
	size_t i=0;
	for (; i < n-n%32; i+=32) {
		std::array<CharT, 32> cache;
		for (unsigned j=0; j<32; ++j) {
			cache[j] = get_char<CharT>(strings[i+j], depth);
		}
		for (unsigned j=0; j<32; ++j) {
			const CharT c = cache[j];
			const unsigned b = get_bucket(c, partval);
			// The bucket is empty or its last block is full:
			// append a fresh block.
			if (bucketsize[b] % B == 0) {
				Block block = take_free_block(freeblocks);
				buckets[b].push_back(block);
				// Backlinks must be set for those blocks, that
				// use the original string array space.
				if (block >= strings && block < strings+n) {
					backlinks[(block-strings)/B] =
						&(buckets[b].back());
				}
			}
			assert(not buckets[b].empty());
			buckets[b].back()[bucketsize[b] % B] = strings[i+j];
			++bucketsize[b];
		}
	}
	// Remaining n%32 strings, handled without the cache.
	for (; i < n; ++i) {
		const CharT c = get_char<CharT>(strings[i], depth);
		const unsigned b = get_bucket(c, partval);
		if (bucketsize[b] % B == 0) {
			Block block = take_free_block(freeblocks);
			buckets[b].push_back(block);
			// Backlinks must be set for those blocks, that
			// use the original string array space.
			if (block >= strings && block < strings+n) {
				backlinks[(block-strings)/B] =
					&(buckets[b].back());
			}
		}
		assert(not buckets[b].empty());
		buckets[b].back()[bucketsize[b] % B] = strings[i];
		++bucketsize[b];
	}
	assert(bucketsize[0]+bucketsize[1]+bucketsize[2]==n);
	// Process each bucket, and copy all strings in that bucket to proper
	// place in the original string pointer array. This means that those
	// positions that are occupied by other blocks must be moved to free
	// space etc.
	size_t pos = 0;
	for (unsigned i=0; i < 3; ++i) {
		if (bucketsize[i] == 0) continue;
		Bucket::const_iterator it = buckets[i].begin();
		for (size_t bucket_pos=0; bucket_pos < bucketsize[i]; ++it, bucket_pos+=B) {
			// The last block of a bucket may be only partly used.
			const size_t block_items = std::min(size_t(B), bucketsize[i]-bucket_pos);
			// Index of the input array block that contains the
			// last position we are about to write.
			const size_t block_overlap = (pos+block_items-1)/B;
			if (*it == (strings+pos)) {
				// Already at correct place.
				assert(pos%B==0);
				backlinks[pos/B] = 0;
				pos += block_items;
				continue;
			}
			// Don't overwrite the block in the position we are
			// about to write to, but copy it into safety.
			if (backlinks[block_overlap]) {
				// Take a free block. The block can be 'stale',
				// i.e. it can point to positions we have
				// already copied new strings into. Take free
				// blocks until we have non-stale block.
				Block tmp = take_free_block(freeblocks);
				while (tmp >= strings && tmp < strings+pos)
					tmp = take_free_block(freeblocks);
				if (tmp >= strings && tmp < strings+n) {
					assert(backlinks[(tmp-strings)/B]==0);
					backlinks[(tmp-strings)/B] = backlinks[block_overlap];
				}
				memcpy(tmp, *(backlinks[block_overlap]), B*sizeof(unsigned char*));
				// Redirect the bucket list entry to the new
				// location of the moved block.
				*(backlinks[block_overlap]) = tmp;
				backlinks[block_overlap] = 0;
			}
			// The source block is about to be emptied, so it no
			// longer needs a backlink.
			if (*it >= strings && *it < strings+n) {
				assert(*it > strings+pos);
				backlinks[(*it-strings)/B] = 0;
			}
			// Copy string pointers to correct position.
			memcpy(strings+pos, *it, block_items*sizeof(unsigned char*));
			// Return block for later use. Favor those in the
			// temporary space.
			if (*it >= strings && *it < strings+n) {
				freeblocks.push_back(*it);
			} else {
				freeblocks.push_front(*it);
			}
			pos += block_items;
		}
	}
	// The static free list must be empty before recursing (see the
	// assert above). Swapping with an empty vector also gives the
	// backlink storage back before the recursion.
	freeblocks.clear();
	backlinks.clear(); BackLinks().swap(backlinks);
	// Smaller and larger parts are sorted at the same depth; the equal
	// part advances by sizeof(CharT) bytes, and is skipped when is_end()
	// reports true for the pivot (presumably because the strings end
	// there -- see util/get_char.h).
	multikey_block<B, CharT>(strings, bucketsize[0], depth);
	if (not is_end(partval))
		multikey_block<B, CharT>(strings+bucketsize[0], bucketsize[1],
				depth+sizeof(CharT));
	multikey_block<B, CharT>(strings+bucketsize[0]+bucketsize[1],
			bucketsize[2], depth);
}
194 | 
195 | void multikey_block1(unsigned char** strings, size_t n)
196 | {
197 | 	multikey_block<1024, unsigned char>(strings, n, 0);
198 | }
199 | void multikey_block2(unsigned char** strings, size_t n)
200 | {
201 | 	multikey_block<1024, uint16_t>(strings, n, 0);
202 | }
203 | void multikey_block4(unsigned char** strings, size_t n)
204 | {
205 | 	multikey_block<1024, uint32_t>(strings, n, 0);
206 | }
207 | 
// Register the three multikey_block entry points as single-core routines.
// The first argument is the function (its name also becomes the routine
// name), the second is the human-readable description.
ROUTINE_REGISTER_SINGLECORE(multikey_block1,
		"multikey_block with 1byte alphabet")
ROUTINE_REGISTER_SINGLECORE(multikey_block2,
		"multikey_block with 2byte alphabet")
ROUTINE_REGISTER_SINGLECORE(multikey_block4,
		"multikey_block with 4byte alphabet")
214 | 


--------------------------------------------------------------------------------
/src/multikey_cache.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2007-2008,2012 by Tommi Rantala <tt.rantala@gmail.com>
  3 |  *
  4 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
  5 |  * of this software and associated documentation files (the "Software"), to
  6 |  * deal in the Software without restriction, including without limitation the
  7 |  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  8 |  * sell copies of the Software, and to permit persons to whom the Software is
  9 |  * furnished to do so, subject to the following conditions:
 10 |  *
 11 |  * The above copyright notice and this permission notice shall be included in
 12 |  * all copies or substantial portions of the Software.
 13 |  *
 14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 17 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 20 |  * IN THE SOFTWARE.
 21 |  */
 22 | 
 23 | /*
 24 |  * multikey_cache() implements the Multi-Key-Quicksort using an O(n) cache. The
 25 |  * idea is to reduce the number of times we access the strings via pointers,
 26 |  * and to improve the locality of access patterns.
 27 |  *
 28 |  * Ng and Kakehi give results for a similar ''CMKQ'' algorithm in the CRadix
 29 |  * paper, but they mainly focus on the radix sort variant.
 30 |  */
 31 | 
 32 | #include "routine.h"
 33 | #include "util/median.h"
 34 | #include <algorithm>
 35 | 
 36 | template <unsigned CachedChars>
 37 | struct Cacheblock;
 38 | 
 39 | template <>
 40 | struct Cacheblock<4>
 41 | {
 42 | 	typedef uint32_t CacheType;
 43 | 
 44 | 	uint32_t cached_bytes;
 45 | 	unsigned char* ptr;
 46 | };
 47 | 
 48 | template <>
 49 | struct Cacheblock<8>
 50 | {
 51 | 	typedef uint64_t CacheType;
 52 | 
 53 | 	uint64_t cached_bytes;
 54 | 	unsigned char* ptr;
 55 | };
 56 | 
 57 | struct Cmp
 58 | {
 59 | 	template <unsigned CachedChars>
 60 | 	int operator()(const Cacheblock<CachedChars>& lhs,
 61 | 	               const Cacheblock<CachedChars>& rhs) const
 62 | 	{
 63 | 		if (lhs.cached_bytes > rhs.cached_bytes) return 1;
 64 | 		if (lhs.cached_bytes < rhs.cached_bytes) return -1;
 65 | 		return 0;
 66 | 	}
 67 | };
 68 | 
 69 | // Insertion sort, ignores any cached characters.
 70 | template <unsigned CachedChars>
 71 | static inline void
 72 | insertion_sort(Cacheblock<CachedChars>* cache, int n, size_t depth)
 73 | {
 74 | 	Cacheblock<CachedChars> *pi, *pj;
 75 | 	unsigned char *s, *t;
 76 | 	for (pi = cache + 1; --n > 0; ++pi) {
 77 | 		unsigned char* tmp = pi->ptr;
 78 | 		for (pj = pi; pj > cache; --pj) {
 79 | 			for (s=(pj-1)->ptr+depth, t=tmp+depth; *s==*t && *s!=0;
 80 | 					++s, ++t)
 81 | 				;
 82 | 			if (*s <= *t)
 83 | 				break;
 84 | 			pj->ptr = (pj-1)->ptr;
 85 | 		}
 86 | 		pj->ptr = tmp;
 87 | 	}
 88 | }
 89 | 
 90 | // Insertion sorts the strings only based on the cached characters.
 91 | template <unsigned CachedChars>
 92 | static inline void
 93 | inssort_cache_block(Cacheblock<CachedChars>* cache, int n)
 94 | {
 95 | 	Cacheblock<CachedChars> *pi, *pj;
 96 | 	for (pi = cache + 1; --n > 0; ++pi) {
 97 | 		Cacheblock<CachedChars> tmp = *pi;
 98 | 		for (pj = pi; pj > cache; --pj) {
 99 | 			if (Cmp()(*(pj-1), tmp) <= 0)
100 | 				break;
101 | 			*pj = *(pj-1);
102 | 		}
103 | 		*pj = tmp;
104 | 	}
105 | }
106 | 
107 | // Fill the cache, but swap the characters in such order that we may load them
108 | // as integers in little endian machines.
109 | template <unsigned CachedChars>
110 | static inline void
111 | fill_cache(Cacheblock<CachedChars>* cache, size_t N, size_t depth)
112 | {
113 | 	for (size_t i=0; i < N; ++i) {
114 | 		unsigned si=0, ci=CachedChars-1; //string index, cache index
115 | 		typename Cacheblock<CachedChars>::CacheType ch = 0;
116 | 		while (ci < CachedChars) {
117 | 			const typename Cacheblock<CachedChars>::CacheType c =
118 | 				cache[i].ptr[depth+si];
119 | 			ch |= (c << (ci*8));
120 | 			--ci; ++si;
121 | 			if (is_end(c)) break;
122 | 		}
123 | 		cache[i].cached_bytes = ch;
124 | 	}
125 | }
126 | 
// Sorts the N entries cache[0..N) with a three-way (multikey) quicksort that
// compares the cached characters instead of dereferencing the strings.
//
//   CachedChars  number of characters held in each entry's cached_bytes
//   CacheDirty   true when cached_bytes does not describe the characters at
//                'depth' yet and has to be (re)filled before it can be used
//   depth        offset of the first character that still has to be compared
//
// Inputs smaller than 32 entries are finished with insertion sort. Larger
// inputs are partitioned around a pseudo-median pivot into "less", "equal"
// and "greater" ranges; the equal range is sorted again CachedChars
// characters deeper unless the pivot's string ended inside the cached window.
template <unsigned CachedChars, bool CacheDirty>
static void
multikey_cache(Cacheblock<CachedChars>* cache, size_t N, size_t depth)
{
	if (N < 32) {
		if (N==0) return;
		if (CacheDirty) {
			// Cache contents are not usable: compare the strings
			// directly.
			insertion_sort(cache, N, depth);
			return;
		}
		// Order by cached characters first, then refine every run of
		// entries whose cached characters are all equal.
		inssort_cache_block(cache, N);
		size_t start=0, cnt=1;
		for (size_t i=0; i < N-1; ++i) {
			if (Cmp()(cache[i], cache[i+1]) == 0) {
				++cnt;
				continue;
			}
			// A non-zero low byte means the last cached character
			// is not a terminator, so the strings of this run may
			// still differ beyond the cached window.
			if (cnt > 1 and cache[start].cached_bytes & 0xFF)
				insertion_sort(cache+start, cnt,
						depth+CachedChars);
			cnt = 1;
			start = i+1;
		}
		// The final run is not closed inside the loop.
		if (cnt > 1 and cache[start].cached_bytes & 0xFF)
			insertion_sort(cache+start, cnt, depth+CachedChars);
		return;
	}
	if (CacheDirty) {
		fill_cache(cache, N, depth);
	}
	// Move pivot to first position to avoid wrapping the unsigned values
	// we are using in the main loop from zero to max.
	// The pivot is the median of three medians-of-three taken from the
	// front, middle and back of the range.
	std::swap(cache[0], med3char(
		med3char(cache[0],       cache[N/8],     cache[N/4],    Cmp()),
		med3char(cache[N/2-N/8], cache[N/2],     cache[N/2+N/8],Cmp()),
		med3char(cache[N-1-N/4], cache[N-1-N/8], cache[N-3],    Cmp()),
		Cmp()));
	Cacheblock<CachedChars> partval = cache[0];
	// first/last scan inwards from both ends. Entries equal to the pivot
	// are parked at the two ends: [0, beg_ins) and (end_ins, N-1].
	size_t first   = 1;
	size_t last    = N-1;
	size_t beg_ins = 1;
	size_t end_ins = N-1;
	while (true) {
		// Advance from the left until an entry greater than the pivot
		// is found, parking equal entries at the front.
		while (first <= last) {
			const int res = Cmp()(cache[first], partval);
			if (res > 0) {
				break;
			} else if (res == 0) {
				std::swap(cache[beg_ins++], cache[first]);
			}
			++first;
		}
		// Advance from the right until an entry smaller than the pivot
		// is found, parking equal entries at the back.
		while (first <= last) {
			const int res = Cmp()(cache[last], partval);
			if (res < 0) {
				break;
			} else if (res == 0) {
				std::swap(cache[end_ins--], cache[last]);
			}
			--last;
		}
		if (first > last)
			break;
		// cache[first] is greater and cache[last] is smaller than the
		// pivot: exchange them and keep scanning.
		std::swap(cache[first], cache[last]);
		++first;
		--last;
	}
	// Some calculations to make the code more readable.
	const size_t num_eq_beg = beg_ins;
	const size_t num_eq_end = N-1-end_ins;
	const size_t num_eq     = num_eq_beg+num_eq_end;
	const size_t num_lt     = first-beg_ins;
	const size_t num_gt     = end_ins-last;
	// Swap the equal pointers from the beginning to proper position.
	const size_t size1 = std::min(num_eq_beg, num_lt);
	std::swap_ranges(cache, cache+size1, cache+first-size1);
	// Swap the equal pointers from the end to proper position.
	const size_t size2 = std::min(num_eq_end, num_gt);
	std::swap_ranges(cache+first, cache+first+size2, cache+N-size2);
	// Now recurse.
	// The "less" and "greater" ranges keep their cache (same depth); the
	// "equal" range moves CachedChars characters deeper and therefore
	// needs its cache refilled.
	multikey_cache<CachedChars, false>(cache, num_lt, depth);
	multikey_cache<CachedChars, false>(cache+num_lt+num_eq, num_gt, depth);
	// A zero low byte in the pivot means its string ended inside the
	// cached window; in that case the equal range is not sorted further.
	if (partval.cached_bytes & 0xFF)
		multikey_cache<CachedChars, true>(
			cache+num_lt, num_eq, depth+CachedChars);
}
213 | 
214 | template <unsigned CachedChars>
215 | static inline void
216 | multikey_cache(unsigned char** strings, size_t n, size_t depth)
217 | {
218 | 	Cacheblock<CachedChars>* cache =
219 | 		static_cast<Cacheblock<CachedChars>*>(
220 | 			malloc(n*sizeof(Cacheblock<CachedChars>)));
221 | 	for (size_t i=0; i < n; ++i) {
222 | 		cache[i].ptr = strings[i];
223 | 	}
224 | 	multikey_cache<CachedChars, true>(cache, n, depth);
225 | 	for (size_t i=0; i < n; ++i) {
226 | 		strings[i] = cache[i].ptr;
227 | 	}
228 | 	free(cache);
229 | }
230 | 
231 | void multikey_cache4(unsigned char** strings, size_t n)
232 | { multikey_cache<4>(strings, n, 0); }
233 | 
234 | void multikey_cache8(unsigned char** strings, size_t n)
235 | { multikey_cache<8>(strings, n, 0); }
236 | 
// Register both cache widths as single-core routines (function, description).
ROUTINE_REGISTER_SINGLECORE(multikey_cache4, "multikey_cache with 4byte cache")
ROUTINE_REGISTER_SINGLECORE(multikey_cache8, "multikey_cache with 8byte cache")
239 | 


--------------------------------------------------------------------------------
/src/multikey_dynamic.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2008 by Tommi Rantala <tt.rantala@gmail.com>
  3 |  *
  4 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
  5 |  * of this software and associated documentation files (the "Software"), to
  6 |  * deal in the Software without restriction, including without limitation the
  7 |  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  8 |  * sell copies of the Software, and to permit persons to whom the Software is
  9 |  * furnished to do so, subject to the following conditions:
 10 |  *
 11 |  * The above copyright notice and this permission notice shall be included in
 12 |  * all copies or substantial portions of the Software.
 13 |  *
 14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 17 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 20 |  * IN THE SOFTWARE.
 21 |  */
 22 | 
 23 | /*
 24 |  * A variant of the Multi-Key-Quicksort using dynamic arrays to store the three
 25 |  * buckets.
 26 |  */
 27 | 
 28 | #include "routine.h"
 29 | #include "util/insertion_sort.h"
 30 | #include "util/get_char.h"
 31 | #include "util/median.h"
 32 | #include <inttypes.h>
 33 | #include <cassert>
 34 | #include <array>
 35 | #include "vector_bagwell.h"
 36 | #include "vector_brodnik.h"
 37 | #include "vector_block.h"
 38 | 
 39 | template <typename CharT>
 40 | static inline unsigned
 41 | get_bucket(CharT c, CharT pivot)
 42 | {
 43 |         return ((c > pivot) << 1) | (c == pivot);
 44 | }
 45 | 
 46 | static inline void
 47 | copy(const std::vector<unsigned char*>& bucket, unsigned char** dst)
 48 | {
 49 | 	std::copy(bucket.begin(), bucket.end(), dst);
 50 | }
 51 | 
 52 | template <typename BucketT>
 53 | static inline void
 54 | clear_bucket(BucketT& bucket)
 55 | { bucket.clear(); }
 56 | 
 57 | static inline void
 58 | clear_bucket(std::vector<unsigned char*>& bucket)
 59 | { bucket.clear(); std::vector<unsigned char*>().swap(bucket); }
 60 | 
 61 | extern "C" void mkqsort(unsigned char**, int, int);
 62 | 
 63 | template <typename BucketT, typename CharT>
 64 | static void
 65 | multikey_dynamic(unsigned char** strings, size_t N, size_t depth)
 66 | {
 67 | 	if (N < 10000) {
 68 | 		mkqsort(strings, N, depth);
 69 | 		return;
 70 | 	}
 71 | 	std::array<BucketT, 3> buckets;
 72 | 	CharT partval = pseudo_median<CharT>(strings, N, depth);
 73 | 	// Use a small cache to reduce memory stalls.
 74 | 	size_t i=0;
 75 | 	for (; i < N-N%32; i+=32) {
 76 | 		std::array<CharT, 32> cache;
 77 | 		for (unsigned j=0; j<32; ++j) {
 78 | 			cache[j] = get_char<CharT>(strings[i+j], depth);
 79 | 		}
 80 | 		for (unsigned j=0; j<32; ++j) {
 81 | 			const unsigned b = get_bucket(cache[j], partval);
 82 | 			buckets[b].push_back(strings[i+j]);
 83 | 		}
 84 | 	}
 85 | 	for (; i < N; ++i) {
 86 | 		const CharT c = get_char<CharT>(strings[i], depth);
 87 | 		const unsigned b = get_bucket(c, partval);
 88 | 		buckets[b].push_back(strings[i]);
 89 | 	}
 90 | 	const size_t bucketsize0 = buckets[0].size();
 91 | 	const size_t bucketsize1 = buckets[1].size();
 92 | 	const size_t bucketsize2 = buckets[2].size();
 93 | 	assert(bucketsize0 + bucketsize1 + bucketsize2 == N);
 94 | 	if (bucketsize0) copy(buckets[0], strings);
 95 | 	if (bucketsize1) copy(buckets[1], strings+bucketsize0);
 96 | 	if (bucketsize2) copy(buckets[2], strings+bucketsize0+bucketsize1);
 97 | 	clear_bucket(buckets[0]);
 98 | 	clear_bucket(buckets[1]);
 99 | 	clear_bucket(buckets[2]);
100 | 	multikey_dynamic<BucketT, CharT>(strings, bucketsize0, depth);
101 | 	if (not is_end(partval))
102 | 		multikey_dynamic<BucketT, CharT>(strings+bucketsize0,
103 | 				bucketsize1, depth+sizeof(CharT));
104 | 	multikey_dynamic<BucketT, CharT>(strings+bucketsize0+bucketsize1,
105 | 			bucketsize2, depth);
106 | }
107 | 
108 | void multikey_dynamic_vector1(unsigned char** strings, size_t n)
109 | {
110 | 	typedef std::vector<unsigned char*> BucketT;
111 | 	typedef unsigned char CharT;
112 | 	multikey_dynamic<BucketT, CharT>(strings, n, 0);
113 | }
114 | void multikey_dynamic_vector2(unsigned char** strings, size_t n)
115 | {
116 | 	typedef std::vector<unsigned char*> BucketT;
117 | 	typedef uint16_t CharT;
118 | 	multikey_dynamic<BucketT, CharT>(strings, n, 0);
119 | }
120 | void multikey_dynamic_vector4(unsigned char** strings, size_t n)
121 | {
122 | 	typedef std::vector<unsigned char*> BucketT;
123 | 	typedef uint32_t CharT;
124 | 	multikey_dynamic<BucketT, CharT>(strings, n, 0);
125 | }
126 | 
127 | void multikey_dynamic_brodnik1(unsigned char** strings, size_t n)
128 | {
129 | 	typedef vector_brodnik<unsigned char*> BucketT;
130 | 	typedef unsigned char CharT;
131 | 	multikey_dynamic<BucketT, CharT>(strings, n, 0);
132 | }
133 | void multikey_dynamic_brodnik2(unsigned char** strings, size_t n)
134 | {
135 | 	typedef vector_brodnik<unsigned char*> BucketT;
136 | 	typedef uint16_t CharT;
137 | 	multikey_dynamic<BucketT, CharT>(strings, n, 0);
138 | }
139 | void multikey_dynamic_brodnik4(unsigned char** strings, size_t n)
140 | {
141 | 	typedef vector_brodnik<unsigned char*> BucketT;
142 | 	typedef uint32_t CharT;
143 | 	multikey_dynamic<BucketT, CharT>(strings, n, 0);
144 | }
145 | 
146 | void multikey_dynamic_bagwell1(unsigned char** strings, size_t n)
147 | {
148 | 	typedef vector_bagwell<unsigned char*> BucketT;
149 | 	typedef unsigned char CharT;
150 | 	multikey_dynamic<BucketT, CharT>(strings, n, 0);
151 | }
152 | void multikey_dynamic_bagwell2(unsigned char** strings, size_t n)
153 | {
154 | 	typedef vector_bagwell<unsigned char*> BucketT;
155 | 	typedef uint16_t CharT;
156 | 	multikey_dynamic<BucketT, CharT>(strings, n, 0);
157 | }
158 | void multikey_dynamic_bagwell4(unsigned char** strings, size_t n)
159 | {
160 | 	typedef vector_bagwell<unsigned char*> BucketT;
161 | 	typedef uint32_t CharT;
162 | 	multikey_dynamic<BucketT, CharT>(strings, n, 0);
163 | }
164 | 
165 | void multikey_dynamic_vector_block1(unsigned char** strings, size_t n)
166 | {
167 | 	typedef vector_block<unsigned char*> BucketT;
168 | 	typedef unsigned char CharT;
169 | 	multikey_dynamic<BucketT, CharT>(strings, n, 0);
170 | }
171 | void multikey_dynamic_vector_block2(unsigned char** strings, size_t n)
172 | {
173 | 	typedef vector_block<unsigned char*> BucketT;
174 | 	typedef uint16_t CharT;
175 | 	multikey_dynamic<BucketT, CharT>(strings, n, 0);
176 | }
177 | void multikey_dynamic_vector_block4(unsigned char** strings, size_t n)
178 | {
179 | 	typedef vector_block<unsigned char*> BucketT;
180 | 	typedef uint32_t CharT;
181 | 	multikey_dynamic<BucketT, CharT>(strings, n, 0);
182 | }
183 | 
// Register every bucket-type / alphabet-width combination as a single-core
// routine. Each invocation names the entry point and gives its description.

// std::vector buckets.
ROUTINE_REGISTER_SINGLECORE(multikey_dynamic_vector1,
	"multikey_dynamic with std::vector bucket type and 1byte alphabet")
ROUTINE_REGISTER_SINGLECORE(multikey_dynamic_vector2,
	"multikey_dynamic with std::vector bucket type and 2byte alphabet")
ROUTINE_REGISTER_SINGLECORE(multikey_dynamic_vector4,
	"multikey_dynamic with std::vector bucket type and 4byte alphabet")

// vector_brodnik buckets.
ROUTINE_REGISTER_SINGLECORE(multikey_dynamic_brodnik1,
	"multikey_dynamic with vector_brodnik bucket type and 1byte alphabet")
ROUTINE_REGISTER_SINGLECORE(multikey_dynamic_brodnik2,
	"multikey_dynamic with vector_brodnik bucket type and 2byte alphabet")
ROUTINE_REGISTER_SINGLECORE(multikey_dynamic_brodnik4,
	"multikey_dynamic with vector_brodnik bucket type and 4byte alphabet")

// vector_bagwell buckets.
ROUTINE_REGISTER_SINGLECORE(multikey_dynamic_bagwell1,
	"multikey_dynamic with vector_bagwell bucket type and 1byte alphabet")
ROUTINE_REGISTER_SINGLECORE(multikey_dynamic_bagwell2,
	"multikey_dynamic with vector_bagwell bucket type and 2byte alphabet")
ROUTINE_REGISTER_SINGLECORE(multikey_dynamic_bagwell4,
	"multikey_dynamic with vector_bagwell bucket type and 4byte alphabet")

// vector_block buckets.
ROUTINE_REGISTER_SINGLECORE(multikey_dynamic_vector_block1,
	"multikey_dynamic with vector_block bucket type and 1byte alphabet")
ROUTINE_REGISTER_SINGLECORE(multikey_dynamic_vector_block2,
	"multikey_dynamic with vector_block bucket type and 2byte alphabet")
ROUTINE_REGISTER_SINGLECORE(multikey_dynamic_vector_block4,
	"multikey_dynamic with vector_block bucket type and 4byte alphabet")
211 | 


--------------------------------------------------------------------------------
/src/routine.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright 2011 by Tommi Rantala <tt.rantala@gmail.com>
 3 |  *
 4 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
 5 |  * of this software and associated documentation files (the "Software"), to
 6 |  * deal in the Software without restriction, including without limitation the
 7 |  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 8 |  * sell copies of the Software, and to permit persons to whom the Software is
 9 |  * furnished to do so, subject to the following conditions:
10 |  *
11 |  * The above copyright notice and this permission notice shall be included in
12 |  * all copies or substantial portions of the Software.
13 |  *
14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
20 |  * IN THE SOFTWARE.
21 |  */
22 | 
23 | #ifndef ROUTINE_H
24 | #define ROUTINE_H
25 | 
26 | #include <stdlib.h>
27 | 
28 | #ifdef __cplusplus
29 | extern "C" {
30 | #endif
31 | 
/* Descriptor of one registered sorting routine. */
struct routine {
	/* Entry point; takes an array of string pointers and a count. */
	void (*f)(unsigned char **, size_t);
	/* Routine name (ROUTINE_REGISTER stores the function's own name). */
	const char *name;
	/* Human-readable description of the routine. */
	const char *desc;
	/* 1 for routines registered as multicore, 0 for single-core ones. */
	unsigned multicore : 1;
};
38 | 
39 | void routine_register(const struct routine *);
40 | 
/*
 * Registers the function _func as a routine.
 *
 * Expands to a static 'struct routine' named <_func>_routine, initialised
 * with the function pointer, the function name as a string, the description
 * _desc and the multicore flag _multicore, plus a static hook function
 * <_func>_register_hook() that passes the descriptor to routine_register().
 * The hook carries __attribute__((constructor)), a compiler extension that
 * makes it run automatically at program start-up, so no explicit call is
 * needed.
 */
#define ROUTINE_REGISTER(_func, _desc, _multicore) \
	static const struct routine _func##_routine = {    \
	        _func,                                     \
	        #_func,                                    \
	        _desc,                                     \
	        _multicore,                                \
	};                                                 \
	static void _func##_register_hook(void)            \
	        __attribute__((constructor));              \
	static void _func##_register_hook(void)            \
	{                                                  \
	        routine_register(&_func##_routine);        \
	}
54 | 
/* Registers _func with description _desc and the multicore flag cleared. */
#define ROUTINE_REGISTER_SINGLECORE(_func, _desc) \
	ROUTINE_REGISTER(_func, _desc, 0)

/* Registers _func with description _desc and the multicore flag set. */
#define ROUTINE_REGISTER_MULTICORE(_func, _desc) \
	ROUTINE_REGISTER(_func, _desc, 1)
60 | 
61 | #ifdef __cplusplus
62 | }
63 | #endif
64 | 
65 | #endif /* ROUTINE_H */
66 | 


--------------------------------------------------------------------------------
/src/routines.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright 2011 by Tommi Rantala <tt.rantala@gmail.com>
 3 |  *
 4 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
 5 |  * of this software and associated documentation files (the "Software"), to
 6 |  * deal in the Software without restriction, including without limitation the
 7 |  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 8 |  * sell copies of the Software, and to permit persons to whom the Software is
 9 |  * furnished to do so, subject to the following conditions:
10 |  *
11 |  * The above copyright notice and this permission notice shall be included in
12 |  * all copies or substantial portions of the Software.
13 |  *
14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
20 |  * IN THE SOFTWARE.
21 |  */
22 | 
23 | #include "routine.h"
24 | #include <string.h>
25 | 
26 | #define ROUTINES_MAX 256
27 | 
28 | static const struct routine *routines[ROUTINES_MAX];
29 | static unsigned routine_cnt;
30 | 
31 | void
32 | routine_register(const struct routine *r)
33 | {
34 | 	if (!r)
35 | 		abort();
36 | 	if (!r->name)
37 | 		abort();
38 | 	if (!r->desc)
39 | 		abort();
40 | 	if (routine_cnt >= ROUTINES_MAX)
41 | 		abort();
42 | 	routines[routine_cnt++] = r;
43 | }
44 | 
45 | const struct routine *
46 | routine_from_name(const char *name)
47 | {
48 | 	unsigned i;
49 | 	for (i=0; i < routine_cnt; ++i)
50 | 		if (strcmp(name, routines[i]->name) == 0)
51 | 			return routines[i];
52 | 	return NULL;
53 | }
54 | 
55 | static int
56 | routine_cmp(const void *a, const void *b)
57 | {
58 | 	const struct routine *aa = *(const struct routine **)a;
59 | 	const struct routine *bb = *(const struct routine **)b;
60 | 	if (aa->f == bb->f)
61 | 		return 0;
62 | 	if (aa->multicore < bb->multicore)
63 | 		return -1;
64 | 	if (aa->multicore > bb->multicore)
65 | 		return 1;
66 | 	return strcmp(aa->name, bb->name);
67 | }
68 | 
69 | void
70 | routine_get_all(const struct routine ***r, unsigned *cnt)
71 | {
72 | 	*r = routines;
73 | 	*cnt = routine_cnt;
74 | 	qsort(*r, *cnt, sizeof(struct routine *), routine_cmp);
75 | }
76 | 


--------------------------------------------------------------------------------
/src/routines.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright 2011 by Tommi Rantala <tt.rantala@gmail.com>
 3 |  *
 4 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
 5 |  * of this software and associated documentation files (the "Software"), to
 6 |  * deal in the Software without restriction, including without limitation the
 7 |  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 8 |  * sell copies of the Software, and to permit persons to whom the Software is
 9 |  * furnished to do so, subject to the following conditions:
10 |  *
11 |  * The above copyright notice and this permission notice shall be included in
12 |  * all copies or substantial portions of the Software.
13 |  *
14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
20 |  * IN THE SOFTWARE.
21 |  */
22 | 
23 | #ifndef ROUTINES_H
24 | #define ROUTINES_H
25 | 
26 | #include "routine.h"
27 | 
28 | #ifdef __cplusplus
29 | extern "C" {
30 | #endif
31 | 
/* Looks up a registered routine by name; returns NULL when no registered
 * routine has that name. */
const struct routine *routine_from_name(const char *);
/* Sorts the table of registered routines and returns it through the first
 * argument, with the number of entries stored through the second. */
void routine_get_all(const struct routine ***, unsigned *);
34 | 
35 | #ifdef __cplusplus
36 | }
37 | #endif
38 | 
39 | #endif /* ROUTINES_H */
40 | 


--------------------------------------------------------------------------------
/src/util/cpus_allowed.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2012 by Tommi Rantala <tt.rantala@gmail.com>
  3 |  *
  4 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
  5 |  * of this software and associated documentation files (the "Software"), to
  6 |  * deal in the Software without restriction, including without limitation the
  7 |  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  8 |  * sell copies of the Software, and to permit persons to whom the Software is
  9 |  * furnished to do so, subject to the following conditions:
 10 |  *
 11 |  * The above copyright notice and this permission notice shall be included in
 12 |  * all copies or substantial portions of the Software.
 13 |  *
 14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 17 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 20 |  * IN THE SOFTWARE.
 21 |  */
 22 | 
 23 | #include "cpus_allowed.h"
 24 | 
 25 | #include <stdio.h>
 26 | #include <string.h>
 27 | #include <stdlib.h>
 28 | 
 29 | static char *
 30 | status_entry(const char *key)
 31 | {
 32 | 	char *result = NULL;
 33 | 	char *line = NULL;
 34 | 	size_t line_n = 0;
 35 | 	FILE *fp;
 36 | 	fp = fopen("/proc/self/status", "r");
 37 | 	if (!fp)
 38 | 		goto done;
 39 | 	while (getline(&line, &line_n, fp) != -1) {
 40 | 		char *v;
 41 | 		v = strchr(line, ':');
 42 | 		if (!v || *v == '\0')
 43 | 			continue;
 44 | 		*v = '\0';
 45 | 		if (strcmp(line, key) != 0)
 46 | 			continue;
 47 | 		++v;
 48 | 		while (*v == ' ' || *v == '\t')
 49 | 			++v;
 50 | 		if (strlen(v) > 1)
 51 | 			v[strlen(v)-1] = '\0';
 52 | 		if (*v == '\0')
 53 | 			goto done;
 54 | 		result = line;
 55 | 		while ((*line++ = *v++))
 56 | 			;
 57 | 		goto done;
 58 | 	}
 59 | done:
 60 | 	if (!result)
 61 | 		free(line);
 62 | 	if (fp)
 63 | 		fclose(fp);
 64 | 	return result;
 65 | }
 66 | 
 67 | char *
 68 | cpus_allowed_list(void)
 69 | {
 70 | 	return status_entry("Cpus_allowed_list");
 71 | }
 72 | 
 73 | static int
 74 | ishexdigit(char ch)
 75 | {
 76 | 	return (ch >= '0' && ch <= '9')
 77 | 	    || (ch >= 'a' && ch <= 'f');
 78 | }
 79 | 
 80 | static int
 81 | hex2int(char ch)
 82 | {
 83 | 	if (ch >= '0' && ch <= '9')
 84 | 		return ch - '0';
 85 | 	if (ch >= 'a' && ch <= 'f')
 86 | 		return ch - 'a' + 10;
 87 | 	abort();
 88 | 	return 0;
 89 | }
 90 | 
 91 | static int
 92 | high_bit_order(char *allowed)
 93 | {
 94 | 	int order = -1;
 95 | 	int i = 0;
 96 | 	int k = strlen(allowed)-1;
 97 | 	for (; k >= 0; --k) {
 98 | 		char ch = allowed[k];
 99 | 		if (!ishexdigit(ch))
100 | 			continue;
101 | 		int mask = hex2int(ch);
102 | 		if (mask) {
103 | 			int neworder;
104 | 			if (mask & 8)
105 | 				neworder = 4 + i;
106 | 			else if (mask & 4)
107 | 				neworder = 3 + i;
108 | 			else if (mask & 2)
109 | 				neworder = 2 + i;
110 | 			else
111 | 				neworder = 1 + i;
112 | 			if (neworder > order)
113 | 				order = neworder;
114 | 		}
115 | 		i += 4;
116 | 	}
117 | 	return order;
118 | }
119 | 
/*
 * Sets in the CPU set 'c' (of 'setsize' bytes) every CPU whose bit is set in
 * the hexadecimal mask string 'allowed'. The string is read from its last
 * character to its first; each hex digit covers four consecutive CPUs and
 * characters that are not lowercase hex digits are skipped. Bits are only
 * ever set here, never cleared.
 */
static void
set_cpu_bits(char *allowed, cpu_set_t *c, size_t setsize)
{
	int cpu_base = 0;	/* CPU number of the current digit's lowest bit */
	size_t pos = strlen(allowed);
	while (pos-- > 0) {
		int mask;
		int bit;
		if (!ishexdigit(allowed[pos]))
			continue;
		mask = hex2int(allowed[pos]);
		for (bit = 0; bit < 4; ++bit) {
			if (mask & (1 << bit))
				CPU_SET_S(cpu_base + bit, setsize, c);
		}
		cpu_base += 4;
	}
}
137 | 
/*
 * Builds a CPU set from the "Cpus_allowed" mask in /proc/self/status.
 *
 * On success returns a set obtained from CPU_ALLOC() (release it with
 * CPU_FREE()), stores its size in bytes in *setsize and stores the result of
 * high_bit_order() - the 1-based position of the highest set mask bit - in
 * *maxcpu. Returns NULL when the mask is unavailable or empty, has no bit
 * set, or the allocation fails; *maxcpu may already have been written in the
 * latter two cases.
 */
cpu_set_t *
cpus_allowed(size_t *setsize, int *maxcpu)
{
	cpu_set_t *c = NULL;
	char *allowed = status_entry("Cpus_allowed");
	if (!allowed || strlen(allowed) == 0)
		goto done;
	*maxcpu = high_bit_order(allowed);
	if (*maxcpu == -1)
		goto done;
	c = CPU_ALLOC(*maxcpu+1);
	if (!c)
		goto done;
	*setsize = CPU_ALLOC_SIZE(*maxcpu+1);
	/* CPU_ALLOC() does not initialise the set and set_cpu_bits() only
	 * turns bits on, so start from an empty set. */
	CPU_ZERO_S(*setsize, c);
	set_cpu_bits(allowed, c, *setsize);
done:
	/* free(NULL) is a no-op, so this covers the early exits as well. */
	free(allowed);
	return c;
}
157 | 
/*
 * Reads the integer stored in
 * /sys/devices/system/cpu/cpu<cpu>/cpufreq/scaling_min_freq.
 *
 * Returns that value, or -1 when the path cannot be built, the file cannot
 * be opened, or no integer can be parsed from it.
 */
int
cpu_scaling_min_freq(int cpu)
{
	char *path = NULL;
	FILE *in;
	int freq = -1;
	if (asprintf(&path,
		"/sys/devices/system/cpu/cpu%d/cpufreq/scaling_min_freq",
			cpu) == -1)
		return -1;
	in = fopen(path, "r");
	free(path);
	if (in == NULL)
		return -1;
	if (fscanf(in, "%d", &freq) != 1)
		freq = -1;
	fclose(in);
	return freq;
}
178 | 
/*
 * Read the integer stored in
 * /sys/devices/system/cpu/cpu<cpu>/cpufreq/scaling_max_freq.
 *
 * Returns the parsed value, or -1 if the path cannot be built, the file
 * cannot be opened, or its contents do not start with an integer.
 */
int
cpu_scaling_max_freq(int cpu)
{
	char *path = NULL;
	FILE *stream;
	int value;
	int result = -1;
	if (asprintf(&path,
		"/sys/devices/system/cpu/cpu%d/cpufreq/scaling_max_freq",
			cpu) == -1) {
		return result;
	}
	stream = fopen(path, "r");
	free(path);	/* the name is not needed once the file is open */
	if (stream != NULL) {
		if (fscanf(stream, "%d", &value) == 1)
			result = value;
		fclose(stream);
	}
	return result;
}
199 | 


--------------------------------------------------------------------------------
/src/util/cpus_allowed.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright 2012 by Tommi Rantala <tt.rantala@gmail.com>
 3 |  *
 4 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
 5 |  * of this software and associated documentation files (the "Software"), to
 6 |  * deal in the Software without restriction, including without limitation the
 7 |  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 8 |  * sell copies of the Software, and to permit persons to whom the Software is
 9 |  * furnished to do so, subject to the following conditions:
10 |  *
11 |  * The above copyright notice and this permission notice shall be included in
12 |  * all copies or substantial portions of the Software.
13 |  *
14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
20 |  * IN THE SOFTWARE.
21 |  */
22 | 
23 | #ifndef CPUS_ALLOWED_H
24 | #define CPUS_ALLOWED_H
25 | 
26 | #define _GNU_SOURCE
27 | #include <sched.h>
28 | 
/* NOTE(review): presumably returns the textual list of allowed CPUs and is
 * released by the caller -- confirm against the definition in
 * cpus_allowed.c. */
char *cpus_allowed_list(void);
/* Builds a CPU set from the "Cpus_allowed" status entry. On success returns a
 * set allocated with CPU_ALLOC() (release with CPU_FREE()), stores its byte
 * size through the first argument, and stores through maxcpu the 1-based
 * position of the highest allowed CPU bit. Returns NULL on failure. */
cpu_set_t *cpus_allowed(size_t *, int *maxcpu);
/* Value read from /sys/devices/system/cpu/cpu<cpu>/cpufreq/scaling_max_freq,
 * or -1 if it cannot be read. */
int cpu_scaling_max_freq(int cpu);
/* Value read from /sys/devices/system/cpu/cpu<cpu>/cpufreq/scaling_min_freq,
 * or -1 if it cannot be read. */
int cpu_scaling_min_freq(int cpu);
33 | 
34 | #endif /* CPUS_ALLOWED_H */
35 | 


--------------------------------------------------------------------------------
/src/util/debug.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright 2007-2008 by Tommi Rantala <tt.rantala@gmail.com>
 3 |  *
 4 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
 5 |  * of this software and associated documentation files (the "Software"), to
 6 |  * deal in the Software without restriction, including without limitation the
 7 |  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 8 |  * sell copies of the Software, and to permit persons to whom the Software is
 9 |  * furnished to do so, subject to the following conditions:
10 |  *
11 |  * The above copyright notice and this permission notice shall be included in
12 |  * all copies or substantial portions of the Software.
13 |  *
14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
20 |  * IN THE SOFTWARE.
21 |  */
22 | 
23 | #ifndef UTIL_DEBUG_H
24 | #define UTIL_DEBUG_H
25 | 
26 | #include <string.h>
27 | #include <stdio.h>
28 | 
/* C++-only debug tracing helpers:
 *   debug()       expands to a stream expression; write to it with <<.
 *   debug_indent  increases the indentation of debug() output for the rest of
 *                 the enclosing scope. */
#ifdef __cplusplus
#include <iostream>
#if defined(NDEBUG) || defined(UNIT_TEST)
/* Tracing disabled: debug_indent expands to nothing, and debug() puts the
 * stream expression in a never-taken else branch so its operands are not
 * evaluated. */
#define debug_indent
#define debug() if (1) {} else std::cerr
#else
/* Current indentation prefix; one copy per translation unit (static). */
static std::string __debug_indent_str;
/* Declares a scoped object that appends four spaces to the prefix on
 * construction and removes the last four characters on destruction. */
#define debug_indent struct DI{std::string&i;DI(std::string&_i):i(_i){i+="    ";}~DI(){i=i.substr(0,i.size()-4);}}__d(__debug_indent_str);
/* Tracing enabled: write to std::cerr, starting with the current prefix. */
#define debug() std::cerr << __debug_indent_str
#endif
#endif /* __cplusplus */
40 | 
/*
 * Sanity-check an array of `n` string pointers that is expected to be sorted.
 *
 * Every adjacent pair is examined and three kinds of problems are counted:
 *   - identical: both entries are the same pointer,
 *   - invalid:   at least one entry of the pair is NULL,
 *   - wrong:     the pair is out of order according to strcmp().
 * A warning with the count is printed to stderr for each kind that occurred.
 *
 * Arrays with fewer than two elements have no adjacent pairs and are always
 * accepted; `strings` is not dereferenced in that case.
 *
 * Returns 0 if no problem was found, 1 otherwise.
 */
static inline int
check_result(unsigned char **strings, size_t n)
{
	size_t wrong = 0;
	size_t identical = 0;
	size_t invalid = 0;
	/* Start at 1 and look back: "i < n-1" would wrap around for n == 0. */
	for (size_t i=1; i < n; ++i) {
		if (strings[i-1] == strings[i])
			++identical;
		if (strings[i-1]==NULL || strings[i]==NULL)
			++invalid;
		else if (strcmp((char*)strings[i-1], (char*)strings[i]) > 0)
			++wrong;
	}
	if (identical)
		fprintf(stderr,
			"WARNING: found %zu identical pointers!\n",
			identical);
	if (wrong)
		fprintf(stderr,
			"WARNING: found %zu incorrect orderings!\n",
			wrong);
	if (invalid)
		fprintf(stderr,
			"WARNING: found %zu invalid pointers!\n",
			invalid);
	if (identical || wrong || invalid)
		return 1;
	return 0;
}
71 | 
72 | #endif //UTIL_DEBUG_H
73 | 


--------------------------------------------------------------------------------
/src/util/get_char.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2007-2008,2011 by Tommi Rantala <tt.rantala@gmail.com>
  3 |  *
  4 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
  5 |  * of this software and associated documentation files (the "Software"), to
  6 |  * deal in the Software without restriction, including without limitation the
  7 |  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  8 |  * sell copies of the Software, and to permit persons to whom the Software is
  9 |  * furnished to do so, subject to the following conditions:
 10 |  *
 11 |  * The above copyright notice and this permission notice shall be included in
 12 |  * all copies or substantial portions of the Software.
 13 |  *
 14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 17 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 20 |  * IN THE SOFTWARE.
 21 |  */
 22 | 
 23 | #ifndef GET_CHAR_H
 24 | #define GET_CHAR_H
 25 | 
 26 | #include <cstddef>
 27 | #include <inttypes.h>
 28 | #include <cassert>
 29 | 
 30 | template <typename CharT>
 31 | inline CharT
 32 | get_char(unsigned char* ptr, size_t depth);
 33 | 
 34 | template <>
 35 | inline unsigned char
 36 | get_char<unsigned char>(unsigned char* ptr, size_t depth)
 37 | {
 38 | 	assert(ptr);
 39 | 	return ptr[depth];
 40 | }
 41 | 
 42 | template <>
 43 | inline uint16_t
 44 | get_char<uint16_t>(unsigned char* ptr, size_t depth)
 45 | {
 46 | 	assert(ptr);
 47 | 	uint16_t ch = ptr[depth];
 48 | 	if (ch) ch = (ch << 8) | ptr[depth+1];
 49 | 	return ch;
 50 | }
 51 | 
 52 | template <>
 53 | inline uint32_t
 54 | get_char<uint32_t>(unsigned char* ptr, size_t depth)
 55 | {
 56 | 	assert(ptr);
 57 | 	uint32_t c = 0;
 58 | 	ptr += depth;
 59 | 	if (*ptr == 0) return c;
 60 | 	c  = (uint32_t(*ptr++) << 24);
 61 | 	if (*ptr == 0) return c;
 62 | 	c |= (uint32_t(*ptr++) << 16);
 63 | 	if (*ptr == 0) return c;
 64 | 	c |= (uint32_t(*ptr++) << 8 );
 65 | 	return c | *ptr;
 66 | }
 67 | 
 68 | template <>
 69 | inline uint64_t
 70 | get_char<uint64_t>(unsigned char* ptr, size_t depth)
 71 | {
 72 | 	uint64_t c = 0;
 73 | 	if (ptr[depth] == 0) return c;
 74 | 	c = (uint64_t(ptr[depth]) << 56); ++ptr;
 75 | 	if (ptr[depth] == 0) return c;
 76 | 	c |= (uint64_t(ptr[depth]) << 48); ++ptr;
 77 | 	if (ptr[depth] == 0) return c;
 78 | 	c |= (uint64_t(ptr[depth]) << 40); ++ptr;
 79 | 	if (ptr[depth] == 0) return c;
 80 | 	c |= (uint64_t(ptr[depth]) << 32); ++ptr;
 81 | 	if (ptr[depth] == 0) return c;
 82 | 	c |= (ptr[depth] << 24); ++ptr;
 83 | 	if (ptr[depth] == 0) return c;
 84 | 	c |= (ptr[depth] << 16); ++ptr;
 85 | 	if (ptr[depth] == 0) return c;
 86 | 	c |= (ptr[depth] << 8); ++ptr;
 87 | 	c |= ptr[depth];
 88 | 	return c;
 89 | }
 90 | 
 91 | template <typename CharT, int depth>
 92 | static inline CharT
 93 | get_char(unsigned char* ptr)
 94 | {
 95 | 	if (sizeof(CharT) == 1) {
 96 | 		return ptr[depth];
 97 | 	}
 98 | 	else if (sizeof(CharT) == 2) {
 99 | 		if (ptr[depth] == 0) return 0;
100 | 		else                 return (ptr[depth] << 8) | ptr[depth+1];
101 | 	}
102 | 	else if (sizeof(CharT) == 4) {
103 | 		CharT c = 0;
104 | 
105 | 		if (ptr[depth] == 0) return c;
106 | 		c = (ptr[depth] << 24);
107 | 		++ptr;
108 | 
109 | 		if (ptr[depth] == 0) return c;
110 | 		c |= (ptr[depth] << 16);
111 | 		++ptr;
112 | 
113 | 		if (ptr[depth] == 0) return c;
114 | 		c |= (ptr[depth] << 8);
115 | 		++ptr;
116 | 
117 | 		c |= ptr[depth];
118 | 
119 | 		return c;
120 | 	}
121 | 	else if (sizeof(CharT) == 8) {
122 | 		CharT c = 0;
123 | 
124 | 		if (ptr[depth] == 0) return c;
125 | 		c = (uint64_t(ptr[depth]) << 56); ++ptr;
126 | 
127 | 		if (ptr[depth] == 0) return c;
128 | 		c |= (uint64_t(ptr[depth]) << 48); ++ptr;
129 | 
130 | 		if (ptr[depth] == 0) return c;
131 | 		c |= (uint64_t(ptr[depth]) << 40); ++ptr;
132 | 
133 | 		if (ptr[depth] == 0) return c;
134 | 		c |= (uint64_t(ptr[depth]) << 32); ++ptr;
135 | 
136 | 		if (ptr[depth] == 0) return c;
137 | 		c |= (ptr[depth] << 24); ++ptr;
138 | 
139 | 		if (ptr[depth] == 0) return c;
140 | 		c |= (ptr[depth] << 16); ++ptr;
141 | 
142 | 		if (ptr[depth] == 0) return c;
143 | 		c |= (ptr[depth] << 8); ++ptr;
144 | 
145 | 		c |= ptr[depth];
146 | 
147 | 		return c;
148 | 	}
149 | 	else {
150 | 		assert(0);
151 | 	}
152 | }
153 | 
// is_end(c): true when the super-character `c` contains the end of the
// string, i.e. when its least significant byte is zero. Only the
// specializations below are defined.
template <typename CharT>
inline bool is_end(CharT c);

// A single byte is the end exactly when it is the NUL terminator.
template <> inline bool is_end<unsigned char>(unsigned char c)
{
	return !c;
}

// For wider characters only the low byte decides.
template <> inline bool is_end<uint16_t>(uint16_t c)
{
	const unsigned low_byte = c & 0xFF;
	return low_byte == 0;
}

template <> inline bool is_end<uint32_t>(uint32_t c)
{
	const uint32_t low_byte = c & 0xFF;
	return low_byte == 0;
}

template <> inline bool is_end<uint64_t>(uint64_t c)
{
	const uint64_t low_byte = c & 0xFF;
	return low_byte == 0;
}
176 | 
177 | #endif //GET_CHAR_H
178 | 


--------------------------------------------------------------------------------
/src/util/insertion_sort.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright 2007-2008 by Tommi Rantala <tt.rantala@gmail.com>
 3 |  *
 4 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
 5 |  * of this software and associated documentation files (the "Software"), to
 6 |  * deal in the Software without restriction, including without limitation the
 7 |  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 8 |  * sell copies of the Software, and to permit persons to whom the Software is
 9 |  * furnished to do so, subject to the following conditions:
10 |  *
11 |  * The above copyright notice and this permission notice shall be included in
12 |  * all copies or substantial portions of the Software.
13 |  *
14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
20 |  * IN THE SOFTWARE.
21 |  */
22 | 
23 | #ifndef INSERTION_SORT_H
24 | #define INSERTION_SORT_H
25 | 
26 | #include <cstddef>
27 | #include "get_char.h"
28 | 
29 | static inline void
30 | insertion_sort(unsigned char** strings, int n, size_t depth)
31 | {
32 | 	for (unsigned char** i = strings + 1; --n > 0; ++i) {
33 | 		unsigned char** j = i;
34 | 		unsigned char* tmp = *i;
35 | 		while (j > strings) {
36 | 			unsigned char* s = *(j-1)+depth;
37 | 			unsigned char* t = tmp+depth;
38 | 			while (*s == *t and not is_end(*s)) {
39 | 				++s;
40 | 				++t;
41 | 			}
42 | 			if (*s <= *t) break;
43 | 			*j = *(j-1);
44 | 			--j;
45 | 		}
46 | 		*j = tmp;
47 | 	}
48 | }
49 | 
50 | #endif //INSERTION_SORT_H
51 | 


--------------------------------------------------------------------------------
/src/util/median.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2007-2008 by Tommi Rantala <tt.rantala@gmail.com>
  3 |  *
  4 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
  5 |  * of this software and associated documentation files (the "Software"), to
  6 |  * deal in the Software without restriction, including without limitation the
  7 |  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  8 |  * sell copies of the Software, and to permit persons to whom the Software is
  9 |  * furnished to do so, subject to the following conditions:
 10 |  *
 11 |  * The above copyright notice and this permission notice shall be included in
 12 |  * all copies or substantial portions of the Software.
 13 |  *
 14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 17 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 20 |  * IN THE SOFTWARE.
 21 |  */
 22 | 
 23 | #ifndef UTIL_H
 24 | #define UTIL_H
 25 | 
 26 | #include "get_char.h"
 27 | 
 28 | template <typename CharT>
 29 | CharT
 30 | med3char(CharT a, CharT b, CharT c)
 31 | {
 32 | 	if (a == b)           return a;
 33 | 	if (c == a || c == b) return c;
 34 | 	if (a < b) {
 35 | 		if (b < c) return b;
 36 | 		if (a < c) return c;
 37 | 		return a;
 38 | 	}
 39 | 	if (b > c) return b;
 40 | 	if (a < c) return a;
 41 | 	return c;
 42 | }
 43 | 
 44 | template <typename CharT, typename Cmp>
 45 | CharT&
 46 | med3char(CharT& a, CharT& b, CharT& c, Cmp cmp)
 47 | {
 48 | 	if (cmp(a, b) == 0)                   return a;
 49 | 	if (cmp(c, a) == 0 or cmp(c, b) == 0) return c;
 50 | 	if (cmp(a, b) < 0) {
 51 | 		if (cmp(b, c) < 0) return b;
 52 | 		if (cmp(a, c) < 0) return c;
 53 | 		return a;
 54 | 	}
 55 | 	if (cmp(b, c) > 0) return b;
 56 | 	if (cmp(a, c) < 0) return a;
 57 | 	return c;
 58 | }
 59 | 
 60 | template <typename CharT>
 61 | inline CharT
 62 | pseudo_median(CharT* begin, CharT* end)
 63 | {
 64 | 	size_t N=end-begin;
 65 | 	assert(N>3);
 66 | 	return med3char(
 67 | 			med3char(begin[0],   begin[1],     begin[2]),
 68 | 			med3char(begin[N/2], begin[N/2+1], begin[N/2+2]),
 69 | 			med3char(begin[N-3], begin[N-2],   begin[N-1])
 70 | 		       );
 71 | }
 72 | 
 73 | template <typename CharT>
 74 | CharT
 75 | pseudo_median(unsigned char** strings, size_t N, size_t depth)
 76 | {
 77 | 	if (N > 30)
 78 | 		return med3char(
 79 | 			med3char(
 80 | 				get_char<CharT>(strings[0], depth),
 81 | 				get_char<CharT>(strings[1], depth),
 82 | 				get_char<CharT>(strings[2], depth)
 83 | 				),
 84 | 			med3char(
 85 | 				get_char<CharT>(strings[N/2  ], depth),
 86 | 				get_char<CharT>(strings[N/2+1], depth),
 87 | 				get_char<CharT>(strings[N/2+2], depth)
 88 | 				),
 89 | 			med3char(
 90 | 				get_char<CharT>(strings[N-3], depth),
 91 | 				get_char<CharT>(strings[N-2], depth),
 92 | 				get_char<CharT>(strings[N-1], depth)
 93 | 				)
 94 | 		       );
 95 | 	else
 96 | 		return med3char(get_char<CharT>(strings[0  ], depth),
 97 | 				get_char<CharT>(strings[N/2], depth),
 98 | 				get_char<CharT>(strings[N-1], depth));
 99 | }
100 | 
101 | #endif //UTIL_H
102 | 


--------------------------------------------------------------------------------
/src/util/sdt.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright 2020 by Tommi Rantala <tt.rantala@gmail.com>
 3 |  *
 4 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
 5 |  * of this software and associated documentation files (the "Software"), to
 6 |  * deal in the Software without restriction, including without limitation the
 7 |  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 8 |  * sell copies of the Software, and to permit persons to whom the Software is
 9 |  * furnished to do so, subject to the following conditions:
10 |  *
11 |  * The above copyright notice and this permission notice shall be included in
12 |  * all copies or substantial portions of the Software.
13 |  *
14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
20 |  * IN THE SOFTWARE.
21 |  */
22 | 
23 | #ifndef UTIL_SDT_H
24 | #define UTIL_SDT_H
25 | 
/* Use the probe macros from <sys/sdt.h> when the build defines
 * HAVE_SYS_SDT_H; otherwise define STAP_PROBE, STAP_PROBE1 and STAP_PROBE2
 * as empty macros so that probe call sites expand to nothing. */
#ifdef HAVE_SYS_SDT_H
#include <sys/sdt.h>
#else
#define STAP_PROBE(a,b)
#define STAP_PROBE1(a,b,arg1)
#define STAP_PROBE2(a,b,arg1,arg2)
#endif
33 | 
34 | #endif /* UTIL_SDT_H */
35 | 


--------------------------------------------------------------------------------
/src/util/timing.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright 2007-2008,2011 by Tommi Rantala <tt.rantala@gmail.com>
 3 |  *
 4 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
 5 |  * of this software and associated documentation files (the "Software"), to
 6 |  * deal in the Software without restriction, including without limitation the
 7 |  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 8 |  * sell copies of the Software, and to permit persons to whom the Software is
 9 |  * furnished to do so, subject to the following conditions:
10 |  *
11 |  * The above copyright notice and this permission notice shall be included in
12 |  * all copies or substantial portions of the Software.
13 |  *
14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
20 |  * IN THE SOFTWARE.
21 |  */
22 | 
23 | #define _GNU_SOURCE
24 | #include <stdio.h>
25 | #include <time.h>
26 | #include <sys/time.h>
27 | #include <sys/resource.h>
28 | 
/* Snapshots written by timing_start() (the *_start / startclock variables)
 * and timing_stop() (the *_stop / stopclock variables); the gettime_*()
 * functions report the difference between the two. */
static struct timespec process_cputime_start;	/* CLOCK_PROCESS_CPUTIME_ID */
static struct timespec process_cputime_stop;
static struct timespec monotonic_start;		/* CLOCK_MONOTONIC */
static struct timespec monotonic_stop;
static struct rusage startclock;		/* getrusage(RUSAGE_SELF) */
static struct rusage stopclock;
35 | 
/* Record the start of the measured interval: resource usage of the process,
 * the monotonic clock and the process CPU-time clock. Return values of the
 * underlying calls are not checked. */
void timing_start(void)
{
	getrusage(RUSAGE_SELF, &startclock);
	clock_gettime(CLOCK_MONOTONIC, &monotonic_start);
	clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &process_cputime_start);
}
42 | 
/* Record the end of the measured interval, sampling the same three sources
 * in the same order as timing_start(). Return values of the underlying calls
 * are not checked. */
void timing_stop(void)
{
	getrusage(RUSAGE_SELF, &stopclock);
	clock_gettime(CLOCK_MONOTONIC, &monotonic_stop);
	clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &process_cputime_stop);
}
49 | 
50 | double gettime_wall_clock(void)
51 | {
52 | 	double msecs_1 = monotonic_start.tv_nsec/1000000 + 1000*monotonic_start.tv_sec;
53 | 	double msecs_2 = monotonic_stop.tv_nsec/1000000 + 1000*monotonic_stop.tv_sec;
54 | 	return msecs_2 - msecs_1;
55 | }
56 | 
57 | double gettime_user(void)
58 | {
59 | 	struct timeval result;
60 | 	timersub(&stopclock.ru_utime, &startclock.ru_utime, &result);
61 | 	return (double)(result.tv_sec*1000)+(double)(result.tv_usec)/1e3;
62 | }
63 | 
64 | double gettime_sys(void)
65 | {
66 | 	struct timeval result;
67 | 	timersub(&stopclock.ru_stime, &startclock.ru_stime, &result);
68 | 	return (double)(result.tv_sec*1000)+(double)(result.tv_usec)/1e3;
69 | }
70 | 
71 | double gettime_user_sys(void)
72 | {
73 | 	struct timeval result_user;
74 | 	struct timeval result_sys;
75 | 	struct timeval result;
76 | 	timersub(&stopclock.ru_utime, &startclock.ru_utime, &result_user);
77 | 	timersub(&stopclock.ru_stime, &startclock.ru_stime, &result_sys);
78 | 	timeradd(&result_user, &result_sys, &result);
79 | 	return (double)(result.tv_sec*1000)+(double)(result.tv_usec)/1e3;
80 | }
81 | 
82 | double gettime_process_cputime(void)
83 | {
84 | 	double msecs_1 = process_cputime_start.tv_nsec/1000000 + 1000*process_cputime_start.tv_sec;
85 | 	double msecs_2 = process_cputime_stop.tv_nsec/1000000 + 1000*process_cputime_stop.tv_sec;
86 | 	return msecs_2 - msecs_1;
87 | }
88 | 


--------------------------------------------------------------------------------
/src/util/timing.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright 2011 by Tommi Rantala <tt.rantala@gmail.com>
 3 |  *
 4 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
 5 |  * of this software and associated documentation files (the "Software"), to
 6 |  * deal in the Software without restriction, including without limitation the
 7 |  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 8 |  * sell copies of the Software, and to permit persons to whom the Software is
 9 |  * furnished to do so, subject to the following conditions:
10 |  *
11 |  * The above copyright notice and this permission notice shall be included in
12 |  * all copies or substantial portions of the Software.
13 |  *
14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
20 |  * IN THE SOFTWARE.
21 |  */
22 | 
23 | #ifndef TIMING_H
24 | #define TIMING_H
25 | 
/* Mark the beginning and the end of the interval to be measured. */
void timing_start(void);
void timing_stop(void);

/* Durations of the interval between the last timing_start() and
 * timing_stop() calls, all reported in milliseconds. */
double gettime_user(void);		/* user-mode CPU time (getrusage) */
double gettime_sys(void);		/* system-mode CPU time (getrusage) */
double gettime_user_sys(void);		/* user + system CPU time */
double gettime_process_cputime(void);	/* CLOCK_PROCESS_CPUTIME_ID */
double gettime_wall_clock(void);	/* CLOCK_MONOTONIC */
34 | 
35 | #endif /* TIMING_H */
36 | 


--------------------------------------------------------------------------------
/src/util/vmainfo.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2011 by Tommi Rantala <tt.rantala@gmail.com>
  3 |  *
  4 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
  5 |  * of this software and associated documentation files (the "Software"), to
  6 |  * deal in the Software without restriction, including without limitation the
  7 |  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  8 |  * sell copies of the Software, and to permit persons to whom the Software is
  9 |  * furnished to do so, subject to the following conditions:
 10 |  *
 11 |  * The above copyright notice and this permission notice shall be included in
 12 |  * all copies or substantial portions of the Software.
 13 |  *
 14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 17 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 20 |  * IN THE SOFTWARE.
 21 |  */
 22 | 
 23 | #define _GNU_SOURCE
 24 | #include "vmainfo.h"
 25 | #include <stdio.h>
 26 | #include <string.h>
 27 | #include <stdlib.h>
 28 | 
 29 | /* Format the /proc/pid/smaps key-value pairs into two columns:
 30 |  *
 31 |  * Size:                136 kB
 32 |  * Rss:                   8 kB
 33 |  * Pss:                   8 kB
 34 |  * Shared_Clean:          0 kB
 35 |  * Shared_Dirty:          0 kB
 36 |  * Private_Clean:         0 kB
 37 |  * Private_Dirty:         8 kB
 38 |  * Referenced:            8 kB
 39 |  * Anonymous:             8 kB
 40 |  * AnonHugePages:         0 kB
 41 |  * Swap:                  0 kB
 42 |  * KernelPageSize:        4 kB
 43 |  * MMUPageSize:           4 kB
 44 |  * Locked:                0 kB
 45 |  *
 46 |  * =>
 47 |  *
 48 |  * Size:             390636 kB  |  Referenced:       390636 kB
 49 |  * Rss:              390636 kB  |  Anonymous:        390636 kB
 50 |  * Pss:              390636 kB  |  AnonHugePages:         0 kB
 51 |  * Shared_Clean:          0 kB  |  Swap:                  0 kB
 52 |  * Shared_Dirty:          0 kB  |  KernelPageSize:        4 kB
 53 |  * Private_Clean:         0 kB  |  MMUPageSize:           4 kB
 54 |  * Private_Dirty:    390636 kB  |  Locked:                0 kB
 55 |  */
 56 | static void
 57 | add_smaps(char *buf, char **pairs, unsigned pairs_cnt)
 58 | {
 59 | 	unsigned i, j;
 60 | 	for (i=0, j=pairs_cnt/2; i < pairs_cnt/2; ++i, ++j) {
 61 | 		pairs[i][strlen(pairs[i])-1] = '\0';
 62 | 		strcat(buf, "    ");
 63 | 		strcat(buf, pairs[i]);
 64 | 		strcat(buf, "  |  ");
 65 | 		strcat(buf, pairs[j]);
 66 | 	}
 67 | 	if (j < pairs_cnt) {
 68 | 		strcat(buf, "    ");
 69 | 		strcat(buf, pairs[j]);
 70 | 	}
 71 | }
 72 | 
 73 | static void
 74 | free_pairs(char **pairs, unsigned pairs_cnt)
 75 | {
 76 | 	unsigned i;
 77 | 	for (i=0; i < pairs_cnt; ++i)
 78 | 		free(pairs[i]);
 79 | 	free(pairs);
 80 | }
 81 | 
 82 | char *
 83 | vma_info(void *ptr)
 84 | {
 85 | 	FILE *fp = NULL;
 86 | 	char *buf = NULL;
 87 | 	char *line = NULL;
 88 | 	char **pairs = NULL, **tmp = NULL;
 89 | 	unsigned pairs_cnt = 0;
 90 | 	size_t line_n = 0;
 91 | 	buf = malloc(2048);
 92 | 	if (!buf)
 93 | 		goto done;
 94 | 	buf[0] = 0;
 95 | 	fp = fopen("/proc/self/smaps", "r");
 96 | 	if (!fp)
 97 | 		goto done;
 98 | 	while (getline(&line, &line_n, fp) != -1) {
 99 | 		unsigned long a, b;
100 | 		if (sscanf(line, "%lx-%lx", &a, &b) != 2)
101 | 			continue;
102 | 		if (a <= (unsigned long)ptr && (unsigned long)ptr < b) {
103 | 			/* OK, found it! */
104 | 			strcat(buf, "    ");
105 | 			strcat(buf, line);
106 | 			while (getline(&line, &line_n, fp) != -1) {
107 | 				if (line[0] >= 'A' && line[0] <= 'Z'
108 | 						&& strchr(line, ':') != NULL) {
109 | 					tmp = realloc(pairs, (pairs_cnt+1) * sizeof(char *));
110 | 					if (!tmp)
111 | 						goto done;
112 | 					pairs = tmp;
113 | 					pairs[pairs_cnt++] = line;
114 | 					line = NULL;
115 | 					line_n = 0;
116 | 				} else {
117 | 					free(line);
118 | 					goto done;
119 | 				}
120 | 			}
121 | 		}
122 | 	}
123 | done:
124 | 	add_smaps(buf, pairs, pairs_cnt);
125 | 	free_pairs(pairs, pairs_cnt);
126 | 	if (fp)
127 | 		fclose(fp);
128 | 	return buf;
129 | }
130 | 


--------------------------------------------------------------------------------
/src/util/vmainfo.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright 2011 by Tommi Rantala <tt.rantala@gmail.com>
 3 |  *
 4 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
 5 |  * of this software and associated documentation files (the "Software"), to
 6 |  * deal in the Software without restriction, including without limitation the
 7 |  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 8 |  * sell copies of the Software, and to permit persons to whom the Software is
 9 |  * furnished to do so, subject to the following conditions:
10 |  *
11 |  * The above copyright notice and this permission notice shall be included in
12 |  * all copies or substantial portions of the Software.
13 |  *
14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
20 |  * IN THE SOFTWARE.
21 |  */
22 | 
23 | #ifndef VMAINFO_H
24 | #define VMAINFO_H
25 | 
/* Returns a textual description, read from /proc/self/smaps, of the memory
 * mapping that contains ptr. The string is empty if no mapping is found;
 * NULL is returned if memory for the result cannot be allocated.
 * Release return value with free() when no longer needed. */
char *vma_info(void *ptr);
28 | 
29 | #endif /* VMAINFO_H */
30 | 


--------------------------------------------------------------------------------
/src/vector_bagwell.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2008-2009 by Tommi Rantala <tt.rantala@gmail.com>
  3 |  *
  4 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
  5 |  * of this software and associated documentation files (the "Software"), to
  6 |  * deal in the Software without restriction, including without limitation the
  7 |  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  8 |  * sell copies of the Software, and to permit persons to whom the Software is
  9 |  * furnished to do so, subject to the following conditions:
 10 |  *
 11 |  * The above copyright notice and this permission notice shall be included in
 12 |  * all copies or substantial portions of the Software.
 13 |  *
 14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 17 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 20 |  * IN THE SOFTWARE.
 21 |  */
 22 | 
 23 | /*
 24 |  * vector_bagwell implements the vector Abstract Data Type (ADT) using a series
 25 |  * of geometrically expanding memory blocks. Only a very limited set of
 26 |  * operations have been implemented.
 27 |  *
 28 |  * Might be faster than the typical geometrically expanding array in some
 29 |  * cases, because it does not need to copy elements from the smaller array to
 30 |  * the expanded larger array.
 31 |  *
 32 |  * Based on: Phil Bagwell: "Fast Functional Lists, Hash-Lists, Deques and
 33 |  * Variable Length Arrays"
 34 |  *
 35 |  * push_back: O(1) amortized time
 36 |  * indexing: O(1) time
 37 |  * size(): O(1) time
 38 |  *
 39 |  * wasted space: O(n)
 40 |  */
 41 | 
 42 | #ifndef VECTOR_BAGWELL
 43 | #define VECTOR_BAGWELL
 44 | 
 45 | #include <cstdlib>
 46 | #include <cstddef>
 47 | #include <vector>
 48 | #include <cassert>
 49 | 
 50 | // Initial: Size of the initial memory allocation. Has to be a power of two.
 51 | template <typename T, unsigned Initial = 16>
 52 | struct vector_bagwell
 53 | {
 54 | 	typedef T value_type;
 55 | 	typedef std::vector<T*> index_block_type;
 56 | 	void push_back(const T& t)
 57 | 	{
 58 | 		if (__builtin_expect(current_block_full(), false)) {
 59 | 			_left_in_block = Initial << _index_block.size();
 60 | 			_insertpos = static_cast<T*>(
 61 | 					malloc(_left_in_block*sizeof(T)));
 62 | 			_index_block.push_back(_insertpos);
 63 | 		}
 64 | 		*_insertpos++ = t;
 65 | 		--_left_in_block;
 66 | 	}
 67 | 	bool current_block_full() const { return _left_in_block == 0; }
 68 | 	T operator[](size_t index) const
 69 | 	{
 70 | 		assert(index < size());
 71 | 		static_assert(Initial==16, "this code assumes Initial==16");
 72 | 		static_assert(sizeof(size_t) <= sizeof(unsigned long),
 73 | 			"sizeof(size_t) must be max sizeof(unsigned long)");
 74 | 		const unsigned long fixed = index+16;
 75 | 		const unsigned long msb_diff =
 76 | 			(sizeof(unsigned long)*8-4-1) - __builtin_clzl(fixed);
 77 | 		const unsigned long msbit = 1 <<
 78 | 			((sizeof(unsigned long)*8-1) - __builtin_clzl(fixed));
 79 | 		const unsigned long fixed2 = fixed & ~msbit;
 80 | 		return _index_block[msb_diff][fixed2];
 81 | 	}
 82 | 	void clear()
 83 | 	{
 84 | 		for (unsigned i=0; i < _index_block.size(); ++i) {
 85 | 			free(_index_block[i]);
 86 | 		}
 87 | 		_index_block.clear();
 88 | 		_insertpos = 0;
 89 | 		_left_in_block = 0;
 90 | 	}
 91 | 	size_t size() const
 92 | 	{
 93 | 		static_assert(Initial==16, "this code assumes Initial==16");
 94 | 		if (empty()) return 0;
 95 | 		return (1<<(3+_index_block.size()))-1-15
 96 | 			+ _insertpos-_index_block.back();
 97 | 	}
 98 | 	bool empty() const { return _insertpos==0; }
 99 | 	~vector_bagwell()
100 | 	{
101 | 		for (unsigned i=0; i < _index_block.size(); ++i)
102 | 			free(_index_block[i]);
103 | 	}
104 | 	vector_bagwell() : _index_block(), _insertpos(0), _left_in_block(0) {}
105 | 	index_block_type _index_block;
106 | 	T* _insertpos;
107 | 	size_t _left_in_block;
108 | };
109 | 
110 | template <typename T, unsigned InitialSize, typename OutputIterator>
111 | static inline void
112 | copy(const vector_bagwell<T, InitialSize>& v, OutputIterator dst)
113 | {
114 | 	size_t bufsize = InitialSize;
115 | 	for (size_t i=1; i < v._index_block.size(); ++i) {
116 | 		std::copy(v._index_block[i-1],
117 | 		          v._index_block[i-1]+bufsize,
118 | 		          dst);
119 | 		dst += bufsize;
120 | 		bufsize *= 2;
121 | 	}
122 | 	std::copy(v._index_block.back(), v._insertpos, dst);
123 | }
124 | 
125 | #endif //VECTOR_BAGWELL
126 | 


--------------------------------------------------------------------------------
/src/vector_block.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2008 by Tommi Rantala <tt.rantala@gmail.com>
  3 |  *
  4 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
  5 |  * of this software and associated documentation files (the "Software"), to
  6 |  * deal in the Software without restriction, including without limitation the
  7 |  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  8 |  * sell copies of the Software, and to permit persons to whom the Software is
  9 |  * furnished to do so, subject to the following conditions:
 10 |  *
 11 |  * The above copyright notice and this permission notice shall be included in
 12 |  * all copies or substantial portions of the Software.
 13 |  *
 14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 17 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 20 |  * IN THE SOFTWARE.
 21 |  */
 22 | 
 23 | /*
 24 |  * vector_block implements the vector Abstract Data Type (ADT) using a series
 25 |  * of memory blocks of fixed size B. Only a very limited set of operations have
 26 |  * been implemented.
 27 |  *
 28 |  * push_back: O(1) time + ceiling(n/B) malloc() calls
 29 |  * indexing: O(1) time
 30 |  * size(): O(1) time
 31 |  *
 32 |  * wasted space: at most B-1 elements + O(n/B) wasted pointers in the
 33 |  * geometrically expanding index block
 34 |  */
 35 | 
 36 | #ifndef VECTOR_BLOCK
 37 | #define VECTOR_BLOCK
 38 | 
 39 | template <typename T, unsigned B=1024>
 40 | struct vector_block
 41 | {
 42 | 	typedef T value_type;
 43 | 	typedef std::vector<T*> index_block_type;
 44 | 	void push_back(const T& t)
 45 | 	{
 46 | 		if (__builtin_expect(is_full(), false)) {
 47 | 			_insertpos = static_cast<T*>(malloc(B*sizeof(T)));
 48 | 			_index_block.push_back(_insertpos);
 49 | 			_left_in_block = B;
 50 | 		}
 51 | 		*_insertpos++ = t;
 52 | 		--_left_in_block;
 53 | 	}
 54 | 	bool is_full() const { return _left_in_block==0; }
 55 | 	T operator[](size_t index) const
 56 | 	{
 57 | 		assert(index < size());
 58 | 		return _index_block[index/B][index%B];
 59 | 	}
 60 | 	size_t size() const
 61 | 	{
 62 | 		return _index_block.size()*B - _left_in_block;
 63 | 	}
 64 | 	void clear()
 65 | 	{
 66 | 		for (size_t i=0; i < _index_block.size(); ++i) {
 67 | 			free(_index_block[i]);
 68 | 		}
 69 | 		_index_block.clear();
 70 | 		_insertpos=0;
 71 | 		_left_in_block=0;
 72 | 	}
 73 | 	~vector_block()
 74 | 	{
 75 | 		for (size_t i=0; i < _index_block.size(); ++i) {
 76 | 			free(_index_block[i]);
 77 | 		}
 78 | 	}
 79 | 	vector_block() : _index_block(), _insertpos(0), _left_in_block(0) {}
 80 | 	index_block_type _index_block;
 81 | 	T* _insertpos;
 82 | 	unsigned _left_in_block;
 83 | };
 84 | 
 85 | // Copies all elements from this container to the given output iterator.
 86 | // This is probably redundant if iterators are ever implemented.
 87 | template <typename T, unsigned B, typename OutputIterator>
 88 | static inline void
 89 | copy(const vector_block<T, B>& v, OutputIterator dst)
 90 | {
 91 | 	assert(not v._index_block.empty());
 92 | 	for (size_t i=1; i < v._index_block.size(); ++i) {
 93 | 		std::copy(v._index_block[i-1],
 94 | 		          v._index_block[i-1]+B,
 95 | 		          dst);
 96 | 		dst += B;
 97 | 	}
 98 | 	std::copy(v._index_block.back(), v._insertpos, dst);
 99 | }
100 | 
101 | #endif //VECTOR_BLOCK
102 | 


--------------------------------------------------------------------------------
/src/vector_brodnik.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2008 by Tommi Rantala <tt.rantala@gmail.com>
  3 |  *
  4 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
  5 |  * of this software and associated documentation files (the "Software"), to
  6 |  * deal in the Software without restriction, including without limitation the
  7 |  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  8 |  * sell copies of the Software, and to permit persons to whom the Software is
  9 |  * furnished to do so, subject to the following conditions:
 10 |  *
 11 |  * The above copyright notice and this permission notice shall be included in
 12 |  * all copies or substantial portions of the Software.
 13 |  *
 14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 17 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 20 |  * IN THE SOFTWARE.
 21 |  */
 22 | 
 23 | /*
 24 |  * @inproceedings{673194,
 25 |  *     author = {Andrej Brodnik and Svante Carlsson and Erik D. Demaine and J.
 26 |  *               Ian Munro and Robert Sedgewick},
 27 |  *     title = {Resizable Arrays in Optimal Time and Space},
 28 |  *     booktitle = {WADS '99: Proceedings of the 6th International Workshop on
 29 |  *                  Algorithms and Data Structures},
 30 |  *     year = {1999},
 31 |  *     isbn = {3-540-66279-0},
 32 |  *     pages = {37--48},
 33 |  *     publisher = {Springer-Verlag},
 34 |  *     address = {London, UK},
 35 |  * }
 36 |  */
 37 | 
 38 | /*
 39 |  * Example with superblocks, blocks and indices marked.
 40 |  *
 41 |  *  SB=0 : [0]
 42 |  *  SB=1 : [1,2]
 43 |  *  SB=2 : [3,4] [5,6]
 44 |  *  SB=3 : [7,8,9,10] [11,12,13,14]
 45 |  *  SB=4 : [15,16,17,18] [19,20,21,22] [23,24,25,26] [27,28,29,30]
 46 |  */
 47 | 
 48 | #ifndef VECTOR_BRODNIK
 49 | #define VECTOR_BRODNIK
 50 | 
 51 | #include <cstdlib>
 52 | #include <cassert>
 53 | #include <inttypes.h>
 54 | #include <vector>
 55 | 
 56 | template <typename T>
 57 | struct vector_brodnik
 58 | {
 59 | 	// Skip some of the smallest blocks for better performance.
 60 | 	enum {
 61 | 		InitialSuperBlock = 6,
 62 | 		InitialBlocksize  = 8,
 63 | 		InitialSuperBlocksize = 1 << (InitialSuperBlock/2),
 64 | 		SkippedElements       = (1 << InitialSuperBlock) - 1,
 65 | 		SkippedDatablocks     = 14
 66 | 	};
 67 | 	typedef T value_type;
 68 | 	typedef std::vector<T*> index_block_type;
 69 | 	void push_back(const T& t)
 70 | 	{
 71 | 		if (is_full()) { grow(); }
 72 | 		*_insertpos++ = t;
 73 | 		--_left_in_block;
 74 | 	}
 75 | 	bool is_full() const { return _left_in_block == 0; }
 76 | 	size_t size() const
 77 | 	{
 78 | 		// The sum of elements in superblocks 0,...,k-1 is 2^k-1.
 79 | 		return ((1 << (_superblock+1))-1)
 80 | 			- _left_in_block
 81 | 			- (_block_size*_left_in_superblock)
 82 | 			- SkippedElements;
 83 | 	}
 84 | 	void grow()
 85 | 	{
 86 | 		assert(_left_in_block == 0);
 87 | 		if (_left_in_superblock == 0) {
 88 | 			if (_superblock&1) { _superblock_size *= 2; }
 89 | 			else               { _block_size *= 2;      }
 90 | 			++_superblock;
 91 | 			_left_in_superblock = _superblock_size;
 92 | 		}
 93 | 		_insertpos = static_cast<T*>(malloc(_block_size*sizeof(T)));
 94 | 		_index_block.push_back(_insertpos);
 95 | 		_left_in_block = _block_size;
 96 | 		--_left_in_superblock;
 97 | 	}
 98 | 	T operator[](size_t index) const
 99 | 	{
100 | 		// See the paper for details.
101 | 		assert(index < size());
102 | 		const size_t r = index+1+SkippedElements;
103 | 		const unsigned k = 31 - __builtin_clz(r);
104 | 		const unsigned msbit = 1 << (31 - __builtin_clz(r));
105 | 		const size_t b = (r & ~msbit) >> (k-k/2);
106 | 		const size_t e = ~((~size_t(0) >> (k-k/2)) << (k-k/2)) & r;
107 | 		const size_t p = k&1 ? (3*(1<<(k>>1))-2) : ((1<<((k>>1)+1))-2);
108 | 		/*
109 | 		std::cerr<<__func__<<"\n"
110 | 			<<"\tindex="<<index<<"\n"
111 | 			<<"\tr    ="<<r<<"\n"
112 | 			<<"\tk    ="<<k<<"\n"
113 | 			<<"\tmsbit="<<msbit<<"\n"
114 | 			<<"\tb    ="<<b<<"\n"
115 | 			<<"\te    ="<<e<<"\n"
116 | 			<<"\tp    ="<<p<<"\n\n";
117 | 		*/
118 | 		assert((p+b-SkippedDatablocks) < _index_block.size());
119 | 		return _index_block[p+b-SkippedDatablocks][e];
120 | 	}
121 | 	void clear()
122 | 	{
123 | 		for (unsigned i=0; i < _index_block.size(); ++i) {
124 | 			free(_index_block[i]);
125 | 		}
126 | 		_index_block.clear();
127 | 		_insertpos = 0;
128 | 		_left_in_block = 0;
129 | 		_left_in_superblock = InitialSuperBlocksize;
130 | 		_block_size = InitialBlocksize;
131 | 		_superblock_size = InitialSuperBlocksize;
132 | 		_superblock = InitialSuperBlock;
133 | 	}
134 | 	~vector_brodnik()
135 | 	{
136 | 		for (unsigned i=0; i < _index_block.size(); ++i) {
137 | 			free(_index_block[i]);
138 | 		}
139 | 	}
140 | 	vector_brodnik() { clear(); }
141 | 	index_block_type _index_block;
142 | 	T* _insertpos;
143 | 	size_t _left_in_block;
144 | 	size_t _block_size;
145 | 	uint16_t _left_in_superblock;
146 | 	uint16_t _superblock_size;
147 | 	uint8_t _superblock;
148 | };
149 | 
150 | template <typename T, typename OutputIterator>
151 | static inline void
152 | copy(const vector_brodnik<T>& bucket, OutputIterator dst)
153 | {
154 | 	bool superblock_odd=(vector_brodnik<T>::InitialSuperBlock % 2 == 1);
155 | 	size_t superblocksize=vector_brodnik<T>::InitialSuperBlocksize;
156 | 	size_t blocksize=vector_brodnik<T>::InitialBlocksize;
157 | 	for (size_t i=0; i < bucket._index_block.size(); ) {
158 | 		for (size_t j=0; j < superblocksize; ++j) {
159 | 			if (i+j == (bucket._index_block.size()-1)) goto done;
160 | 			std::copy(bucket._index_block[i+j],
161 | 			          bucket._index_block[i+j]+blocksize,
162 | 			          dst);
163 | 			dst += blocksize;
164 | 		}
165 | 		i += superblocksize;
166 | 		superblocksize <<= superblock_odd;
167 | 		blocksize     <<= !superblock_odd;
168 | 		superblock_odd = !superblock_odd;
169 | 	}
170 | done:
171 | 	std::copy(bucket._index_block.back(),
172 | 	          bucket._insertpos,
173 | 	          dst);
174 | }
175 | 
176 | #endif //VECTOR_BRODNIK
177 | 


--------------------------------------------------------------------------------
/src/vector_malloc.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2008,2012 by Tommi Rantala <tt.rantala@gmail.com>
  3 |  *
  4 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
  5 |  * of this software and associated documentation files (the "Software"), to
  6 |  * deal in the Software without restriction, including without limitation the
  7 |  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  8 |  * sell copies of the Software, and to permit persons to whom the Software is
  9 |  * furnished to do so, subject to the following conditions:
 10 |  *
 11 |  * The above copyright notice and this permission notice shall be included in
 12 |  * all copies or substantial portions of the Software.
 13 |  *
 14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 17 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 20 |  * IN THE SOFTWARE.
 21 |  */
 22 | 
 23 | #ifndef VECTOR_MALLOC
 24 | #define VECTOR_MALLOC
 25 | 
 26 | #include <cstdlib>
 27 | #include <cstddef>
 28 | #include <cstring>
 29 | #include <cassert>
 30 | 
 31 | template <typename T, unsigned InitialSize=16>
 32 | class vector_malloc
 33 | {
 34 | public:
 35 | 	vector_malloc() : _data(0), _size(0), _capacity(0) {}
 36 | 	~vector_malloc() { free(_data); }
 37 | 	void push_back(const T& t)
 38 | 	{
 39 | 		if (_size == _capacity) { grow(); }
 40 | 		_data[_size] = t;
 41 | 		++_size;
 42 | 	}
 43 | 	void clear()
 44 | 	{
 45 | 		free(_data);
 46 | 		_data = 0;
 47 | 		_size = 0;
 48 | 		_capacity = 0;
 49 | 	}
 50 | 	T operator[](size_t index) const
 51 | 	{
 52 | 		assert(index < size());
 53 | 		return _data[index];
 54 | 	}
 55 | 	const T* begin() const { return _data; }
 56 | 	const T* end() const  { return _data+_size; }
 57 | 	size_t capacity() const { return _capacity; }
 58 | 	size_t size() const     { return _size;     }
 59 | private:
 60 | 	void grow()
 61 | 	{
 62 | 		if (_capacity == 0) {
 63 | 			_capacity = InitialSize;
 64 | 			_data = static_cast<T*>(malloc(_capacity*sizeof(T)));
 65 | 		} else {
 66 | 			_capacity <<= 1;
 67 | 			T* t = static_cast<T*>(malloc(_capacity*sizeof(T)));
 68 | 			(void) memcpy(t, _data, _size*sizeof(T));
 69 | 			free(_data);
 70 | 			_data = t;
 71 | 		}
 72 | 	}
 73 | 	T* _data;
 74 | 	size_t _size;
 75 | 	size_t _capacity;
 76 | };
 77 | 
 78 | template <typename T, unsigned InitialSize=16>
 79 | class vector_malloc_counter_clear
 80 | {
 81 | public:
 82 | 	vector_malloc_counter_clear() : _data(0), _size(0), _capacity(0) {}
 83 | 	~vector_malloc_counter_clear() { free(_data); }
 84 | 	void push_back(const T& t)
 85 | 	{
 86 | 		if (_size == _capacity) { grow(); }
 87 | 		_data[_size] = t;
 88 | 		++_size;
 89 | 	}
 90 | 	void clear() { _size=0; }
 91 | 	T operator[](size_t index) const
 92 | 	{
 93 | 		assert(index < size());
 94 | 		return _data[index];
 95 | 	}
 96 | 	const T* begin() const  { return _data;       }
 97 | 	const T* end() const    { return _data+_size; }
 98 | 	size_t capacity() const { return _capacity;   }
 99 | 	size_t size() const     { return _size;       }
100 | private:
101 | 	void grow()
102 | 	{
103 | 		if (_capacity == 0) {
104 | 			_capacity = InitialSize;
105 | 			_data = static_cast<T*>(malloc(_capacity*sizeof(T)));
106 | 		} else {
107 | 			_capacity <<= 1;
108 | 			T* t = static_cast<T*>(malloc(_capacity*sizeof(T)));
109 | 			(void) memcpy(t, _data, _size*sizeof(T));
110 | 			free(_data);
111 | 			_data = t;
112 | 		}
113 | 	}
114 | 	T* _data;
115 | 	size_t _size;
116 | 	size_t _capacity;
117 | };
118 | 
119 | #endif //VECTOR_MALLOC
120 | 


--------------------------------------------------------------------------------
/src/vector_realloc.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2008,2012 by Tommi Rantala <tt.rantala@gmail.com>
  3 |  *
  4 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
  5 |  * of this software and associated documentation files (the "Software"), to
  6 |  * deal in the Software without restriction, including without limitation the
  7 |  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  8 |  * sell copies of the Software, and to permit persons to whom the Software is
  9 |  * furnished to do so, subject to the following conditions:
 10 |  *
 11 |  * The above copyright notice and this permission notice shall be included in
 12 |  * all copies or substantial portions of the Software.
 13 |  *
 14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 17 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 19 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 20 |  * IN THE SOFTWARE.
 21 |  */
 22 | 
 23 | #ifndef VECTOR_REALLOC
 24 | #define VECTOR_REALLOC
 25 | 
 26 | #include <cstdlib>
 27 | #include <cstddef>
 28 | #include <cstring>
 29 | #include <cassert>
 30 | 
 31 | template <typename T, unsigned InitialSize=16>
 32 | class vector_realloc
 33 | {
 34 | public:
 35 | 	vector_realloc() : _data(0), _size(0), _capacity(0) {}
 36 | 	~vector_realloc() { free(_data); }
 37 | 	void push_back(const T& t)
 38 | 	{
 39 | 		if (_size == _capacity) { grow(); }
 40 | 		_data[_size] = t;
 41 | 		++_size;
 42 | 	}
 43 | 	void clear()
 44 | 	{
 45 | 		free(_data);
 46 | 		_data = 0;
 47 | 		_size = 0;
 48 | 		_capacity = 0;
 49 | 	}
 50 | 	T operator[](size_t index) const
 51 | 	{
 52 | 		assert(index < size());
 53 | 		return _data[index];
 54 | 	}
 55 | 	const T* begin() const { return _data; }
 56 | 	const T* end() const  { return _data+_size; }
 57 | 	size_t capacity() const { return _capacity; }
 58 | 	size_t size() const     { return _size;     }
 59 | private:
 60 | 	void grow()
 61 | 	{
 62 | 		_capacity <<= 1;
 63 | 		if (_capacity == 0) {
 64 | 			_capacity = InitialSize;
 65 | 		}
 66 | 		_data = static_cast<T*>(realloc(_data, _capacity*sizeof(T)));
 67 | 	}
 68 | 	T* _data;
 69 | 	size_t _size;
 70 | 	size_t _capacity;
 71 | };
 72 | 
 73 | template <typename T, unsigned InitialSize=16>
 74 | class vector_realloc_counter_clear
 75 | {
 76 | public:
 77 | 	vector_realloc_counter_clear() : _data(0), _size(0), _capacity(0) {}
 78 | 	~vector_realloc_counter_clear() { free(_data); }
 79 | 	void push_back(const T& t)
 80 | 	{
 81 | 		if (_size == _capacity) { grow(); }
 82 | 		_data[_size] = t;
 83 | 		++_size;
 84 | 	}
 85 | 	void clear() { _size=0; }
 86 | 	T operator[](size_t index) const
 87 | 	{
 88 | 		assert(index < size());
 89 | 		return _data[index];
 90 | 	}
 91 | 	const T* begin() const  { return _data;       }
 92 | 	const T* end() const    { return _data+_size; }
 93 | 	size_t capacity() const { return _capacity;   }
 94 | 	size_t size() const     { return _size;       }
 95 | private:
 96 | 	void grow()
 97 | 	{
 98 | 		_capacity <<= 1;
 99 | 		if (_capacity == 0) {
100 | 			_capacity = InitialSize;
101 | 		}
102 | 		_data = static_cast<T*>(realloc(_data, _capacity*sizeof(T)));
103 | 	}
104 | 	T* _data;
105 | 	size_t _size;
106 | 	size_t _capacity;
107 | };
108 | 
// Same growth scheme as vector_realloc (capacity doubling with realloc()).
// clear() resets the element counter and, when the capacity exceeds 0x80000
// elements, halves the buffer with realloc().
//
// Elements may be moved by realloc() and are never destroyed individually.
template <typename T, unsigned InitialSize=16>
class vector_realloc_shrink_clear
{
public:
	vector_realloc_shrink_clear() : _data(0), _size(0), _capacity(0) {}

	// Deep copy. The implicitly generated copy would share the buffer
	// between both objects, and both destructors would free it.
	vector_realloc_shrink_clear(const vector_realloc_shrink_clear& other)
		: _data(duplicate(other)),
		  _size(other._size),
		  _capacity(other._capacity) {}

	vector_realloc_shrink_clear&
	operator=(const vector_realloc_shrink_clear& other)
	{
		if (this != &other) {
			T* fresh = duplicate(other);
			free(_data);
			_data = fresh;
			_size = other._size;
			_capacity = other._capacity;
		}
		return *this;
	}

	~vector_realloc_shrink_clear() { free(_data); }

	// Appends t, growing the buffer first when it is full.
	void push_back(const T& t)
	{
		if (_size == _capacity) { grow(); }
		_data[_size] = t;
		++_size;
	}

	// Forgets all elements and gives back half of an oversized buffer.
	void clear()
	{
		_size = 0;
		shrink();
	}

	// Returns a copy of the element at position index (index < size()).
	T operator[](size_t index) const
	{
		assert(index < size());
		return _data[index];
	}
	const T* begin() const  { return _data;       }
	const T* end() const    { return _data+_size; }
	size_t capacity() const { return _capacity;   }
	size_t size() const     { return _size;       }
private:
	// Returns a freshly allocated buffer with the capacity and contents
	// of other, or NULL when other owns no buffer.
	static T* duplicate(const vector_realloc_shrink_clear& other)
	{
		if (other._capacity == 0) { return 0; }
		T* p = static_cast<T*>(malloc(other._capacity*sizeof(T)));
		if (p == 0) { abort(); }
		if (other._size != 0) {
			(void) memcpy(p, other._data, other._size*sizeof(T));
		}
		return p;
	}

	// Doubles the capacity (InitialSize on the first call).
	void grow()
	{
		_capacity <<= 1;
		if (_capacity == 0) {
			_capacity = InitialSize;
		}
		// Keep the old pointer until realloc() has succeeded;
		// assigning the result directly to _data would lose the
		// buffer on failure and the caller would write through NULL.
		T* resized = static_cast<T*>(
			realloc(_data, _capacity*sizeof(T)));
		if (resized == 0 && _capacity != 0) { abort(); }
		_data = resized;
	}

	// Halves the capacity when it exceeds 0x80000 elements.
	void shrink()
	{
		if (_capacity > 0x80000) {
			const size_t halved = _capacity / 2;
			T* resized = static_cast<T*>(
				realloc(_data, halved*sizeof(T)));
			// If realloc() fails the old, larger buffer is still
			// valid: keep it, together with its capacity.
			if (resized != 0) {
				_data = resized;
				_capacity = halved;
			}
		}
	}
	T* _data;
	size_t _size;
	size_t _capacity;
};
156 | 
157 | #endif //VECTOR_REALLOC
158 | 


--------------------------------------------------------------------------------