├── .clang-format ├── .github └── workflows │ └── cmake.yml ├── .gitignore ├── CMakeLists.txt ├── LICENSE ├── README.md ├── benchmark ├── CMakeLists.txt ├── darts │ └── darts.h ├── main.cc ├── marisa-0.2.5 │ ├── AUTHORS │ ├── Makefile.am │ ├── Makefile.in │ ├── aclocal.m4 │ ├── bindings │ │ ├── Makefile │ │ ├── marisa-swig.cxx │ │ ├── marisa-swig.h │ │ ├── marisa-swig.i │ │ ├── perl │ │ │ ├── Makefile.PL │ │ │ ├── marisa-swig.cxx │ │ │ ├── marisa-swig.h │ │ │ ├── marisa-swig_wrap.cxx │ │ │ ├── marisa.pm │ │ │ └── sample.pl │ │ ├── python │ │ │ ├── marisa-swig.cxx │ │ │ ├── marisa-swig.h │ │ │ ├── marisa-swig_wrap.cxx │ │ │ ├── marisa.py │ │ │ ├── sample.py │ │ │ └── setup.py │ │ └── ruby │ │ │ ├── extconf.rb │ │ │ ├── marisa-swig.cxx │ │ │ ├── marisa-swig.h │ │ │ ├── marisa-swig_wrap.cxx │ │ │ └── sample.rb │ ├── compile │ ├── config.guess │ ├── config.sub │ ├── configure │ ├── configure.ac │ ├── depcomp │ ├── docs │ │ ├── readme.en.html │ │ ├── readme.ja.html │ │ └── style.css │ ├── include │ │ ├── Makefile.am │ │ ├── Makefile.in │ │ ├── marisa.h │ │ └── marisa │ │ │ ├── Makefile.am │ │ │ ├── Makefile.in │ │ │ ├── agent.h │ │ │ ├── base.h │ │ │ ├── exception.h │ │ │ ├── iostream.h │ │ │ ├── key.h │ │ │ ├── keyset.h │ │ │ ├── query.h │ │ │ ├── scoped-array.h │ │ │ ├── scoped-ptr.h │ │ │ ├── stdio.h │ │ │ └── trie.h │ ├── install-sh │ ├── lib │ │ ├── Makefile.am │ │ ├── Makefile.in │ │ └── marisa │ │ │ ├── Makefile.am │ │ │ ├── Makefile.in │ │ │ ├── agent.cc │ │ │ ├── grimoire │ │ │ ├── Makefile.am │ │ │ ├── Makefile.in │ │ │ ├── algorithm.h │ │ │ ├── algorithm │ │ │ │ ├── Makefile.am │ │ │ │ ├── Makefile.in │ │ │ │ └── sort.h │ │ │ ├── intrin.h │ │ │ ├── io.h │ │ │ ├── io │ │ │ │ ├── Makefile.am │ │ │ │ ├── Makefile.in │ │ │ │ ├── mapper.cc │ │ │ │ ├── mapper.h │ │ │ │ ├── reader.cc │ │ │ │ ├── reader.h │ │ │ │ ├── writer.cc │ │ │ │ └── writer.h │ │ │ ├── trie.h │ │ │ ├── trie │ │ │ │ ├── Makefile.am │ │ │ │ ├── Makefile.in │ │ │ │ ├── cache.h │ │ │ │ ├── config.h 
│ │ │ │ ├── entry.h │ │ │ │ ├── header.h │ │ │ │ ├── history.h │ │ │ │ ├── key.h │ │ │ │ ├── louds-trie.cc │ │ │ │ ├── louds-trie.h │ │ │ │ ├── range.h │ │ │ │ ├── state.h │ │ │ │ ├── tail.cc │ │ │ │ └── tail.h │ │ │ ├── vector.h │ │ │ └── vector │ │ │ │ ├── Makefile.am │ │ │ │ ├── Makefile.in │ │ │ │ ├── bit-vector.cc │ │ │ │ ├── bit-vector.h │ │ │ │ ├── flat-vector.h │ │ │ │ ├── pop-count.h │ │ │ │ ├── rank-index.h │ │ │ │ └── vector.h │ │ │ ├── keyset.cc │ │ │ └── trie.cc │ ├── ltmain.sh │ ├── m4 │ │ ├── libtool.m4 │ │ ├── ltoptions.m4 │ │ ├── ltsugar.m4 │ │ ├── ltversion.m4 │ │ └── lt~obsolete.m4 │ ├── marisa.pc.in │ ├── missing │ ├── test-driver │ ├── tests │ │ ├── Makefile.am │ │ ├── Makefile.in │ │ ├── base-test.cc │ │ ├── io-test.cc │ │ ├── marisa-assert.h │ │ ├── marisa-test.cc │ │ ├── trie-test.cc │ │ └── vector-test.cc │ ├── tools │ │ ├── Makefile.am │ │ ├── Makefile.in │ │ ├── cmdopt.cc │ │ ├── cmdopt.h │ │ ├── marisa-benchmark.cc │ │ ├── marisa-build.cc │ │ ├── marisa-common-prefix-search.cc │ │ ├── marisa-dump.cc │ │ ├── marisa-lookup.cc │ │ ├── marisa-predictive-search.cc │ │ └── marisa-reverse-lookup.cc │ └── vs2008 │ │ ├── base-test │ │ └── base-test.vcproj │ │ ├── io-test │ │ └── io-test.vcproj │ │ ├── libmarisa │ │ └── libmarisa.vcproj │ │ ├── marisa-benchmark │ │ └── marisa-benchmark.vcproj │ │ ├── marisa-build │ │ └── marisa-build.vcproj │ │ ├── marisa-common-prefix-search │ │ └── marisa-common-prefix-search.vcproj │ │ ├── marisa-dump │ │ └── marisa-dump.vcproj │ │ ├── marisa-lookup │ │ └── marisa-lookup.vcproj │ │ ├── marisa-predictive-search │ │ └── marisa-predictive-search.vcproj │ │ ├── marisa-reverse-lookup │ │ └── marisa-reverse-lookup.vcproj │ │ ├── marisa-test │ │ └── marisa-test.vcproj │ │ ├── trie-test │ │ └── trie-test.vcproj │ │ ├── vector-test │ │ └── vector-test.vcproj │ │ ├── vs2008.sln │ │ └── vs2008.suo └── ux-trie │ ├── bitVec.cpp │ ├── bitVec.hpp │ ├── bitVecTest.cpp │ ├── cmdline.h │ ├── rsDic.cpp │ ├── rsDic.hpp │ ├── 
ux.hpp │ ├── uxMain.cpp │ ├── uxMap.cpp │ ├── uxMap.hpp │ ├── uxMapTest.cpp │ ├── uxTest.cpp │ ├── uxTrie.cpp │ ├── uxTrie.hpp │ ├── uxUtil.cpp │ ├── uxUtil.hpp │ └── wscript ├── cmd ├── CMakeLists.txt ├── flags.h └── main.cc ├── fstlib.h ├── fstlib.pdf └── test ├── CMakeLists.txt ├── test.cc └── words.tsv /.clang-format: -------------------------------------------------------------------------------- 1 | BasedOnStyle: LLVM 2 | AllowShortBlocksOnASingleLine: true 3 | AllowShortCaseLabelsOnASingleLine: true 4 | AllowShortIfStatementsOnASingleLine: true 5 | Cpp11BracedListStyle: true 6 | -------------------------------------------------------------------------------- /.github/workflows/cmake.yml: -------------------------------------------------------------------------------- 1 | name: CMake 2 | 3 | on: [push, pull_request] 4 | 5 | env: 6 | # Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.) 7 | BUILD_TYPE: Release 8 | 9 | jobs: 10 | build: 11 | # The CMake configure and build commands are platform agnostic and should work equally 12 | # well on Windows or Mac. You can convert this to a matrix build if you need 13 | # cross-platform coverage. 
14 | # See: https://docs.github.com/en/actions/configuring-and-managing-workflows/configuring-a-workflow#configuring-a-build-matrix 15 | runs-on: ${{ matrix.os }} 16 | strategy: 17 | matrix: 18 | os: [ubuntu-latest, macos-latest, windows-latest] 19 | 20 | steps: 21 | - uses: actions/checkout@v2 22 | 23 | - name: Create Build Environment 24 | # Some projects don't allow in-source building, so create a separate build directory 25 | # We'll use this as our working directory for all subsequent commands 26 | run: cmake -E make_directory ${{runner.workspace}}/build 27 | 28 | - name: Configure CMake 29 | # Use a bash shell so we can use the same syntax for environment variable 30 | # access regardless of the host operating system 31 | shell: bash 32 | working-directory: ${{runner.workspace}}/build 33 | # Note the current convention is to use the -S and -B options here to specify source 34 | # and build directories, but this is only available with CMake 3.13 and higher. 35 | # This project requires CMake 3.14 (see CMakeLists.txt), so either form works; the positional form is kept here 36 | run: cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=$BUILD_TYPE 37 | 38 | - name: Build 39 | working-directory: ${{runner.workspace}}/build 40 | shell: bash 41 | # Execute the build. You can build a single target by appending "--target NAME" 42 | run: cmake --build . --config $BUILD_TYPE 43 | 44 | - name: Test 45 | working-directory: ${{runner.workspace}}/build 46 | shell: bash 47 | # Execute tests defined by the CMake configuration. 
# Top-level build script for cpp-fstlib: configures the compiler, then builds
# the tests, the command-line tool and the benchmark, and installs the header.
cmake_minimum_required(VERSION 3.14)
project(cpp-fstlib)

# Build as strict C++17. Without CMAKE_CXX_STANDARD_REQUIRED, CMake treats the
# standard as a preference and may silently fall back to an older one when the
# compiler lacks C++17 support; make that a configure-time error instead.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

if(MSVC)
  # /Zc:__cplusplus makes MSVC report the real language version in __cplusplus;
  # /utf-8 sets both the source and execution character sets to UTF-8.
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Zc:__cplusplus /utf-8")
else()
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wno-unused-parameter")
endif()

# Must be called before the subdirectories are added so that tests registered
# there are visible to ctest from the top-level build directory.
enable_testing()

add_subdirectory(test)
add_subdirectory(cmd)
add_subdirectory(benchmark)

# The library is header-only: installing it means copying fstlib.h.
install(FILES fstlib.h DESTINATION include)
| to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /benchmark/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.14) 2 | project(benchmark) 3 | 4 | if(NOT MSVC) 5 | add_definitions("-Wno-register") 6 | endif() 7 | 8 | add_executable(benchmark 9 | marisa-0.2.5/lib/marisa/agent.cc 10 | marisa-0.2.5/lib/marisa/keyset.cc 11 | marisa-0.2.5/lib/marisa/trie.cc 12 | marisa-0.2.5/lib/marisa/grimoire/io/mapper.cc 13 | marisa-0.2.5/lib/marisa/grimoire/io/reader.cc 14 | marisa-0.2.5/lib/marisa/grimoire/io/writer.cc 15 | marisa-0.2.5/lib/marisa/grimoire/vector/bit-vector.cc 16 | marisa-0.2.5/lib/marisa/grimoire/trie/louds-trie.cc 17 | marisa-0.2.5/lib/marisa/grimoire/trie/tail.cc 18 | ux-trie/bitVec.cpp 19 | ux-trie/rsDic.cpp 20 | ux-trie/uxTrie.cpp 21 | ux-trie/uxUtil.cpp 22 | main.cc) 23 | 24 | target_include_directories( 25 | benchmark PRIVATE 26 | .. 
27 | marisa-0.2.5/include 28 | marisa-0.2.5/lib) 29 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/AUTHORS: -------------------------------------------------------------------------------- 1 | Susumu Yata 2 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/Makefile.am: -------------------------------------------------------------------------------- 1 | ACLOCAL_AMFLAGS = -I m4 2 | 3 | SUBDIRS = include lib tools tests 4 | 5 | pkgconfigdir = ${libdir}/pkgconfig 6 | pkgconfig_DATA = marisa.pc 7 | 8 | EXTRA_DIST = \ 9 | bindings/Makefile \ 10 | bindings/perl/Makefile.PL \ 11 | bindings/perl/marisa.pm \ 12 | bindings/python/setup.py \ 13 | bindings/python/marisa.py \ 14 | bindings/ruby/extconf.rb \ 15 | bindings/*.cxx \ 16 | bindings/*.h \ 17 | bindings/*.i \ 18 | bindings/*/*.cxx \ 19 | bindings/*/*.h \ 20 | bindings/*/sample.* \ 21 | docs/*.html \ 22 | docs/*.css \ 23 | vs2008/vs2008.* \ 24 | vs2008/*/* 25 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/bindings/Makefile: -------------------------------------------------------------------------------- 1 | ALL: swig-perl swig-python swig-ruby 2 | 3 | swig-perl: 4 | swig -Wall -c++ -perl -outdir perl marisa-swig.i 5 | mv marisa-swig_wrap.cxx perl 6 | cp marisa-swig.cxx marisa-swig.h perl 7 | 8 | swig-python: 9 | swig -Wall -c++ -python -outdir python marisa-swig.i 10 | mv marisa-swig_wrap.cxx python 11 | cp marisa-swig.cxx marisa-swig.h python 12 | 13 | swig-ruby: 14 | swig -Wall -c++ -ruby -outdir ruby marisa-swig.i 15 | mv marisa-swig_wrap.cxx ruby 16 | cp marisa-swig.cxx marisa-swig.h ruby 17 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/bindings/marisa-swig.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_SWIG_H_ 2 | #define 
MARISA_SWIG_H_ 3 | 4 | #include 5 | 6 | namespace marisa_swig { 7 | 8 | #define MARISA_SWIG_ENUM_COPY(name) name = MARISA_ ## name 9 | 10 | enum ErrorCode { 11 | MARISA_SWIG_ENUM_COPY(OK), 12 | MARISA_SWIG_ENUM_COPY(STATE_ERROR), 13 | MARISA_SWIG_ENUM_COPY(NULL_ERROR), 14 | MARISA_SWIG_ENUM_COPY(BOUND_ERROR), 15 | MARISA_SWIG_ENUM_COPY(RANGE_ERROR), 16 | MARISA_SWIG_ENUM_COPY(CODE_ERROR), 17 | MARISA_SWIG_ENUM_COPY(RESET_ERROR), 18 | MARISA_SWIG_ENUM_COPY(SIZE_ERROR), 19 | MARISA_SWIG_ENUM_COPY(MEMORY_ERROR), 20 | MARISA_SWIG_ENUM_COPY(IO_ERROR), 21 | MARISA_SWIG_ENUM_COPY(FORMAT_ERROR) 22 | }; 23 | 24 | enum NumTries { 25 | MARISA_SWIG_ENUM_COPY(MIN_NUM_TRIES), 26 | MARISA_SWIG_ENUM_COPY(MAX_NUM_TRIES), 27 | MARISA_SWIG_ENUM_COPY(DEFAULT_NUM_TRIES) 28 | }; 29 | 30 | enum CacheLevel { 31 | MARISA_SWIG_ENUM_COPY(HUGE_CACHE), 32 | MARISA_SWIG_ENUM_COPY(LARGE_CACHE), 33 | MARISA_SWIG_ENUM_COPY(NORMAL_CACHE), 34 | MARISA_SWIG_ENUM_COPY(SMALL_CACHE), 35 | MARISA_SWIG_ENUM_COPY(TINY_CACHE), 36 | MARISA_SWIG_ENUM_COPY(DEFAULT_CACHE) 37 | }; 38 | 39 | enum TailMode { 40 | MARISA_SWIG_ENUM_COPY(TEXT_TAIL), 41 | MARISA_SWIG_ENUM_COPY(BINARY_TAIL), 42 | MARISA_SWIG_ENUM_COPY(DEFAULT_TAIL) 43 | }; 44 | 45 | enum NodeOrder { 46 | MARISA_SWIG_ENUM_COPY(LABEL_ORDER), 47 | MARISA_SWIG_ENUM_COPY(WEIGHT_ORDER), 48 | MARISA_SWIG_ENUM_COPY(DEFAULT_ORDER) 49 | }; 50 | 51 | #undef MARISA_SWIG_ENUM_COPY 52 | 53 | class Key { 54 | public: 55 | void str(const char **ptr_out, std::size_t *length_out) const; 56 | std::size_t id() const; 57 | float weight() const; 58 | 59 | private: 60 | const marisa::Key key_; 61 | 62 | Key(); 63 | Key(const Key &key); 64 | Key &operator=(const Key &); 65 | }; 66 | 67 | class Query { 68 | public: 69 | void str(const char **ptr_out, std::size_t *length_out) const; 70 | std::size_t id() const; 71 | 72 | private: 73 | const marisa::Query query_; 74 | 75 | Query(); 76 | Query(const Query &query); 77 | Query &operator=(const Query &); 78 | }; 79 | 80 | class 
Keyset { 81 | friend class Trie; 82 | 83 | public: 84 | Keyset(); 85 | ~Keyset(); 86 | 87 | void push_back(const marisa::Key &key); 88 | void push_back(const char *ptr, std::size_t length, float weight = 1.0); 89 | 90 | const Key &key(std::size_t i) const; 91 | 92 | void key_str(std::size_t i, 93 | const char **ptr_out, std::size_t *length_out) const; 94 | std::size_t key_id(std::size_t i) const; 95 | 96 | std::size_t num_keys() const; 97 | 98 | bool empty() const; 99 | std::size_t size() const; 100 | std::size_t total_length() const; 101 | 102 | void reset(); 103 | void clear(); 104 | 105 | private: 106 | marisa::Keyset *keyset_; 107 | 108 | Keyset(const Keyset &); 109 | Keyset &operator=(const Keyset &); 110 | }; 111 | 112 | class Agent { 113 | friend class Trie; 114 | 115 | public: 116 | Agent(); 117 | ~Agent(); 118 | 119 | void set_query(const char *ptr, std::size_t length); 120 | void set_query(std::size_t id); 121 | 122 | const Key &key() const; 123 | const Query &query() const; 124 | 125 | void key_str(const char **ptr_out, std::size_t *length_out) const; 126 | std::size_t key_id() const; 127 | 128 | void query_str(const char **ptr_out, std::size_t *length_out) const; 129 | std::size_t query_id() const; 130 | 131 | private: 132 | marisa::Agent *agent_; 133 | char *buf_; 134 | std::size_t buf_size_; 135 | 136 | Agent(const Agent &); 137 | Agent &operator=(const Agent &); 138 | }; 139 | 140 | class Trie { 141 | public: 142 | Trie(); 143 | ~Trie(); 144 | 145 | void build(Keyset &keyset, int config_flags = 0); 146 | 147 | void mmap(const char *filename); 148 | void load(const char *filename); 149 | void save(const char *filename) const; 150 | 151 | bool lookup(Agent &agent) const; 152 | void reverse_lookup(Agent &agent) const; 153 | bool common_prefix_search(Agent &agent) const; 154 | bool predictive_search(Agent &agent) const; 155 | 156 | std::size_t lookup(const char *ptr, std::size_t length) const; 157 | void reverse_lookup(std::size_t id, 158 | const char 
**ptr_out_to_be_deleted, std::size_t *length_out) const; 159 | 160 | std::size_t num_tries() const; 161 | std::size_t num_keys() const; 162 | std::size_t num_nodes() const; 163 | 164 | TailMode tail_mode() const; 165 | NodeOrder node_order() const; 166 | 167 | bool empty() const; 168 | std::size_t size() const; 169 | std::size_t total_size() const; 170 | std::size_t io_size() const; 171 | 172 | void clear(); 173 | 174 | private: 175 | marisa::Trie *trie_; 176 | 177 | Trie(const Trie &); 178 | Trie &operator=(const Trie &); 179 | }; 180 | 181 | } // namespace marisa_swig 182 | 183 | #endif // MARISA_SWIG_H_ 184 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/bindings/marisa-swig.i: -------------------------------------------------------------------------------- 1 | %module marisa 2 | 3 | %include "cstring.i" 4 | %include "exception.i" 5 | 6 | %{ 7 | #include "marisa-swig.h" 8 | %} 9 | 10 | %apply (char *STRING, int LENGTH) { (const char *ptr, std::size_t length) }; 11 | 12 | %cstring_output_allocate_size(const char **ptr_out, std::size_t *length_out, ); 13 | %cstring_output_allocate_size(const char **ptr_out_to_be_deleted, 14 | std::size_t *length_out, delete [] (*$1)); 15 | 16 | %exception { 17 | try { 18 | $action 19 | } catch (const marisa::Exception &ex) { 20 | SWIG_exception(SWIG_RuntimeError, ex.what()); 21 | } catch (...) 
use ExtUtils::MakeMaker;

# Settings for the generated Makefile: the extension is called "marisa", links
# against libmarisa, and is built from the hand-written wrapper
# (marisa-swig) plus the SWIG-generated glue (marisa-swig_wrap).
my %makefile_args = (
  'NAME'   => 'marisa',
  'LIBS'   => ['-lmarisa'],
  'OBJECT' => 'marisa-swig.o marisa-swig_wrap.o',
);

WriteMakefile(%makefile_args);
MARISA_SWIG_ENUM_COPY(DEFAULT_TAIL) 43 | }; 44 | 45 | enum NodeOrder { 46 | MARISA_SWIG_ENUM_COPY(LABEL_ORDER), 47 | MARISA_SWIG_ENUM_COPY(WEIGHT_ORDER), 48 | MARISA_SWIG_ENUM_COPY(DEFAULT_ORDER) 49 | }; 50 | 51 | #undef MARISA_SWIG_ENUM_COPY 52 | 53 | class Key { 54 | public: 55 | void str(const char **ptr_out, std::size_t *length_out) const; 56 | std::size_t id() const; 57 | float weight() const; 58 | 59 | private: 60 | const marisa::Key key_; 61 | 62 | Key(); 63 | Key(const Key &key); 64 | Key &operator=(const Key &); 65 | }; 66 | 67 | class Query { 68 | public: 69 | void str(const char **ptr_out, std::size_t *length_out) const; 70 | std::size_t id() const; 71 | 72 | private: 73 | const marisa::Query query_; 74 | 75 | Query(); 76 | Query(const Query &query); 77 | Query &operator=(const Query &); 78 | }; 79 | 80 | class Keyset { 81 | friend class Trie; 82 | 83 | public: 84 | Keyset(); 85 | ~Keyset(); 86 | 87 | void push_back(const marisa::Key &key); 88 | void push_back(const char *ptr, std::size_t length, float weight = 1.0); 89 | 90 | const Key &key(std::size_t i) const; 91 | 92 | void key_str(std::size_t i, 93 | const char **ptr_out, std::size_t *length_out) const; 94 | std::size_t key_id(std::size_t i) const; 95 | 96 | std::size_t num_keys() const; 97 | 98 | bool empty() const; 99 | std::size_t size() const; 100 | std::size_t total_length() const; 101 | 102 | void reset(); 103 | void clear(); 104 | 105 | private: 106 | marisa::Keyset *keyset_; 107 | 108 | Keyset(const Keyset &); 109 | Keyset &operator=(const Keyset &); 110 | }; 111 | 112 | class Agent { 113 | friend class Trie; 114 | 115 | public: 116 | Agent(); 117 | ~Agent(); 118 | 119 | void set_query(const char *ptr, std::size_t length); 120 | void set_query(std::size_t id); 121 | 122 | const Key &key() const; 123 | const Query &query() const; 124 | 125 | void key_str(const char **ptr_out, std::size_t *length_out) const; 126 | std::size_t key_id() const; 127 | 128 | void query_str(const char **ptr_out, 
# Sample script for the marisa Perl binding: builds a small trie, then
# exercises lookup, reverse lookup, save/load/mmap and both search modes.
use marisa;

# Collect the keys to be stored in the trie.
$keyset = new marisa::Keyset;
$keyset->push_back("cake");
$keyset->push_back("cookie");
$keyset->push_back("ice");
$keyset->push_back("ice-cream");

# Build the trie and report its basic statistics.
$trie = new marisa::Trie;
$trie->build($keyset);
print("no. keys: ", $trie->num_keys(), "\n");
print("no. tries: ", $trie->num_tries(), "\n");
print("no. nodes: ", $trie->num_nodes(), "\n");
print("size: ", $trie->io_size(), "\n");

# Agent-based lookup: the agent carries the query in and the key ID out.
$agent = new marisa::Agent;

$agent->set_query("cake");
$trie->lookup($agent);
print($agent->query_str(), ": ", $agent->key_id(), "\n");

$agent->set_query("cookie");
$trie->lookup($agent);
print($agent->query_str(), ": ", $agent->key_id(), "\n");

# "cockoo" was never added, so lookup() is expected to fail. The agent must be
# passed as $agent (a bare `agent` is just the string "agent"), and the
# message belongs to the failure branch, hence the negation.
$agent->set_query("cockoo");
if (!$trie->lookup($agent)) {
  print($agent->query_str(), ": not found\n");
}

# String-based lookup returns the key ID directly, or INVALID_KEY_ID.
print("ice: ", $trie->lookup("ice"), "\n");
print("ice-cream: ", $trie->lookup("ice-cream"), "\n");
if ($trie->lookup("ice-age") == $marisa::INVALID_KEY_ID) {
  print("ice-age: not found\n");
}

# Round-trip the dictionary through a file.
$trie->save("sample.dic");
$trie->load("sample.dic");

# Reverse lookup: key ID -> key string, first via the agent...
$agent->set_query(0);
$trie->reverse_lookup($agent);
print($agent->query_id(), ": ", $agent->key_str(), "\n");
$agent->set_query(1);
$trie->reverse_lookup($agent);
print($agent->query_id(), ": ", $agent->key_str(), "\n");

# ...then via the ID-based shortcut.
print("2: ", $trie->reverse_lookup(2), "\n");
print("3: ", $trie->reverse_lookup(3), "\n");

# Re-open the saved dictionary through mmap() before searching.
$trie->mmap("sample.dic");

# Each search call yields the next match until it returns false.
$agent->set_query("ice-cream soda");
while ($trie->common_prefix_search($agent)) {
  print($agent->query_str(), ": ", $agent->key_str(), " (",
      $agent->key_id(), ")\n");
}

$agent->set_query("ic");
while ($trie->predictive_search($agent)) {
  print($agent->query_str(), ": ", $agent->key_str(), " (",
      $agent->key_id(), ")\n");
}
MARISA_SWIG_ENUM_COPY(name) name = MARISA_ ## name 9 | 10 | enum ErrorCode { 11 | MARISA_SWIG_ENUM_COPY(OK), 12 | MARISA_SWIG_ENUM_COPY(STATE_ERROR), 13 | MARISA_SWIG_ENUM_COPY(NULL_ERROR), 14 | MARISA_SWIG_ENUM_COPY(BOUND_ERROR), 15 | MARISA_SWIG_ENUM_COPY(RANGE_ERROR), 16 | MARISA_SWIG_ENUM_COPY(CODE_ERROR), 17 | MARISA_SWIG_ENUM_COPY(RESET_ERROR), 18 | MARISA_SWIG_ENUM_COPY(SIZE_ERROR), 19 | MARISA_SWIG_ENUM_COPY(MEMORY_ERROR), 20 | MARISA_SWIG_ENUM_COPY(IO_ERROR), 21 | MARISA_SWIG_ENUM_COPY(FORMAT_ERROR) 22 | }; 23 | 24 | enum NumTries { 25 | MARISA_SWIG_ENUM_COPY(MIN_NUM_TRIES), 26 | MARISA_SWIG_ENUM_COPY(MAX_NUM_TRIES), 27 | MARISA_SWIG_ENUM_COPY(DEFAULT_NUM_TRIES) 28 | }; 29 | 30 | enum CacheLevel { 31 | MARISA_SWIG_ENUM_COPY(HUGE_CACHE), 32 | MARISA_SWIG_ENUM_COPY(LARGE_CACHE), 33 | MARISA_SWIG_ENUM_COPY(NORMAL_CACHE), 34 | MARISA_SWIG_ENUM_COPY(SMALL_CACHE), 35 | MARISA_SWIG_ENUM_COPY(TINY_CACHE), 36 | MARISA_SWIG_ENUM_COPY(DEFAULT_CACHE) 37 | }; 38 | 39 | enum TailMode { 40 | MARISA_SWIG_ENUM_COPY(TEXT_TAIL), 41 | MARISA_SWIG_ENUM_COPY(BINARY_TAIL), 42 | MARISA_SWIG_ENUM_COPY(DEFAULT_TAIL) 43 | }; 44 | 45 | enum NodeOrder { 46 | MARISA_SWIG_ENUM_COPY(LABEL_ORDER), 47 | MARISA_SWIG_ENUM_COPY(WEIGHT_ORDER), 48 | MARISA_SWIG_ENUM_COPY(DEFAULT_ORDER) 49 | }; 50 | 51 | #undef MARISA_SWIG_ENUM_COPY 52 | 53 | class Key { 54 | public: 55 | void str(const char **ptr_out, std::size_t *length_out) const; 56 | std::size_t id() const; 57 | float weight() const; 58 | 59 | private: 60 | const marisa::Key key_; 61 | 62 | Key(); 63 | Key(const Key &key); 64 | Key &operator=(const Key &); 65 | }; 66 | 67 | class Query { 68 | public: 69 | void str(const char **ptr_out, std::size_t *length_out) const; 70 | std::size_t id() const; 71 | 72 | private: 73 | const marisa::Query query_; 74 | 75 | Query(); 76 | Query(const Query &query); 77 | Query &operator=(const Query &); 78 | }; 79 | 80 | class Keyset { 81 | friend class Trie; 82 | 83 | public: 84 | Keyset(); 85 | ~Keyset(); 86 
| 87 | void push_back(const marisa::Key &key); 88 | void push_back(const char *ptr, std::size_t length, float weight = 1.0); 89 | 90 | const Key &key(std::size_t i) const; 91 | 92 | void key_str(std::size_t i, 93 | const char **ptr_out, std::size_t *length_out) const; 94 | std::size_t key_id(std::size_t i) const; 95 | 96 | std::size_t num_keys() const; 97 | 98 | bool empty() const; 99 | std::size_t size() const; 100 | std::size_t total_length() const; 101 | 102 | void reset(); 103 | void clear(); 104 | 105 | private: 106 | marisa::Keyset *keyset_; 107 | 108 | Keyset(const Keyset &); 109 | Keyset &operator=(const Keyset &); 110 | }; 111 | 112 | class Agent { 113 | friend class Trie; 114 | 115 | public: 116 | Agent(); 117 | ~Agent(); 118 | 119 | void set_query(const char *ptr, std::size_t length); 120 | void set_query(std::size_t id); 121 | 122 | const Key &key() const; 123 | const Query &query() const; 124 | 125 | void key_str(const char **ptr_out, std::size_t *length_out) const; 126 | std::size_t key_id() const; 127 | 128 | void query_str(const char **ptr_out, std::size_t *length_out) const; 129 | std::size_t query_id() const; 130 | 131 | private: 132 | marisa::Agent *agent_; 133 | char *buf_; 134 | std::size_t buf_size_; 135 | 136 | Agent(const Agent &); 137 | Agent &operator=(const Agent &); 138 | }; 139 | 140 | class Trie { 141 | public: 142 | Trie(); 143 | ~Trie(); 144 | 145 | void build(Keyset &keyset, int config_flags = 0); 146 | 147 | void mmap(const char *filename); 148 | void load(const char *filename); 149 | void save(const char *filename) const; 150 | 151 | bool lookup(Agent &agent) const; 152 | void reverse_lookup(Agent &agent) const; 153 | bool common_prefix_search(Agent &agent) const; 154 | bool predictive_search(Agent &agent) const; 155 | 156 | std::size_t lookup(const char *ptr, std::size_t length) const; 157 | void reverse_lookup(std::size_t id, 158 | const char **ptr_out_to_be_deleted, std::size_t *length_out) const; 159 | 160 | std::size_t 
# Sample script for the marisa Python binding: builds a small trie, then
# exercises lookup, reverse lookup, save/load/mmap and both search modes.
import marisa

# Collect the keys to be stored in the trie.
keyset = marisa.Keyset()
for word in ("cake", "cookie", "ice", "ice-cream"):
  keyset.push_back(word)

# Build the trie and report its basic statistics. Each getter is called right
# before its own line is printed.
trie = marisa.Trie()
trie.build(keyset)
for fmt, getter in (("no. keys: %d", trie.num_keys),
                    ("no. tries: %d", trie.num_tries),
                    ("no. nodes: %d", trie.num_nodes),
                    ("size: %d", trie.io_size)):
  print(fmt % getter())

# Agent-based lookup: the agent carries the query in and the key ID out.
agent = marisa.Agent()
for word in ("cake", "cookie"):
  agent.set_query(word)
  trie.lookup(agent)
  print("%s: %d" % (agent.query_str(), agent.key_id()))

# "cockoo" was never added, so this lookup is expected to fail.
agent.set_query("cockoo")
found = trie.lookup(agent)
if not found:
  print("%s: not found" % agent.query_str())

# String-based lookup returns the key ID directly, or INVALID_KEY_ID.
print("ice: %d" % trie.lookup("ice"))
print("ice-cream: %d" % trie.lookup("ice-cream"))
ice_age_id = trie.lookup("ice-age")
if ice_age_id == marisa.INVALID_KEY_ID:
  print("ice-age: not found")

# Round-trip the dictionary through a file.
trie.save("sample.dic")
trie.load("sample.dic")

# Reverse lookup: key ID -> key string, first via the agent...
for key_id in (0, 1):
  agent.set_query(key_id)
  trie.reverse_lookup(agent)
  print("%d: %s" % (agent.query_id(), agent.key_str()))

# ...then via the ID-based shortcut.
print("2: %s" % trie.reverse_lookup(2))
print("3: %s" % trie.reverse_lookup(3))


def print_matches(query, search):
  # Runs `search` repeatedly on the shared agent and prints every match;
  # each call yields the next match until it returns a false value.
  agent.set_query(query)
  while search(agent):
    print("%s: %s (%d)" % (agent.query_str(), agent.key_str(), agent.key_id()))


# Re-open the saved dictionary through mmap() before searching.
trie.mmap("sample.dic")

print_matches("ice-cream soda", trie.common_prefix_search)
print_matches("ic", trie.predictive_search)
/benchmark/marisa-0.2.5/bindings/ruby/extconf.rb: -------------------------------------------------------------------------------- 1 | require "mkmf" 2 | 3 | have_library("marisa") 4 | 5 | create_makefile("marisa") 6 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/bindings/ruby/marisa-swig.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_SWIG_H_ 2 | #define MARISA_SWIG_H_ 3 | 4 | #include <marisa.h> 5 | 6 | namespace marisa_swig { 7 | 8 | #define MARISA_SWIG_ENUM_COPY(name) name = MARISA_ ## name 9 | 10 | enum ErrorCode { 11 | MARISA_SWIG_ENUM_COPY(OK), 12 | MARISA_SWIG_ENUM_COPY(STATE_ERROR), 13 | MARISA_SWIG_ENUM_COPY(NULL_ERROR), 14 | MARISA_SWIG_ENUM_COPY(BOUND_ERROR), 15 | MARISA_SWIG_ENUM_COPY(RANGE_ERROR), 16 | MARISA_SWIG_ENUM_COPY(CODE_ERROR), 17 | MARISA_SWIG_ENUM_COPY(RESET_ERROR), 18 | MARISA_SWIG_ENUM_COPY(SIZE_ERROR), 19 | MARISA_SWIG_ENUM_COPY(MEMORY_ERROR), 20 | MARISA_SWIG_ENUM_COPY(IO_ERROR), 21 | MARISA_SWIG_ENUM_COPY(FORMAT_ERROR) 22 | }; 23 | 24 | enum NumTries { 25 | MARISA_SWIG_ENUM_COPY(MIN_NUM_TRIES), 26 | MARISA_SWIG_ENUM_COPY(MAX_NUM_TRIES), 27 | MARISA_SWIG_ENUM_COPY(DEFAULT_NUM_TRIES) 28 | }; 29 | 30 | enum CacheLevel { 31 | MARISA_SWIG_ENUM_COPY(HUGE_CACHE), 32 | MARISA_SWIG_ENUM_COPY(LARGE_CACHE), 33 | MARISA_SWIG_ENUM_COPY(NORMAL_CACHE), 34 | MARISA_SWIG_ENUM_COPY(SMALL_CACHE), 35 | MARISA_SWIG_ENUM_COPY(TINY_CACHE), 36 | MARISA_SWIG_ENUM_COPY(DEFAULT_CACHE) 37 | }; 38 | 39 | enum TailMode { 40 | MARISA_SWIG_ENUM_COPY(TEXT_TAIL), 41 | MARISA_SWIG_ENUM_COPY(BINARY_TAIL), 42 | MARISA_SWIG_ENUM_COPY(DEFAULT_TAIL) 43 | }; 44 | 45 | enum NodeOrder { 46 | MARISA_SWIG_ENUM_COPY(LABEL_ORDER), 47 | MARISA_SWIG_ENUM_COPY(WEIGHT_ORDER), 48 | MARISA_SWIG_ENUM_COPY(DEFAULT_ORDER) 49 | }; 50 | 51 | #undef MARISA_SWIG_ENUM_COPY 52 | 53 | class Key { 54 | public: 55 | void str(const char **ptr_out, std::size_t *length_out) const; 56 | std::size_t 
id() const; 57 | float weight() const; 58 | 59 | private: 60 | const marisa::Key key_; 61 | 62 | Key(); 63 | Key(const Key &key); 64 | Key &operator=(const Key &); 65 | }; 66 | 67 | class Query { 68 | public: 69 | void str(const char **ptr_out, std::size_t *length_out) const; 70 | std::size_t id() const; 71 | 72 | private: 73 | const marisa::Query query_; 74 | 75 | Query(); 76 | Query(const Query &query); 77 | Query &operator=(const Query &); 78 | }; 79 | 80 | class Keyset { 81 | friend class Trie; 82 | 83 | public: 84 | Keyset(); 85 | ~Keyset(); 86 | 87 | void push_back(const marisa::Key &key); 88 | void push_back(const char *ptr, std::size_t length, float weight = 1.0); 89 | 90 | const Key &key(std::size_t i) const; 91 | 92 | void key_str(std::size_t i, 93 | const char **ptr_out, std::size_t *length_out) const; 94 | std::size_t key_id(std::size_t i) const; 95 | 96 | std::size_t num_keys() const; 97 | 98 | bool empty() const; 99 | std::size_t size() const; 100 | std::size_t total_length() const; 101 | 102 | void reset(); 103 | void clear(); 104 | 105 | private: 106 | marisa::Keyset *keyset_; 107 | 108 | Keyset(const Keyset &); 109 | Keyset &operator=(const Keyset &); 110 | }; 111 | 112 | class Agent { 113 | friend class Trie; 114 | 115 | public: 116 | Agent(); 117 | ~Agent(); 118 | 119 | void set_query(const char *ptr, std::size_t length); 120 | void set_query(std::size_t id); 121 | 122 | const Key &key() const; 123 | const Query &query() const; 124 | 125 | void key_str(const char **ptr_out, std::size_t *length_out) const; 126 | std::size_t key_id() const; 127 | 128 | void query_str(const char **ptr_out, std::size_t *length_out) const; 129 | std::size_t query_id() const; 130 | 131 | private: 132 | marisa::Agent *agent_; 133 | char *buf_; 134 | std::size_t buf_size_; 135 | 136 | Agent(const Agent &); 137 | Agent &operator=(const Agent &); 138 | }; 139 | 140 | class Trie { 141 | public: 142 | Trie(); 143 | ~Trie(); 144 | 145 | void build(Keyset &keyset, int 
config_flags = 0); 146 | 147 | void mmap(const char *filename); 148 | void load(const char *filename); 149 | void save(const char *filename) const; 150 | 151 | bool lookup(Agent &agent) const; 152 | void reverse_lookup(Agent &agent) const; 153 | bool common_prefix_search(Agent &agent) const; 154 | bool predictive_search(Agent &agent) const; 155 | 156 | std::size_t lookup(const char *ptr, std::size_t length) const; 157 | void reverse_lookup(std::size_t id, 158 | const char **ptr_out_to_be_deleted, std::size_t *length_out) const; 159 | 160 | std::size_t num_tries() const; 161 | std::size_t num_keys() const; 162 | std::size_t num_nodes() const; 163 | 164 | TailMode tail_mode() const; 165 | NodeOrder node_order() const; 166 | 167 | bool empty() const; 168 | std::size_t size() const; 169 | std::size_t total_size() const; 170 | std::size_t io_size() const; 171 | 172 | void clear(); 173 | 174 | private: 175 | marisa::Trie *trie_; 176 | 177 | Trie(const Trie &); 178 | Trie &operator=(const Trie &); 179 | }; 180 | 181 | } // namespace marisa_swig 182 | 183 | #endif // MARISA_SWIG_H_ 184 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/bindings/ruby/sample.rb: -------------------------------------------------------------------------------- 1 | require "marisa" 2 | 3 | keyset = Marisa::Keyset.new 4 | keyset.push_back("cake") 5 | keyset.push_back("cookie") 6 | keyset.push_back("ice") 7 | keyset.push_back("ice-cream") 8 | 9 | trie = Marisa::Trie.new 10 | trie.build(keyset) 11 | print("no. keys: ", trie.num_keys(), "\n") 12 | print("no. tries: ", trie.num_tries(), "\n") 13 | print("no. 
nodes: ", trie.num_nodes(), "\n") 14 | print("size: ", trie.io_size(), "\n") 15 | 16 | agent = Marisa::Agent.new 17 | 18 | agent.set_query("cake") 19 | trie.lookup(agent) 20 | print(agent.query_str(), ": ", agent.key_id(), "\n") 21 | 22 | agent.set_query("cookie") 23 | trie.lookup(agent) 24 | print(agent.query_str(), ": ", agent.key_id(), "\n") 25 | 26 | agent.set_query("cockoo") 27 | if not trie.lookup(agent) 28 | print(agent.query_str(), ": not found\n") 29 | end 30 | 31 | print("ice: ", trie.lookup("ice"), "\n") 32 | print("ice-cream: ", trie.lookup("ice-cream"), "\n") 33 | if trie.lookup("ice-age") == Marisa::INVALID_KEY_ID 34 | print("ice-age: not found\n") 35 | end 36 | 37 | trie.save("sample.dic") 38 | trie.load("sample.dic") 39 | 40 | agent.set_query(0) 41 | trie.reverse_lookup(agent) 42 | print(agent.query_id(), ": ", agent.key_str(), "\n") 43 | 44 | agent.set_query(1) 45 | trie.reverse_lookup(agent) 46 | print(agent.query_id(), ": ", agent.key_str(), "\n") 47 | 48 | print("2: ", trie.reverse_lookup(2), "\n") 49 | print("3: ", trie.reverse_lookup(3), "\n") 50 | 51 | trie.mmap("sample.dic") 52 | 53 | agent.set_query("ice-cream soda") 54 | while trie.common_prefix_search(agent) 55 | print(agent.query_str(), ": ", agent.key_str(), " (", agent.key_id(), ")\n") 56 | end 57 | 58 | agent.set_query("ic") 59 | while trie.predictive_search(agent) 60 | print(agent.query_str(), ": ", agent.key_str(), " (", agent.key_id(), ")\n") 61 | end 62 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/docs/style.css: -------------------------------------------------------------------------------- 1 | @charset "utf-8"; 2 | 3 | * { 4 | line-height: 140%; 5 | margin: 0px; 6 | padding: 0px; 7 | } 8 | 9 | body { 10 | background: lightgray; 11 | color: #222; 12 | margin: 10px 20px; 13 | } 14 | 15 | a { 16 | color: inherit; 17 | text-decoration: none; 18 | } 19 | a:link { 20 | color: #066; 21 | padding: 0px 2px; 22 | } 23 | 
a:link:after { 24 | content: "†"; 25 | } 26 | a:visited { 27 | color: #057; 28 | padding: 0px 2px; 29 | } 30 | a:visited:after { 31 | content: "†"; 32 | } 33 | a:hover { 34 | text-decoration: underline; 35 | } 36 | a[name]:hover { 37 | color: inherit; 38 | text-decoration: none; 39 | } 40 | a:active { 41 | } 42 | a[name]:active { 43 | color: inherit; 44 | padding: 0px; 45 | } 46 | 47 | ol { 48 | list-style-position: outside; 49 | margin-left: 2em; 50 | } 51 | ol ol { 52 | list-style-position: outside; 53 | margin-left: 1em; 54 | } 55 | ul { 56 | list-style-position: outside; 57 | margin-left: 2em; 58 | } 59 | ul ul { 60 | list-style-position: outside; 61 | margin-left: 1em; 62 | } 63 | li { 64 | margin: 5px 0px; 65 | } 66 | 67 | code { 68 | color: darkblue; 69 | font-family: sans-serif; 70 | font-style: normal; 71 | padding: 0px 2px; 72 | } 73 | kbd { 74 | color: darkgreen; 75 | font-family: sans-serif; 76 | font-style: normal; 77 | padding: 0px 2px; 78 | } 79 | var { 80 | color: darkred; 81 | font-family: sans-serif; 82 | font-style: normal; 83 | padding: 0px 2px; 84 | } 85 | sup { 86 | font-size: 75%; 87 | } 88 | 89 | div#header { 90 | font-family: "Times New Roman"; 91 | padding: 5px 10px; 92 | } 93 | div#header div.left { 94 | float: left; 95 | } 96 | div#header div.right { 97 | float: right; 98 | } 99 | div#header div.end { 100 | clear: both; 101 | } 102 | 103 | div#body { 104 | background: white; 105 | border: 1px solid black; 106 | box-shadow: 1px 1px 5px gray; 107 | -webkit-box-shadow: 1px 1px 5px gray; 108 | -moz-box-shadow: 1px 1px 5px gray; 109 | padding: 20px; 110 | } 111 | 112 | div#body h1 { 113 | font-size: 150%; 114 | margin: 10px; 115 | text-align: center; 116 | } 117 | 118 | div#body p#authors { 119 | font-weight: bold; 120 | margin: 10px; 121 | text-align: center; 122 | } 123 | div#body p#authors span.author { 124 | margin: 0px 1em; 125 | } 126 | 127 | div#body p#abstract { 128 | margin: 15px auto; 129 | text-align: auto; 130 | width: 75%; 131 | 
} 132 | div#body p#abstract span#heading { 133 | font-family: "Times New Roman"; 134 | font-style: italic; 135 | font-weight: bold; 136 | margin-right: 0.5em; 137 | } 138 | 139 | div#body div.section { 140 | clear: both; 141 | margin: 5px 0px; 142 | } 143 | div#body div.section h2 { 144 | background: honeydew; 145 | border-bottom: 1px dashed darkgray; 146 | color: #353; 147 | font-size: 125%; 148 | margin-top: 15px; 149 | padding: 5px; 150 | } 151 | 152 | div#body div.section p { 153 | margin: 5px 0px; 154 | text-indent: 1em; 155 | } 156 | 157 | div#body div.section div.float { 158 | box-shadow: 1px 1px 5px gray; 159 | -webkit-box-shadow: 1px 1px 5px gray; 160 | -moz-box-shadow: 1px 1px 5px gray; 161 | clear: both; 162 | float: right; 163 | margin: 5px 0px 5px 10px; 164 | max-width: 75%; 165 | } 166 | div#body div.section pre.code { 167 | background: whitesmoke; 168 | color: darkblue; 169 | line-height: 125%; 170 | overflow: auto; 171 | padding: 5px 10px; 172 | } 173 | div#body div.section pre.console { 174 | background: darkslategray; 175 | color: white; 176 | line-height: 125%; 177 | overflow: auto; 178 | padding: 5px 10px; 179 | } 180 | 181 | div#body div.section table { 182 | background: whitesmoke; 183 | border-collapse: separate; 184 | border-spacing: 5px; 185 | empty-cells: hide; 186 | padding: 5px 10px; 187 | } 188 | div#body div.section table caption { 189 | background: inherit; 190 | padding-top: 5px; 191 | } 192 | div#body div.section table th { 193 | background: white; 194 | box-shadow: 1px 1px 3px gray; 195 | -webkit-box-shadow: 1px 1px 3px gray; 196 | -moz-box-shadow: 1px 1px 3px gray; 197 | font-weight: normal; 198 | padding: 0px 5px; 199 | } 200 | div#body div.section table td { 201 | background: white; 202 | box-shadow: 1px 1px 2px gray; 203 | -webkit-box-shadow: 1px 1px 2px gray; 204 | -moz-box-shadow: 1px 1px 2px gray; 205 | padding: 0px 3px; 206 | } 207 | 208 | div#body div.section div.subsection { 209 | clear: both; 210 | margin: 5px 0px; 211 | 
} 212 | div#body div.section div.subsection h3 { 213 | background: aliceblue; 214 | border-bottom: 1px dashed lightgray; 215 | color: #336; 216 | font-size: 100%; 217 | margin-top: 10px; 218 | padding: 3px 5px; 219 | } 220 | 221 | div#body div.section div.subsubsection { 222 | margin: 5px; 223 | } 224 | div#body div.section div.subsubsection h4 { 225 | background: lavenderblush; 226 | border-bottom: 1px dashed lightgray; 227 | color: #336; 228 | font-size: 100%; 229 | margin-top: 10px; 230 | padding: 3px 5px; 231 | } 232 | 233 | div#footer { 234 | font-family: "Times New Roman"; 235 | padding: 5px 10px; 236 | } 237 | div#footer div.left { 238 | float: left; 239 | } 240 | div#footer div.right { 241 | float: right; 242 | } 243 | div#footer div.end { 244 | clear: both; 245 | } 246 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/include/Makefile.am: -------------------------------------------------------------------------------- 1 | SUBDIRS = marisa 2 | 3 | include_HEADERS = marisa.h 4 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/include/marisa.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_H_ 2 | #define MARISA_H_ 3 | 4 | // "marisa/stdio.h" includes <cstdio> for I/O using std::FILE. 5 | #include "marisa/stdio.h" 6 | 7 | // "marisa/iostream.h" includes <iosfwd> for I/O using std::iostream. 8 | #include "marisa/iostream.h" 9 | 10 | // You can use <marisa/trie.h> instead of <marisa.h> if you don't need the 11 | // above I/O interfaces and don't want to include the above I/O headers. 
12 | #include "marisa/trie.h" 13 | 14 | #endif // MARISA_H_ 15 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/include/marisa/Makefile.am: -------------------------------------------------------------------------------- 1 | libmarisa_includedir = ${includedir}/marisa 2 | libmarisa_include_HEADERS = \ 3 | base.h \ 4 | exception.h \ 5 | scoped-ptr.h \ 6 | scoped-array.h \ 7 | key.h \ 8 | keyset.h \ 9 | query.h \ 10 | agent.h \ 11 | stdio.h \ 12 | iostream.h \ 13 | trie.h 14 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/include/marisa/agent.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_AGENT_H_ 2 | #define MARISA_AGENT_H_ 3 | 4 | #include "marisa/key.h" 5 | #include "marisa/query.h" 6 | 7 | namespace marisa { 8 | namespace grimoire { 9 | namespace trie { 10 | 11 | class State; 12 | 13 | } // namespace trie 14 | } // namespace grimoire 15 | 16 | class Agent { 17 | public: 18 | Agent(); 19 | ~Agent(); 20 | 21 | const Query &query() const { 22 | return query_; 23 | } 24 | const Key &key() const { 25 | return key_; 26 | } 27 | 28 | void set_query(const char *str); 29 | void set_query(const char *ptr, std::size_t length); 30 | void set_query(std::size_t key_id); 31 | 32 | const grimoire::trie::State &state() const { 33 | return *state_; 34 | } 35 | grimoire::trie::State &state() { 36 | return *state_; 37 | } 38 | 39 | void set_key(const char *str) { 40 | MARISA_DEBUG_IF(str == NULL, MARISA_NULL_ERROR); 41 | key_.set_str(str); 42 | } 43 | void set_key(const char *ptr, std::size_t length) { 44 | MARISA_DEBUG_IF((ptr == NULL) && (length != 0), MARISA_NULL_ERROR); 45 | MARISA_DEBUG_IF(length > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 46 | key_.set_str(ptr, length); 47 | } 48 | void set_key(std::size_t id) { 49 | MARISA_DEBUG_IF(id > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 50 | key_.set_id(id); 51 | } 52 | 
53 | bool has_state() const { 54 | return state_.get() != NULL; 55 | } 56 | void init_state(); 57 | 58 | void clear(); 59 | void swap(Agent &rhs); 60 | 61 | private: 62 | Query query_; 63 | Key key_; 64 | scoped_ptr<grimoire::trie::State> state_; 65 | 66 | // Disallows copy and assignment. 67 | Agent(const Agent &); 68 | Agent &operator=(const Agent &); 69 | }; 70 | 71 | } // namespace marisa 72 | 73 | #endif // MARISA_AGENT_H_ 74 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/include/marisa/exception.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_EXCEPTION_H_ 2 | #define MARISA_EXCEPTION_H_ 3 | 4 | #include <exception> 5 | 6 | #include "marisa/base.h" 7 | 8 | namespace marisa { 9 | 10 | // An exception object keeps a filename, a line number, an error code and an 11 | // error message. The message format is as follows: 12 | // "__FILE__:__LINE__: error_code: error_message" 13 | class Exception : public std::exception { 14 | public: 15 | Exception(const char *filename, int line, 16 | ErrorCode error_code, const char *error_message) 17 | : std::exception(), filename_(filename), line_(line), 18 | error_code_(error_code), error_message_(error_message) {} 19 | Exception(const Exception &ex) 20 | : std::exception(), filename_(ex.filename_), line_(ex.line_), 21 | error_code_(ex.error_code_), error_message_(ex.error_message_) {} 22 | virtual ~Exception() throw() {} 23 | 24 | Exception &operator=(const Exception &rhs) { 25 | filename_ = rhs.filename_; 26 | line_ = rhs.line_; 27 | error_code_ = rhs.error_code_; 28 | error_message_ = rhs.error_message_; 29 | return *this; 30 | } 31 | 32 | const char *filename() const { 33 | return filename_; 34 | } 35 | int line() const { 36 | return line_; 37 | } 38 | ErrorCode error_code() const { 39 | return error_code_; 40 | } 41 | const char *error_message() const { 42 | return error_message_; 43 | } 44 | 45 | virtual const char *what() const throw() { 46 | 
return error_message_; 47 | } 48 | 49 | private: 50 | const char *filename_; 51 | int line_; 52 | ErrorCode error_code_; 53 | const char *error_message_; 54 | }; 55 | 56 | // These macros are used to convert a line number to a string constant. 57 | #define MARISA_INT_TO_STR(value) #value 58 | #define MARISA_LINE_TO_STR(line) MARISA_INT_TO_STR(line) 59 | #define MARISA_LINE_STR MARISA_LINE_TO_STR(__LINE__) 60 | 61 | // MARISA_THROW throws an exception with a filename, a line number, an error 62 | // code and an error message. The message format is as follows: 63 | // "__FILE__:__LINE__: error_code: error_message" 64 | #define MARISA_THROW(error_code, error_message) \ 65 | (throw marisa::Exception(__FILE__, __LINE__, error_code, \ 66 | __FILE__ ":" MARISA_LINE_STR ": " #error_code ": " error_message)) 67 | 68 | // MARISA_THROW_IF throws an exception if `condition' is true. 69 | #define MARISA_THROW_IF(condition, error_code) \ 70 | (void)((!(condition)) || (MARISA_THROW(error_code, #condition), 0)) 71 | 72 | // MARISA_DEBUG_IF is ignored if _DEBUG is undefined. So, it is useful for 73 | // debugging time-critical codes. 
74 | #ifdef _DEBUG 75 | #define MARISA_DEBUG_IF(cond, error_code) MARISA_THROW_IF(cond, error_code) 76 | #else 77 | #define MARISA_DEBUG_IF(cond, error_code) 78 | #endif 79 | 80 | } // namespace marisa 81 | 82 | #endif // MARISA_EXCEPTION_H_ 83 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/include/marisa/iostream.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_IOSTREAM_H_ 2 | #define MARISA_IOSTREAM_H_ 3 | 4 | #include <iosfwd> 5 | 6 | namespace marisa { 7 | 8 | class Trie; 9 | 10 | std::istream &read(std::istream &stream, Trie *trie); 11 | std::ostream &write(std::ostream &stream, const Trie &trie); 12 | 13 | std::istream &operator>>(std::istream &stream, Trie &trie); 14 | std::ostream &operator<<(std::ostream &stream, const Trie &trie); 15 | 16 | } // namespace marisa 17 | 18 | #endif // MARISA_IOSTREAM_H_ 19 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/include/marisa/key.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_KEY_H_ 2 | #define MARISA_KEY_H_ 3 | 4 | #include "marisa/base.h" 5 | 6 | namespace marisa { 7 | 8 | class Key { 9 | public: 10 | Key() : ptr_(NULL), length_(0), union_() { 11 | union_.id = 0; 12 | } 13 | Key(const Key &key) 14 | : ptr_(key.ptr_), length_(key.length_), union_(key.union_) {} 15 | 16 | Key &operator=(const Key &key) { 17 | ptr_ = key.ptr_; 18 | length_ = key.length_; 19 | union_ = key.union_; 20 | return *this; 21 | } 22 | 23 | char operator[](std::size_t i) const { 24 | MARISA_DEBUG_IF(i >= length_, MARISA_BOUND_ERROR); 25 | return ptr_[i]; 26 | } 27 | 28 | void set_str(const char *str) { 29 | MARISA_DEBUG_IF(str == NULL, MARISA_NULL_ERROR); 30 | std::size_t length = 0; 31 | while (str[length] != '\0') { 32 | ++length; 33 | } 34 | MARISA_DEBUG_IF(length > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 35 | ptr_ = str; 36 
| length_ = (UInt32)length; 37 | } 38 | void set_str(const char *ptr, std::size_t length) { 39 | MARISA_DEBUG_IF((ptr == NULL) && (length != 0), MARISA_NULL_ERROR); 40 | MARISA_DEBUG_IF(length > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 41 | ptr_ = ptr; 42 | length_ = (UInt32)length; 43 | } 44 | void set_id(std::size_t id) { 45 | MARISA_DEBUG_IF(id > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 46 | union_.id = (UInt32)id; 47 | } 48 | void set_weight(float weight) { 49 | union_.weight = weight; 50 | } 51 | 52 | const char *ptr() const { 53 | return ptr_; 54 | } 55 | std::size_t length() const { 56 | return length_; 57 | } 58 | std::size_t id() const { 59 | return union_.id; 60 | } 61 | float weight() const { 62 | return union_.weight; 63 | } 64 | 65 | void clear() { 66 | Key().swap(*this); 67 | } 68 | void swap(Key &rhs) { 69 | marisa::swap(ptr_, rhs.ptr_); 70 | marisa::swap(length_, rhs.length_); 71 | marisa::swap(union_.id, rhs.union_.id); 72 | } 73 | 74 | private: 75 | const char *ptr_; 76 | UInt32 length_; 77 | union Union { 78 | UInt32 id; 79 | float weight; 80 | } union_; 81 | }; 82 | 83 | } // namespace marisa 84 | 85 | #endif // MARISA_KEY_H_ 86 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/include/marisa/keyset.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_KEYSET_H_ 2 | #define MARISA_KEYSET_H_ 3 | 4 | #include "marisa/key.h" 5 | 6 | namespace marisa { 7 | 8 | class Keyset { 9 | public: 10 | enum { 11 | BASE_BLOCK_SIZE = 4096, 12 | EXTRA_BLOCK_SIZE = 1024, 13 | KEY_BLOCK_SIZE = 256 14 | }; 15 | 16 | Keyset(); 17 | 18 | void push_back(const Key &key); 19 | void push_back(const Key &key, char end_marker); 20 | 21 | void push_back(const char *str); 22 | void push_back(const char *ptr, std::size_t length, float weight = 1.0); 23 | 24 | const Key &operator[](std::size_t i) const { 25 | MARISA_DEBUG_IF(i >= size_, MARISA_BOUND_ERROR); 26 | return 
key_blocks_[i / KEY_BLOCK_SIZE][i % KEY_BLOCK_SIZE]; 27 | } 28 | Key &operator[](std::size_t i) { 29 | MARISA_DEBUG_IF(i >= size_, MARISA_BOUND_ERROR); 30 | return key_blocks_[i / KEY_BLOCK_SIZE][i % KEY_BLOCK_SIZE]; 31 | } 32 | 33 | std::size_t num_keys() const { 34 | return size_; 35 | } 36 | 37 | bool empty() const { 38 | return size_ == 0; 39 | } 40 | std::size_t size() const { 41 | return size_; 42 | } 43 | std::size_t total_length() const { 44 | return total_length_; 45 | } 46 | 47 | void reset(); 48 | 49 | void clear(); 50 | void swap(Keyset &rhs); 51 | 52 | private: 53 | scoped_array<scoped_array<char> > base_blocks_; 54 | std::size_t base_blocks_size_; 55 | std::size_t base_blocks_capacity_; 56 | scoped_array<scoped_array<char> > extra_blocks_; 57 | std::size_t extra_blocks_size_; 58 | std::size_t extra_blocks_capacity_; 59 | scoped_array<scoped_array<Key> > key_blocks_; 60 | std::size_t key_blocks_size_; 61 | std::size_t key_blocks_capacity_; 62 | char *ptr_; 63 | std::size_t avail_; 64 | std::size_t size_; 65 | std::size_t total_length_; 66 | 67 | char *reserve(std::size_t size); 68 | 69 | void append_base_block(); 70 | void append_extra_block(std::size_t size); 71 | void append_key_block(); 72 | 73 | // Disallows copy and assignment. 
74 | Keyset(const Keyset &); 75 | Keyset &operator=(const Keyset &); 76 | }; 77 | 78 | } // namespace marisa 79 | 80 | #endif // MARISA_KEYSET_H_ 81 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/include/marisa/query.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_QUERY_H_ 2 | #define MARISA_QUERY_H_ 3 | 4 | #include "marisa/base.h" 5 | 6 | namespace marisa { 7 | 8 | class Query { 9 | public: 10 | Query() : ptr_(NULL), length_(0), id_(0) {} 11 | Query(const Query &query) 12 | : ptr_(query.ptr_), length_(query.length_), id_(query.id_) {} 13 | 14 | Query &operator=(const Query &query) { 15 | ptr_ = query.ptr_; 16 | length_ = query.length_; 17 | id_ = query.id_; 18 | return *this; 19 | } 20 | 21 | char operator[](std::size_t i) const { 22 | MARISA_DEBUG_IF(i >= length_, MARISA_BOUND_ERROR); 23 | return ptr_[i]; 24 | } 25 | 26 | void set_str(const char *str) { 27 | MARISA_DEBUG_IF(str == NULL, MARISA_NULL_ERROR); 28 | std::size_t length = 0; 29 | while (str[length] != '\0') { 30 | ++length; 31 | } 32 | ptr_ = str; 33 | length_ = length; 34 | } 35 | void set_str(const char *ptr, std::size_t length) { 36 | MARISA_DEBUG_IF((ptr == NULL) && (length != 0), MARISA_NULL_ERROR); 37 | ptr_ = ptr; 38 | length_ = length; 39 | } 40 | void set_id(std::size_t id) { 41 | id_ = id; 42 | } 43 | 44 | const char *ptr() const { 45 | return ptr_; 46 | } 47 | std::size_t length() const { 48 | return length_; 49 | } 50 | std::size_t id() const { 51 | return id_; 52 | } 53 | 54 | void clear() { 55 | Query().swap(*this); 56 | } 57 | void swap(Query &rhs) { 58 | marisa::swap(ptr_, rhs.ptr_); 59 | marisa::swap(length_, rhs.length_); 60 | marisa::swap(id_, rhs.id_); 61 | } 62 | 63 | private: 64 | const char *ptr_; 65 | std::size_t length_; 66 | std::size_t id_; 67 | }; 68 | 69 | } // namespace marisa 70 | 71 | #endif // MARISA_QUERY_H_ 72 | 
-------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/include/marisa/scoped-array.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_SCOPED_ARRAY_H_ 2 | #define MARISA_SCOPED_ARRAY_H_ 3 | 4 | #include "marisa/base.h" 5 | 6 | namespace marisa { 7 | 8 | template <typename T> 9 | class scoped_array { 10 | public: 11 | scoped_array() : array_(NULL) {} 12 | explicit scoped_array(T *array) : array_(array) {} 13 | 14 | ~scoped_array() { 15 | delete [] array_; 16 | } 17 | 18 | void reset(T *array = NULL) { 19 | MARISA_THROW_IF((array != NULL) && (array == array_), MARISA_RESET_ERROR); 20 | scoped_array(array).swap(*this); 21 | } 22 | 23 | T &operator[](std::size_t i) const { 24 | MARISA_DEBUG_IF(array_ == NULL, MARISA_STATE_ERROR); 25 | return array_[i]; 26 | } 27 | T *get() const { 28 | return array_; 29 | } 30 | 31 | void clear() { 32 | scoped_array().swap(*this); 33 | } 34 | void swap(scoped_array &rhs) { 35 | marisa::swap(array_, rhs.array_); 36 | } 37 | 38 | private: 39 | T *array_; 40 | 41 | // Disallows copy and assignment. 
42 | scoped_array(const scoped_array &); 43 | scoped_array &operator=(const scoped_array &); 44 | }; 45 | 46 | } // namespace marisa 47 | 48 | #endif // MARISA_SCOPED_ARRAY_H_ 49 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/include/marisa/scoped-ptr.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_SCOPED_PTR_H_ 2 | #define MARISA_SCOPED_PTR_H_ 3 | 4 | #include "marisa/base.h" 5 | 6 | namespace marisa { 7 | 8 | template <typename T> 9 | class scoped_ptr { 10 | public: 11 | scoped_ptr() : ptr_(NULL) {} 12 | explicit scoped_ptr(T *ptr) : ptr_(ptr) {} 13 | 14 | ~scoped_ptr() { 15 | delete ptr_; 16 | } 17 | 18 | void reset(T *ptr = NULL) { 19 | MARISA_THROW_IF((ptr != NULL) && (ptr == ptr_), MARISA_RESET_ERROR); 20 | scoped_ptr(ptr).swap(*this); 21 | } 22 | 23 | T &operator*() const { 24 | MARISA_DEBUG_IF(ptr_ == NULL, MARISA_STATE_ERROR); 25 | return *ptr_; 26 | } 27 | T *operator->() const { 28 | MARISA_DEBUG_IF(ptr_ == NULL, MARISA_STATE_ERROR); 29 | return ptr_; 30 | } 31 | T *get() const { 32 | return ptr_; 33 | } 34 | 35 | void clear() { 36 | scoped_ptr().swap(*this); 37 | } 38 | void swap(scoped_ptr &rhs) { 39 | marisa::swap(ptr_, rhs.ptr_); 40 | } 41 | 42 | private: 43 | T *ptr_; 44 | 45 | // Disallows copy and assignment. 
46 | scoped_ptr(const scoped_ptr &); 47 | scoped_ptr &operator=(const scoped_ptr &); 48 | }; 49 | 50 | } // namespace marisa 51 | 52 | #endif // MARISA_SCOPED_PTR_H_ 53 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/include/marisa/stdio.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_MYSTDIO_H_ 2 | #define MARISA_MYSTDIO_H_ 3 | 4 | #include <cstdio> 5 | 6 | namespace marisa { 7 | 8 | class Trie; 9 | 10 | void fread(std::FILE *file, Trie *trie); 11 | void fwrite(std::FILE *file, const Trie &trie); 12 | 13 | } // namespace marisa 14 | 15 | #endif // MARISA_MYSTDIO_H_ 16 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/include/marisa/trie.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_TRIE_H_ 2 | #define MARISA_TRIE_H_ 3 | 4 | #include "marisa/keyset.h" 5 | #include "marisa/agent.h" 6 | 7 | namespace marisa { 8 | namespace grimoire { 9 | namespace trie { 10 | 11 | class LoudsTrie; 12 | 13 | } // namespace trie 14 | } // namespace grimoire 15 | 16 | class Trie { 17 | friend class TrieIO; 18 | 19 | public: 20 | Trie(); 21 | ~Trie(); 22 | 23 | void build(Keyset &keyset, int config_flags = 0); 24 | 25 | void mmap(const char *filename); 26 | void map(const void *ptr, std::size_t size); 27 | 28 | void load(const char *filename); 29 | void read(int fd); 30 | 31 | void save(const char *filename) const; 32 | void write(int fd) const; 33 | 34 | bool lookup(Agent &agent) const; 35 | void reverse_lookup(Agent &agent) const; 36 | bool common_prefix_search(Agent &agent) const; 37 | bool predictive_search(Agent &agent) const; 38 | 39 | std::size_t num_tries() const; 40 | std::size_t num_keys() const; 41 | std::size_t num_nodes() const; 42 | 43 | TailMode tail_mode() const; 44 | NodeOrder node_order() const; 45 | 46 | bool empty() const; 47 | std::size_t size() const; 
48 | std::size_t total_size() const; 49 | std::size_t io_size() const; 50 | 51 | void clear(); 52 | void swap(Trie &rhs); 53 | 54 | private: 55 | scoped_ptr<grimoire::trie::LoudsTrie> trie_; 56 | 57 | // Disallows copy and assignment. 58 | Trie(const Trie &); 59 | Trie &operator=(const Trie &); 60 | }; 61 | 62 | } // namespace marisa 63 | 64 | #endif // MARISA_TRIE_H_ 65 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/lib/Makefile.am: -------------------------------------------------------------------------------- 1 | SUBDIRS = marisa 2 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/lib/marisa/Makefile.am: -------------------------------------------------------------------------------- 1 | SUBDIRS = grimoire 2 | 3 | MY_INCLUDE = -I$(top_srcdir)/include -I$(top_srcdir)/lib 4 | 5 | AM_CXXFLAGS = -Wall -Weffc++ -Wextra -Wconversion $(MY_INCLUDE) 6 | 7 | lib_LTLIBRARIES = libmarisa.la 8 | 9 | libmarisa_la_LDFLAGS = -no-undefined 10 | 11 | libmarisa_la_LIBADD = \ 12 | grimoire/io/libio.la \ 13 | grimoire/trie/libtrie.la \ 14 | grimoire/vector/libvector.la 15 | 16 | libmarisa_la_SOURCES = \ 17 | keyset.cc \ 18 | agent.cc \ 19 | trie.cc 20 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/lib/marisa/agent.cc: -------------------------------------------------------------------------------- 1 | #include <new> 2 | 3 | #include "marisa/agent.h" 4 | #include "marisa/grimoire/trie.h" 5 | 6 | namespace marisa { 7 | 8 | Agent::Agent() : query_(), key_(), state_() {} 9 | 10 | Agent::~Agent() {} 11 | 12 | void Agent::set_query(const char *str) { 13 | MARISA_THROW_IF(str == NULL, MARISA_NULL_ERROR); 14 | if (state_.get() != NULL) { 15 | state_->reset(); 16 | } 17 | query_.set_str(str); 18 | } 19 | 20 | void Agent::set_query(const char *ptr, std::size_t length) { 21 | MARISA_THROW_IF((ptr == NULL) && (length != 0), MARISA_NULL_ERROR); 22 | if 
(state_.get() != NULL) { 23 | state_->reset(); 24 | } 25 | query_.set_str(ptr, length); 26 | } 27 | 28 | void Agent::set_query(std::size_t key_id) { 29 | if (state_.get() != NULL) { 30 | state_->reset(); 31 | } 32 | query_.set_id(key_id); 33 | } 34 | 35 | void Agent::init_state() { 36 | MARISA_THROW_IF(state_.get() != NULL, MARISA_STATE_ERROR); 37 | state_.reset(new (std::nothrow) grimoire::State); 38 | MARISA_THROW_IF(state_.get() == NULL, MARISA_MEMORY_ERROR); 39 | } 40 | 41 | void Agent::clear() { 42 | Agent().swap(*this); 43 | } 44 | 45 | void Agent::swap(Agent &rhs) { 46 | query_.swap(rhs.query_); 47 | key_.swap(rhs.key_); 48 | state_.swap(rhs.state_); 49 | } 50 | 51 | } // namespace marisa 52 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/lib/marisa/grimoire/Makefile.am: -------------------------------------------------------------------------------- 1 | SUBDIRS = algorithm io trie vector 2 | 3 | noinst_HEADERS = \ 4 | intrin.h \ 5 | io.h \ 6 | vector.h \ 7 | algorithm.h \ 8 | trie.h 9 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/lib/marisa/grimoire/algorithm.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_GRIMOIRE_ALGORITHM_H_ 2 | #define MARISA_GRIMOIRE_ALGORITHM_H_ 3 | 4 | #include "marisa/grimoire/algorithm/sort.h" 5 | 6 | namespace marisa { 7 | namespace grimoire { 8 | 9 | class Algorithm { 10 | public: 11 | Algorithm() {} 12 | 13 | template 14 | std::size_t sort(Iterator begin, Iterator end) const { 15 | return algorithm::sort(begin, end); 16 | } 17 | 18 | private: 19 | Algorithm(const Algorithm &); 20 | Algorithm &operator=(const Algorithm &); 21 | }; 22 | 23 | } // namespace grimoire 24 | } // namespace marisa 25 | 26 | #endif // MARISA_GRIMOIRE_ALGORITHM_H_ 27 | -------------------------------------------------------------------------------- 
/benchmark/marisa-0.2.5/lib/marisa/grimoire/algorithm/Makefile.am: -------------------------------------------------------------------------------- 1 | noinst_HEADERS = \ 2 | sort.h 3 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/lib/marisa/grimoire/intrin.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_GRIMOIRE_INTRIN_H_ 2 | #define MARISA_GRIMOIRE_INTRIN_H_ 3 | 4 | #include "marisa/base.h" 5 | 6 | #if defined(__x86_64__) || defined(_M_X64) 7 | #define MARISA_X64 8 | #elif defined(__i386__) || defined(_M_IX86) 9 | #define MARISA_X86 10 | #else // defined(__i386__) || defined(_M_IX86) 11 | #ifdef MARISA_USE_POPCNT 12 | #undef MARISA_USE_POPCNT 13 | #endif // MARISA_USE_POPCNT 14 | #ifdef MARISA_USE_SSE4A 15 | #undef MARISA_USE_SSE4A 16 | #endif // MARISA_USE_SSE4A 17 | #ifdef MARISA_USE_SSE4 18 | #undef MARISA_USE_SSE4 19 | #endif // MARISA_USE_SSE4 20 | #ifdef MARISA_USE_SSE4_2 21 | #undef MARISA_USE_SSE4_2 22 | #endif // MARISA_USE_SSE4_2 23 | #ifdef MARISA_USE_SSE4_1 24 | #undef MARISA_USE_SSE4_1 25 | #endif // MARISA_USE_SSE4_1 26 | #ifdef MARISA_USE_SSSE3 27 | #undef MARISA_USE_SSSE3 28 | #endif // MARISA_USE_SSSE3 29 | #ifdef MARISA_USE_SSE3 30 | #undef MARISA_USE_SSE3 31 | #endif // MARISA_USE_SSE3 32 | #ifdef MARISA_USE_SSE2 33 | #undef MARISA_USE_SSE2 34 | #endif // MARISA_USE_SSE2 35 | #endif // defined(__i386__) || defined(_M_IX86) 36 | 37 | #ifdef MARISA_USE_POPCNT 38 | #ifndef MARISA_USE_SSE3 39 | #define MARISA_USE_SSE3 40 | #endif // MARISA_USE_SSE3 41 | #ifdef _MSC_VER 42 | #include 43 | #else // _MSC_VER 44 | #include 45 | #endif // _MSC_VER 46 | #endif // MARISA_USE_POPCNT 47 | 48 | #ifdef MARISA_USE_SSE4A 49 | #ifndef MARISA_USE_SSE3 50 | #define MARISA_USE_SSE3 51 | #endif // MARISA_USE_SSE3 52 | #ifndef MARISA_USE_POPCNT 53 | #define MARISA_USE_POPCNT 54 | #endif // MARISA_USE_POPCNT 55 | #endif // MARISA_USE_SSE4A 56 | 57 | 
#ifdef MARISA_USE_SSE4 58 | #ifndef MARISA_USE_SSE4_2 59 | #define MARISA_USE_SSE4_2 60 | #endif // MARISA_USE_SSE4_2 61 | #endif // MARISA_USE_SSE4 62 | 63 | #ifdef MARISA_USE_SSE4_2 64 | #ifndef MARISA_USE_SSE4_1 65 | #define MARISA_USE_SSE4_1 66 | #endif // MARISA_USE_SSE4_1 67 | #ifndef MARISA_USE_POPCNT 68 | #define MARISA_USE_POPCNT 69 | #endif // MARISA_USE_POPCNT 70 | #endif // MARISA_USE_SSE4_2 71 | 72 | #ifdef MARISA_USE_SSE4_1 73 | #ifndef MARISA_USE_SSSE3 74 | #define MARISA_USE_SSSE3 75 | #endif // MARISA_USE_SSSE3 76 | #endif // MARISA_USE_SSE4_1 77 | 78 | #ifdef MARISA_USE_SSSE3 79 | #ifndef MARISA_USE_SSE3 80 | #define MARISA_USE_SSE3 81 | #endif // MARISA_USE_SSE3 82 | #ifdef MARISA_X64 83 | #define MARISA_X64_SSSE3 84 | #else // MARISA_X64 85 | #define MARISA_X86_SSSE3 86 | #endif // MARISA_X64 87 | #include 88 | #endif // MARISA_USE_SSSE3 89 | 90 | #ifdef MARISA_USE_SSE3 91 | #ifndef MARISA_USE_SSE2 92 | #define MARISA_USE_SSE2 93 | #endif // MARISA_USE_SSE2 94 | #endif // MARISA_USE_SSE3 95 | 96 | #ifdef MARISA_USE_SSE2 97 | #ifdef MARISA_X64 98 | #define MARISA_X64_SSE2 99 | #else // MARISA_X64 100 | #define MARISA_X86_SSE2 101 | #endif // MARISA_X64 102 | #include 103 | #endif // MARISA_USE_SSE2 104 | 105 | #ifdef _MSC_VER 106 | #if MARISA_WORD_SIZE == 64 107 | #include 108 | #pragma intrinsic(_BitScanForward64) 109 | #else // MARISA_WORD_SIZE == 64 110 | #include 111 | #pragma intrinsic(_BitScanForward) 112 | #endif // MARISA_WORD_SIZE == 64 113 | #endif // _MSC_VER 114 | 115 | #endif // MARISA_GRIMOIRE_INTRIN_H_ 116 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/lib/marisa/grimoire/io.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_GRIMOIRE_IO_H_ 2 | #define MARISA_GRIMOIRE_IO_H_ 3 | 4 | #include "marisa/grimoire/io/mapper.h" 5 | #include "marisa/grimoire/io/reader.h" 6 | #include "marisa/grimoire/io/writer.h" 7 | 8 | namespace
marisa { 9 | namespace grimoire { 10 | 11 | using io::Mapper; 12 | using io::Reader; 13 | using io::Writer; 14 | 15 | } // namespace grimoire 16 | } // namespace marisa 17 | 18 | #endif // MARISA_GRIMOIRE_IO_H_ 19 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/lib/marisa/grimoire/io/Makefile.am: -------------------------------------------------------------------------------- 1 | MY_INCLUDE = -I$(top_srcdir)/include -I$(top_srcdir)/lib 2 | 3 | AM_CXXFLAGS = -Wall -Weffc++ -Wextra -Wconversion $(MY_INCLUDE) 4 | 5 | noinst_LTLIBRARIES = libio.la 6 | 7 | libio_la_LDFLAGS = -no-undefined 8 | 9 | libio_la_SOURCES = \ 10 | mapper.cc \ 11 | reader.cc \ 12 | writer.cc 13 | 14 | noinst_HEADERS = \ 15 | mapper.h \ 16 | reader.h \ 17 | writer.h 18 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/lib/marisa/grimoire/io/mapper.cc: -------------------------------------------------------------------------------- 1 | #if (defined _WIN32) || (defined _WIN64) 2 | #include 3 | #include 4 | #include 5 | #else // (defined _WIN32) || (defined _WIN64) 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #endif // (defined _WIN32) || (defined _WIN64) 12 | 13 | #include "marisa/grimoire/io/mapper.h" 14 | 15 | namespace marisa { 16 | namespace grimoire { 17 | namespace io { 18 | 19 | #if (defined _WIN32) || (defined _WIN64) 20 | Mapper::Mapper() 21 | : ptr_(NULL), origin_(NULL), avail_(0), size_(0), 22 | file_(NULL), map_(NULL) {} 23 | #else // (defined _WIN32) || (defined _WIN64) 24 | Mapper::Mapper() 25 | : ptr_(NULL), origin_(MAP_FAILED), avail_(0), size_(0), fd_(-1) {} 26 | #endif // (defined _WIN32) || (defined _WIN64) 27 | 28 | #if (defined _WIN32) || (defined _WIN64) 29 | Mapper::~Mapper() { 30 | if (origin_ != NULL) { 31 | ::UnmapViewOfFile(origin_); 32 | } 33 | 34 | if (map_ != NULL) { 35 | ::CloseHandle(map_); 36 | } 37 | 38 | if (file_ != NULL) { 
39 | ::CloseHandle(file_); 40 | } 41 | } 42 | #else // (defined _WIN32) || (defined _WIN64) 43 | Mapper::~Mapper() { 44 | if (origin_ != MAP_FAILED) { 45 | ::munmap(origin_, size_); 46 | } 47 | 48 | if (fd_ != -1) { 49 | ::close(fd_); 50 | } 51 | } 52 | #endif // (defined _WIN32) || (defined _WIN64) 53 | 54 | void Mapper::open(const char *filename) { 55 | MARISA_THROW_IF(filename == NULL, MARISA_NULL_ERROR); 56 | 57 | Mapper temp; 58 | temp.open_(filename); 59 | swap(temp); 60 | } 61 | 62 | void Mapper::open(const void *ptr, std::size_t size) { 63 | MARISA_THROW_IF((ptr == NULL) && (size != 0), MARISA_NULL_ERROR); 64 | 65 | Mapper temp; 66 | temp.open_(ptr, size); 67 | swap(temp); 68 | } 69 | 70 | void Mapper::seek(std::size_t size) { 71 | MARISA_THROW_IF(!is_open(), MARISA_STATE_ERROR); 72 | MARISA_THROW_IF(size > avail_, MARISA_IO_ERROR); 73 | 74 | map_data(size); 75 | } 76 | 77 | bool Mapper::is_open() const { 78 | return ptr_ != NULL; 79 | } 80 | 81 | void Mapper::clear() { 82 | Mapper().swap(*this); 83 | } 84 | 85 | void Mapper::swap(Mapper &rhs) { 86 | marisa::swap(ptr_, rhs.ptr_); 87 | marisa::swap(avail_, rhs.avail_); 88 | marisa::swap(origin_, rhs.origin_); 89 | marisa::swap(size_, rhs.size_); 90 | #if (defined _WIN32) || (defined _WIN64) 91 | marisa::swap(file_, rhs.file_); 92 | marisa::swap(map_, rhs.map_); 93 | #else // (defined _WIN32) || (defined _WIN64) 94 | marisa::swap(fd_, rhs.fd_); 95 | #endif // (defined _WIN32) || (defined _WIN64) 96 | } 97 | 98 | const void *Mapper::map_data(std::size_t size) { 99 | MARISA_THROW_IF(!is_open(), MARISA_STATE_ERROR); 100 | MARISA_THROW_IF(size > avail_, MARISA_IO_ERROR); 101 | 102 | const char * const data = static_cast(ptr_); 103 | ptr_ = data + size; 104 | avail_ -= size; 105 | return data; 106 | } 107 | 108 | #if (defined _WIN32) || (defined _WIN64) 109 | #ifdef __MSVCRT_VERSION__ 110 | #if __MSVCRT_VERSION__ >= 0x0601 111 | #define MARISA_HAS_STAT64 112 | #endif // __MSVCRT_VERSION__ >= 0x0601 113 | #endif // 
__MSVCRT_VERSION__ 114 | void Mapper::open_(const char *filename) { 115 | #ifdef MARISA_HAS_STAT64 116 | struct __stat64 st; 117 | MARISA_THROW_IF(::_stat64(filename, &st) != 0, MARISA_IO_ERROR); 118 | #else // MARISA_HAS_STAT64 119 | struct _stat st; 120 | MARISA_THROW_IF(::_stat(filename, &st) != 0, MARISA_IO_ERROR); 121 | #endif // MARISA_HAS_STAT64 122 | MARISA_THROW_IF((UInt64)st.st_size > MARISA_SIZE_MAX, MARISA_SIZE_ERROR); 123 | size_ = (std::size_t)st.st_size; 124 | 125 | file_ = ::CreateFileA(filename, GENERIC_READ, FILE_SHARE_READ, 126 | NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); 127 | MARISA_THROW_IF(file_ == INVALID_HANDLE_VALUE, MARISA_IO_ERROR); 128 | 129 | map_ = ::CreateFileMapping(file_, NULL, PAGE_READONLY, 0, 0, NULL); 130 | MARISA_THROW_IF(map_ == NULL, MARISA_IO_ERROR); 131 | 132 | origin_ = ::MapViewOfFile(map_, FILE_MAP_READ, 0, 0, 0); 133 | MARISA_THROW_IF(origin_ == NULL, MARISA_IO_ERROR); 134 | 135 | ptr_ = static_cast(origin_); 136 | avail_ = size_; 137 | } 138 | #else // (defined _WIN32) || (defined _WIN64) 139 | void Mapper::open_(const char *filename) { 140 | struct stat st; 141 | MARISA_THROW_IF(::stat(filename, &st) != 0, MARISA_IO_ERROR); 142 | MARISA_THROW_IF((UInt64)st.st_size > MARISA_SIZE_MAX, MARISA_SIZE_ERROR); 143 | size_ = (std::size_t)st.st_size; 144 | 145 | fd_ = ::open(filename, O_RDONLY); 146 | MARISA_THROW_IF(fd_ == -1, MARISA_IO_ERROR); 147 | 148 | origin_ = ::mmap(NULL, size_, PROT_READ, MAP_SHARED, fd_, 0); 149 | MARISA_THROW_IF(origin_ == MAP_FAILED, MARISA_IO_ERROR); 150 | 151 | ptr_ = static_cast(origin_); 152 | avail_ = size_; 153 | } 154 | #endif // (defined _WIN32) || (defined _WIN64) 155 | 156 | void Mapper::open_(const void *ptr, std::size_t size) { 157 | ptr_ = ptr; 158 | avail_ = size; 159 | } 160 | 161 | } // namespace io 162 | } // namespace grimoire 163 | } // namespace marisa 164 | -------------------------------------------------------------------------------- 
/benchmark/marisa-0.2.5/lib/marisa/grimoire/io/mapper.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_GRIMOIRE_IO_MAPPER_H_ 2 | #define MARISA_GRIMOIRE_IO_MAPPER_H_ 3 | 4 | #include 5 | 6 | #include "marisa/base.h" 7 | 8 | namespace marisa { 9 | namespace grimoire { 10 | namespace io { 11 | 12 | class Mapper { 13 | public: 14 | Mapper(); 15 | ~Mapper(); 16 | 17 | void open(const char *filename); 18 | void open(const void *ptr, std::size_t size); 19 | 20 | template 21 | void map(T *obj) { 22 | MARISA_THROW_IF(obj == NULL, MARISA_NULL_ERROR); 23 | *obj = *static_cast(map_data(sizeof(T))); 24 | } 25 | 26 | template 27 | void map(const T **objs, std::size_t num_objs) { 28 | MARISA_THROW_IF((objs == NULL) && (num_objs != 0), MARISA_NULL_ERROR); 29 | MARISA_THROW_IF(num_objs > (MARISA_SIZE_MAX / sizeof(T)), 30 | MARISA_SIZE_ERROR); 31 | *objs = static_cast(map_data(sizeof(T) * num_objs)); 32 | } 33 | 34 | void seek(std::size_t size); 35 | 36 | bool is_open() const; 37 | 38 | void clear(); 39 | void swap(Mapper &rhs); 40 | 41 | private: 42 | const void *ptr_; 43 | void *origin_; 44 | std::size_t avail_; 45 | std::size_t size_; 46 | #if (defined _WIN32) || (defined _WIN64) 47 | void *file_; 48 | void *map_; 49 | #else // (defined _WIN32) || (defined _WIN64) 50 | int fd_; 51 | #endif // (defined _WIN32) || (defined _WIN64) 52 | 53 | void open_(const char *filename); 54 | void open_(const void *ptr, std::size_t size); 55 | 56 | const void *map_data(std::size_t size); 57 | 58 | // Disallows copy and assignment. 
59 | Mapper(const Mapper &); 60 | Mapper &operator=(const Mapper &); 61 | }; 62 | 63 | } // namespace io 64 | } // namespace grimoire 65 | } // namespace marisa 66 | 67 | #endif // MARISA_GRIMOIRE_IO_MAPPER_H_ 68 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/lib/marisa/grimoire/io/reader.cc: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #ifdef _WIN32 4 | #include 5 | #else // _WIN32 6 | #include 7 | #endif // _WIN32 8 | 9 | #include 10 | 11 | #include "marisa/grimoire/io/reader.h" 12 | 13 | namespace marisa { 14 | namespace grimoire { 15 | namespace io { 16 | 17 | Reader::Reader() 18 | : file_(NULL), fd_(-1), stream_(NULL), needs_fclose_(false) {} 19 | 20 | Reader::~Reader() { 21 | if (needs_fclose_) { 22 | ::fclose(file_); 23 | } 24 | } 25 | 26 | void Reader::open(const char *filename) { 27 | MARISA_THROW_IF(filename == NULL, MARISA_NULL_ERROR); 28 | 29 | Reader temp; 30 | temp.open_(filename); 31 | swap(temp); 32 | } 33 | 34 | void Reader::open(std::FILE *file) { 35 | MARISA_THROW_IF(file == NULL, MARISA_NULL_ERROR); 36 | 37 | Reader temp; 38 | temp.open_(file); 39 | swap(temp); 40 | } 41 | 42 | void Reader::open(int fd) { 43 | MARISA_THROW_IF(fd == -1, MARISA_CODE_ERROR); 44 | 45 | Reader temp; 46 | temp.open_(fd); 47 | swap(temp); 48 | } 49 | 50 | void Reader::open(std::istream &stream) { 51 | Reader temp; 52 | temp.open_(stream); 53 | swap(temp); 54 | } 55 | 56 | void Reader::clear() { 57 | Reader().swap(*this); 58 | } 59 | 60 | void Reader::swap(Reader &rhs) { 61 | marisa::swap(file_, rhs.file_); 62 | marisa::swap(fd_, rhs.fd_); 63 | marisa::swap(stream_, rhs.stream_); 64 | marisa::swap(needs_fclose_, rhs.needs_fclose_); 65 | } 66 | 67 | void Reader::seek(std::size_t size) { 68 | MARISA_THROW_IF(!is_open(), MARISA_STATE_ERROR); 69 | if (size == 0) { 70 | return; 71 | } else if (size <= 16) { 72 | char buf[16]; 73 | read_data(buf, size); 74 | } 
else { 75 | char buf[1024]; 76 | while (size != 0) { 77 | const std::size_t count = (size < sizeof(buf)) ? size : sizeof(buf); 78 | read_data(buf, count); 79 | size -= count; 80 | } 81 | } 82 | } 83 | 84 | bool Reader::is_open() const { 85 | return (file_ != NULL) || (fd_ != -1) || (stream_ != NULL); 86 | } 87 | 88 | void Reader::open_(const char *filename) { 89 | std::FILE *file = NULL; 90 | #ifdef _MSC_VER 91 | MARISA_THROW_IF(::fopen_s(&file, filename, "rb") != 0, MARISA_IO_ERROR); 92 | #else // _MSC_VER 93 | file = ::fopen(filename, "rb"); 94 | MARISA_THROW_IF(file == NULL, MARISA_IO_ERROR); 95 | #endif // _MSC_VER 96 | file_ = file; 97 | needs_fclose_ = true; 98 | } 99 | 100 | void Reader::open_(std::FILE *file) { 101 | file_ = file; 102 | } 103 | 104 | void Reader::open_(int fd) { 105 | fd_ = fd; 106 | } 107 | 108 | void Reader::open_(std::istream &stream) { 109 | stream_ = &stream; 110 | } 111 | 112 | void Reader::read_data(void *buf, std::size_t size) { 113 | MARISA_THROW_IF(!is_open(), MARISA_STATE_ERROR); 114 | if (size == 0) { 115 | return; 116 | } else if (fd_ != -1) { 117 | while (size != 0) { 118 | #ifdef _WIN32 119 | static const std::size_t CHUNK_SIZE = 120 | std::numeric_limits::max(); 121 | const unsigned int count = (size < CHUNK_SIZE) ? size : CHUNK_SIZE; 122 | const int size_read = ::_read(fd_, buf, count); 123 | #else // _WIN32 124 | static const std::size_t CHUNK_SIZE = 125 | std::numeric_limits< ::ssize_t>::max(); 126 | const ::size_t count = (size < CHUNK_SIZE) ? 
size : CHUNK_SIZE; 127 | const ::ssize_t size_read = ::read(fd_, buf, count); 128 | #endif // _WIN32 129 | MARISA_THROW_IF(size_read <= 0, MARISA_IO_ERROR); 130 | buf = static_cast(buf) + size_read; 131 | size -= size_read; 132 | } 133 | } else if (file_ != NULL) { 134 | MARISA_THROW_IF(::fread(buf, 1, size, file_) != size, MARISA_IO_ERROR); 135 | } else if (stream_ != NULL) { 136 | try { 137 | MARISA_THROW_IF(!stream_->read(static_cast(buf), size), 138 | MARISA_IO_ERROR); 139 | } catch (const std::ios_base::failure &) { 140 | MARISA_THROW(MARISA_IO_ERROR, "std::ios_base::failure"); 141 | } 142 | } 143 | } 144 | 145 | } // namespace io 146 | } // namespace grimoire 147 | } // namespace marisa 148 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/lib/marisa/grimoire/io/reader.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_GRIMOIRE_IO_READER_H_ 2 | #define MARISA_GRIMOIRE_IO_READER_H_ 3 | 4 | #include 5 | #include 6 | 7 | #include "marisa/base.h" 8 | 9 | namespace marisa { 10 | namespace grimoire { 11 | namespace io { 12 | 13 | class Reader { 14 | public: 15 | Reader(); 16 | ~Reader(); 17 | 18 | void open(const char *filename); 19 | void open(std::FILE *file); 20 | void open(int fd); 21 | void open(std::istream &stream); 22 | 23 | template 24 | void read(T *obj) { 25 | MARISA_THROW_IF(obj == NULL, MARISA_NULL_ERROR); 26 | read_data(obj, sizeof(T)); 27 | } 28 | 29 | template 30 | void read(T *objs, std::size_t num_objs) { 31 | MARISA_THROW_IF((objs == NULL) && (num_objs != 0), MARISA_NULL_ERROR); 32 | MARISA_THROW_IF(num_objs > (MARISA_SIZE_MAX / sizeof(T)), 33 | MARISA_SIZE_ERROR); 34 | read_data(objs, sizeof(T) * num_objs); 35 | } 36 | 37 | void seek(std::size_t size); 38 | 39 | bool is_open() const; 40 | 41 | void clear(); 42 | void swap(Reader &rhs); 43 | 44 | private: 45 | std::FILE *file_; 46 | int fd_; 47 | std::istream *stream_; 48 | bool 
needs_fclose_; 49 | 50 | void open_(const char *filename); 51 | void open_(std::FILE *file); 52 | void open_(int fd); 53 | void open_(std::istream &stream); 54 | 55 | void read_data(void *buf, std::size_t size); 56 | 57 | // Disallows copy and assignment. 58 | Reader(const Reader &); 59 | Reader &operator=(const Reader &); 60 | }; 61 | 62 | } // namespace io 63 | } // namespace grimoire 64 | } // namespace marisa 65 | 66 | #endif // MARISA_GRIMOIRE_IO_READER_H_ 67 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/lib/marisa/grimoire/io/writer.cc: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #ifdef _WIN32 4 | #include 5 | #else // _WIN32 6 | #include 7 | #endif // _WIN32 8 | 9 | #include 10 | 11 | #include "marisa/grimoire/io/writer.h" 12 | 13 | namespace marisa { 14 | namespace grimoire { 15 | namespace io { 16 | 17 | Writer::Writer() 18 | : file_(NULL), fd_(-1), stream_(NULL), needs_fclose_(false) {} 19 | 20 | Writer::~Writer() { 21 | if (needs_fclose_) { 22 | ::fclose(file_); 23 | } 24 | } 25 | 26 | void Writer::open(const char *filename) { 27 | MARISA_THROW_IF(filename == NULL, MARISA_NULL_ERROR); 28 | 29 | Writer temp; 30 | temp.open_(filename); 31 | swap(temp); 32 | } 33 | 34 | void Writer::open(std::FILE *file) { 35 | MARISA_THROW_IF(file == NULL, MARISA_NULL_ERROR); 36 | 37 | Writer temp; 38 | temp.open_(file); 39 | swap(temp); 40 | } 41 | 42 | void Writer::open(int fd) { 43 | MARISA_THROW_IF(fd == -1, MARISA_CODE_ERROR); 44 | 45 | Writer temp; 46 | temp.open_(fd); 47 | swap(temp); 48 | } 49 | 50 | void Writer::open(std::ostream &stream) { 51 | Writer temp; 52 | temp.open_(stream); 53 | swap(temp); 54 | } 55 | 56 | void Writer::clear() { 57 | Writer().swap(*this); 58 | } 59 | 60 | void Writer::swap(Writer &rhs) { 61 | marisa::swap(file_, rhs.file_); 62 | marisa::swap(fd_, rhs.fd_); 63 | marisa::swap(stream_, rhs.stream_); 64 | 
marisa::swap(needs_fclose_, rhs.needs_fclose_); 65 | } 66 | 67 | void Writer::seek(std::size_t size) { 68 | MARISA_THROW_IF(!is_open(), MARISA_STATE_ERROR); 69 | if (size == 0) { 70 | return; 71 | } else if (size <= 16) { 72 | const char buf[16] = {}; 73 | write_data(buf, size); 74 | } else { 75 | const char buf[1024] = {}; 76 | do { 77 | const std::size_t count = (size < sizeof(buf)) ? size : sizeof(buf); 78 | write_data(buf, count); 79 | size -= count; 80 | } while (size != 0); 81 | } 82 | } 83 | 84 | bool Writer::is_open() const { 85 | return (file_ != NULL) || (fd_ != -1) || (stream_ != NULL); 86 | } 87 | 88 | void Writer::open_(const char *filename) { 89 | std::FILE *file = NULL; 90 | #ifdef _MSC_VER 91 | MARISA_THROW_IF(::fopen_s(&file, filename, "wb") != 0, MARISA_IO_ERROR); 92 | #else // _MSC_VER 93 | file = ::fopen(filename, "wb"); 94 | MARISA_THROW_IF(file == NULL, MARISA_IO_ERROR); 95 | #endif // _MSC_VER 96 | file_ = file; 97 | needs_fclose_ = true; 98 | } 99 | 100 | void Writer::open_(std::FILE *file) { 101 | file_ = file; 102 | } 103 | 104 | void Writer::open_(int fd) { 105 | fd_ = fd; 106 | } 107 | 108 | void Writer::open_(std::ostream &stream) { 109 | stream_ = &stream; 110 | } 111 | 112 | void Writer::write_data(const void *data, std::size_t size) { 113 | MARISA_THROW_IF(!is_open(), MARISA_STATE_ERROR); 114 | if (size == 0) { 115 | return; 116 | } else if (fd_ != -1) { 117 | while (size != 0) { 118 | #ifdef _WIN32 119 | static const std::size_t CHUNK_SIZE = 120 | std::numeric_limits::max(); 121 | const unsigned int count = (size < CHUNK_SIZE) ? size : CHUNK_SIZE; 122 | const int size_written = ::_write(fd_, data, count); 123 | #else // _WIN32 124 | static const std::size_t CHUNK_SIZE = 125 | std::numeric_limits< ::ssize_t>::max(); 126 | const ::size_t count = (size < CHUNK_SIZE) ? 
size : CHUNK_SIZE; 127 | const ::ssize_t size_written = ::write(fd_, data, count); 128 | #endif // _WIN32 129 | MARISA_THROW_IF(size_written <= 0, MARISA_IO_ERROR); 130 | data = static_cast(data) + size_written; 131 | size -= size_written; 132 | } 133 | } else if (file_ != NULL) { 134 | MARISA_THROW_IF(::fwrite(data, 1, size, file_) != size, MARISA_IO_ERROR); 135 | MARISA_THROW_IF(::fflush(file_) != 0, MARISA_IO_ERROR); 136 | } else if (stream_ != NULL) { 137 | try { 138 | MARISA_THROW_IF(!stream_->write(static_cast(data), size), 139 | MARISA_IO_ERROR); 140 | } catch (const std::ios_base::failure &) { 141 | MARISA_THROW(MARISA_IO_ERROR, "std::ios_base::failure"); 142 | } 143 | } 144 | } 145 | 146 | } // namespace io 147 | } // namespace grimoire 148 | } // namespace marisa 149 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/lib/marisa/grimoire/io/writer.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_GRIMOIRE_IO_WRITER_H_ 2 | #define MARISA_GRIMOIRE_IO_WRITER_H_ 3 | 4 | #include 5 | #include 6 | 7 | #include "marisa/base.h" 8 | 9 | namespace marisa { 10 | namespace grimoire { 11 | namespace io { 12 | 13 | class Writer { 14 | public: 15 | Writer(); 16 | ~Writer(); 17 | 18 | void open(const char *filename); 19 | void open(std::FILE *file); 20 | void open(int fd); 21 | void open(std::ostream &stream); 22 | 23 | template 24 | void write(const T &obj) { 25 | write_data(&obj, sizeof(T)); 26 | } 27 | 28 | template 29 | void write(const T *objs, std::size_t num_objs) { 30 | MARISA_THROW_IF((objs == NULL) && (num_objs != 0), MARISA_NULL_ERROR); 31 | MARISA_THROW_IF(num_objs > (MARISA_SIZE_MAX / sizeof(T)), 32 | MARISA_SIZE_ERROR); 33 | write_data(objs, sizeof(T) * num_objs); 34 | } 35 | 36 | void seek(std::size_t size); 37 | 38 | bool is_open() const; 39 | 40 | void clear(); 41 | void swap(Writer &rhs); 42 | 43 | private: 44 | std::FILE *file_; 45 | int fd_; 46 
| std::ostream *stream_; 47 | bool needs_fclose_; 48 | 49 | void open_(const char *filename); 50 | void open_(std::FILE *file); 51 | void open_(int fd); 52 | void open_(std::ostream &stream); 53 | 54 | void write_data(const void *data, std::size_t size); 55 | 56 | // Disallows copy and assignment. 57 | Writer(const Writer &); 58 | Writer &operator=(const Writer &); 59 | }; 60 | 61 | } // namespace io 62 | } // namespace grimoire 63 | } // namespace marisa 64 | 65 | #endif // MARISA_GRIMOIRE_IO_WRITER_H_ 66 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/lib/marisa/grimoire/trie.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_GRIMOIRE_TRIE_H_ 2 | #define MARISA_GRIMOIRE_TRIE_H_ 3 | 4 | #include "marisa/grimoire/trie/state.h" 5 | #include "marisa/grimoire/trie/louds-trie.h" 6 | 7 | namespace marisa { 8 | namespace grimoire { 9 | 10 | using trie::State; 11 | using trie::LoudsTrie; 12 | 13 | } // namespace grimoire 14 | } // namespace marisa 15 | 16 | #endif // MARISA_GRIMOIRE_TRIE_H_ 17 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/lib/marisa/grimoire/trie/Makefile.am: -------------------------------------------------------------------------------- 1 | MY_INCLUDE = -I$(top_srcdir)/include -I$(top_srcdir)/lib 2 | 3 | AM_CXXFLAGS = -Wall -Weffc++ -Wextra -Wconversion $(MY_INCLUDE) 4 | 5 | noinst_LTLIBRARIES = libtrie.la 6 | 7 | libtrie_la_LDFLAGS = -no-undefined 8 | 9 | libtrie_la_SOURCES = \ 10 | tail.cc \ 11 | louds-trie.cc 12 | 13 | noinst_HEADERS = \ 14 | config.h \ 15 | header.h \ 16 | key.h \ 17 | range.h \ 18 | entry.h \ 19 | tail.h \ 20 | cache.h \ 21 | history.h \ 22 | state.h \ 23 | louds-trie.h 24 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/lib/marisa/grimoire/trie/cache.h: 
-------------------------------------------------------------------------------- 1 | #ifndef MARISA_GRIMOIRE_TRIE_CACHE_H_ 2 | #define MARISA_GRIMOIRE_TRIE_CACHE_H_ 3 | 4 | #include 5 | 6 | #include "marisa/base.h" 7 | 8 | namespace marisa { 9 | namespace grimoire { 10 | namespace trie { 11 | 12 | class Cache { 13 | public: 14 | Cache() : parent_(0), child_(0), union_() { 15 | union_.weight = FLT_MIN; 16 | } 17 | Cache(const Cache &cache) 18 | : parent_(cache.parent_), child_(cache.child_), union_(cache.union_) {} 19 | 20 | Cache &operator=(const Cache &cache) { 21 | parent_ = cache.parent_; 22 | child_ = cache.child_; 23 | union_ = cache.union_; 24 | return *this; 25 | } 26 | 27 | void set_parent(std::size_t parent) { 28 | MARISA_DEBUG_IF(parent > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 29 | parent_ = (UInt32)parent; 30 | } 31 | void set_child(std::size_t child) { 32 | MARISA_DEBUG_IF(child > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 33 | child_ = (UInt32)child; 34 | } 35 | void set_base(UInt8 base) { 36 | union_.link = (union_.link & ~0xFFU) | base; 37 | } 38 | void set_extra(std::size_t extra) { 39 | MARISA_DEBUG_IF(extra > (MARISA_UINT32_MAX >> 8), MARISA_SIZE_ERROR); 40 | union_.link = (UInt32)((union_.link & 0xFFU) | (extra << 8)); 41 | } 42 | void set_weight(float weight) { 43 | union_.weight = weight; 44 | } 45 | 46 | std::size_t parent() const { 47 | return parent_; 48 | } 49 | std::size_t child() const { 50 | return child_; 51 | } 52 | UInt8 base() const { 53 | return (UInt8)(union_.link & 0xFFU); 54 | } 55 | std::size_t extra() const { 56 | return union_.link >> 8; 57 | } 58 | char label() const { 59 | return (char)base(); 60 | } 61 | std::size_t link() const { 62 | return union_.link; 63 | } 64 | float weight() const { 65 | return union_.weight; 66 | } 67 | 68 | private: 69 | UInt32 parent_; 70 | UInt32 child_; 71 | union Union { 72 | UInt32 link; 73 | float weight; 74 | } union_; 75 | }; 76 | 77 | } // namespace trie 78 | } // namespace grimoire 79 | } // 
namespace marisa 80 | 81 | #endif // MARISA_GRIMOIRE_TRIE_CACHE_H_ 82 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/lib/marisa/grimoire/trie/config.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_GRIMOIRE_TRIE_CONFIG_H_ 2 | #define MARISA_GRIMOIRE_TRIE_CONFIG_H_ 3 | 4 | #include "marisa/base.h" 5 | 6 | namespace marisa { 7 | namespace grimoire { 8 | namespace trie { 9 | 10 | class Config { 11 | public: 12 | Config() 13 | : num_tries_(MARISA_DEFAULT_NUM_TRIES), 14 | cache_level_(MARISA_DEFAULT_CACHE), 15 | tail_mode_(MARISA_DEFAULT_TAIL), 16 | node_order_(MARISA_DEFAULT_ORDER) {} 17 | 18 | void parse(int config_flags) { 19 | Config temp; 20 | temp.parse_(config_flags); 21 | swap(temp); 22 | } 23 | 24 | int flags() const { 25 | return (int)num_tries_ | tail_mode_ | node_order_; 26 | } 27 | 28 | std::size_t num_tries() const { 29 | return num_tries_; 30 | } 31 | CacheLevel cache_level() const { 32 | return cache_level_; 33 | } 34 | TailMode tail_mode() const { 35 | return tail_mode_; 36 | } 37 | NodeOrder node_order() const { 38 | return node_order_; 39 | } 40 | 41 | void clear() { 42 | Config().swap(*this); 43 | } 44 | void swap(Config &rhs) { 45 | marisa::swap(num_tries_, rhs.num_tries_); 46 | marisa::swap(cache_level_, rhs.cache_level_); 47 | marisa::swap(tail_mode_, rhs.tail_mode_); 48 | marisa::swap(node_order_, rhs.node_order_); 49 | } 50 | 51 | private: 52 | std::size_t num_tries_; 53 | CacheLevel cache_level_; 54 | TailMode tail_mode_; 55 | NodeOrder node_order_; 56 | 57 | void parse_(int config_flags) { 58 | MARISA_THROW_IF((config_flags & ~MARISA_CONFIG_MASK) != 0, 59 | MARISA_CODE_ERROR); 60 | 61 | parse_num_tries(config_flags); 62 | parse_cache_level(config_flags); 63 | parse_tail_mode(config_flags); 64 | parse_node_order(config_flags); 65 | } 66 | 67 | void parse_num_tries(int config_flags) { 68 | const int num_tries = config_flags & 
MARISA_NUM_TRIES_MASK; 69 | if (num_tries != 0) { 70 | num_tries_ = num_tries; 71 | } 72 | } 73 | 74 | void parse_cache_level(int config_flags) { 75 | switch (config_flags & MARISA_CACHE_LEVEL_MASK) { 76 | case 0: { 77 | cache_level_ = MARISA_DEFAULT_CACHE; 78 | break; 79 | } 80 | case MARISA_HUGE_CACHE: { 81 | cache_level_ = MARISA_HUGE_CACHE; 82 | break; 83 | } 84 | case MARISA_LARGE_CACHE: { 85 | cache_level_ = MARISA_LARGE_CACHE; 86 | break; 87 | } 88 | case MARISA_NORMAL_CACHE: { 89 | cache_level_ = MARISA_NORMAL_CACHE; 90 | break; 91 | } 92 | case MARISA_SMALL_CACHE: { 93 | cache_level_ = MARISA_SMALL_CACHE; 94 | break; 95 | } 96 | case MARISA_TINY_CACHE: { 97 | cache_level_ = MARISA_TINY_CACHE; 98 | break; 99 | } 100 | default: { 101 | MARISA_THROW(MARISA_CODE_ERROR, "undefined cache level"); 102 | } 103 | } 104 | } 105 | 106 | void parse_tail_mode(int config_flags) { 107 | switch (config_flags & MARISA_TAIL_MODE_MASK) { 108 | case 0: { 109 | tail_mode_ = MARISA_DEFAULT_TAIL; 110 | break; 111 | } 112 | case MARISA_TEXT_TAIL: { 113 | tail_mode_ = MARISA_TEXT_TAIL; 114 | break; 115 | } 116 | case MARISA_BINARY_TAIL: { 117 | tail_mode_ = MARISA_BINARY_TAIL; 118 | break; 119 | } 120 | default: { 121 | MARISA_THROW(MARISA_CODE_ERROR, "undefined tail mode"); 122 | } 123 | } 124 | } 125 | 126 | void parse_node_order(int config_flags) { 127 | switch (config_flags & MARISA_NODE_ORDER_MASK) { 128 | case 0: { 129 | node_order_ = MARISA_DEFAULT_ORDER; 130 | break; 131 | } 132 | case MARISA_LABEL_ORDER: { 133 | node_order_ = MARISA_LABEL_ORDER; 134 | break; 135 | } 136 | case MARISA_WEIGHT_ORDER: { 137 | node_order_ = MARISA_WEIGHT_ORDER; 138 | break; 139 | } 140 | default: { 141 | MARISA_THROW(MARISA_CODE_ERROR, "undefined node order"); 142 | } 143 | } 144 | } 145 | 146 | // Disallows copy and assignment. 
147 | Config(const Config &); 148 | Config &operator=(const Config &); 149 | }; 150 | 151 | } // namespace trie 152 | } // namespace grimoire 153 | } // namespace marisa 154 | 155 | #endif // MARISA_GRIMOIRE_TRIE_CONFIG_H_ 156 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/lib/marisa/grimoire/trie/entry.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_GRIMOIRE_TRIE_ENTRY_H_ 2 | #define MARISA_GRIMOIRE_TRIE_ENTRY_H_ 3 | 4 | #include "marisa/base.h" 5 | 6 | namespace marisa { 7 | namespace grimoire { 8 | namespace trie { 9 | 10 | class Entry { 11 | public: 12 | Entry() 13 | : ptr_(static_cast(NULL) - 1), length_(0), id_(0) {} 14 | Entry(const Entry &entry) 15 | : ptr_(entry.ptr_), length_(entry.length_), id_(entry.id_) {} 16 | 17 | Entry &operator=(const Entry &entry) { 18 | ptr_ = entry.ptr_; 19 | length_ = entry.length_; 20 | id_ = entry.id_; 21 | return *this; 22 | } 23 | 24 | char operator[](std::size_t i) const { 25 | MARISA_DEBUG_IF(i >= length_, MARISA_BOUND_ERROR); 26 | return *(ptr_ - i); 27 | } 28 | 29 | void set_str(const char *ptr, std::size_t length) { 30 | MARISA_DEBUG_IF((ptr == NULL) && (length != 0), MARISA_NULL_ERROR); 31 | MARISA_DEBUG_IF(length > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 32 | ptr_ = ptr + length - 1; 33 | length_ = (UInt32)length; 34 | } 35 | void set_id(std::size_t id) { 36 | MARISA_DEBUG_IF(id > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 37 | id_ = (UInt32)id; 38 | } 39 | 40 | const char *ptr() const { 41 | return ptr_ - length_ + 1; 42 | } 43 | std::size_t length() const { 44 | return length_; 45 | } 46 | std::size_t id() const { 47 | return id_; 48 | } 49 | 50 | class StringComparer { 51 | public: 52 | bool operator()(const Entry &lhs, const Entry &rhs) const { 53 | for (std::size_t i = 0; i < lhs.length(); ++i) { 54 | if (i == rhs.length()) { 55 | return true; 56 | } 57 | if (lhs[i] != rhs[i]) { 58 | return (UInt8)lhs[i] 
> (UInt8)rhs[i]; 59 | } 60 | } 61 | return lhs.length() > rhs.length(); 62 | } 63 | }; 64 | 65 | class IDComparer { 66 | public: 67 | bool operator()(const Entry &lhs, const Entry &rhs) const { 68 | return lhs.id_ < rhs.id_; 69 | } 70 | }; 71 | 72 | private: 73 | const char *ptr_; 74 | UInt32 length_; 75 | UInt32 id_; 76 | }; 77 | 78 | } // namespace trie 79 | } // namespace grimoire 80 | } // namespace marisa 81 | 82 | #endif // MARISA_GRIMOIRE_TRIE_ENTRY_H_ 83 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/lib/marisa/grimoire/trie/header.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_GRIMOIRE_TRIE_HEADER_H_ 2 | #define MARISA_GRIMOIRE_TRIE_HEADER_H_ 3 | 4 | #include "marisa/grimoire/io.h" 5 | 6 | namespace marisa { 7 | namespace grimoire { 8 | namespace trie { 9 | 10 | class Header { 11 | public: 12 | enum { 13 | HEADER_SIZE = 16 14 | }; 15 | 16 | Header() {} 17 | 18 | void map(Mapper &mapper) { 19 | const char *ptr; 20 | mapper.map(&ptr, HEADER_SIZE); 21 | MARISA_THROW_IF(!test_header(ptr), MARISA_FORMAT_ERROR); 22 | } 23 | void read(Reader &reader) { 24 | char buf[HEADER_SIZE]; 25 | reader.read(buf, HEADER_SIZE); 26 | MARISA_THROW_IF(!test_header(buf), MARISA_FORMAT_ERROR); 27 | } 28 | void write(Writer &writer) const { 29 | writer.write(get_header(), HEADER_SIZE); 30 | } 31 | 32 | std::size_t io_size() const { 33 | return HEADER_SIZE; 34 | } 35 | 36 | private: 37 | 38 | static const char *get_header() { 39 | static const char buf[HEADER_SIZE] = "We love Marisa."; 40 | return buf; 41 | } 42 | 43 | static bool test_header(const char *ptr) { 44 | for (std::size_t i = 0; i < HEADER_SIZE; ++i) { 45 | if (ptr[i] != get_header()[i]) { 46 | return false; 47 | } 48 | } 49 | return true; 50 | } 51 | 52 | // Disallows copy and assignment. 
53 | Header(const Header &); 54 | Header &operator=(const Header &); 55 | }; 56 | 57 | } // namespace trie 58 | } // namespace marisa 59 | } // namespace grimoire 60 | 61 | #endif // MARISA_GRIMOIRE_TRIE_HEADER_H_ 62 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/lib/marisa/grimoire/trie/history.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_GRIMOIRE_TRIE_STATE_HISTORY_H_ 2 | #define MARISA_GRIMOIRE_TRIE_STATE_HISTORY_H_ 3 | 4 | #include "marisa/base.h" 5 | 6 | namespace marisa { 7 | namespace grimoire { 8 | namespace trie { 9 | 10 | class History { 11 | public: 12 | History() 13 | : node_id_(0), louds_pos_(0), key_pos_(0), 14 | link_id_(MARISA_INVALID_LINK_ID), key_id_(MARISA_INVALID_KEY_ID) {} 15 | 16 | void set_node_id(std::size_t node_id) { 17 | MARISA_DEBUG_IF(node_id > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 18 | node_id_ = (UInt32)node_id; 19 | } 20 | void set_louds_pos(std::size_t louds_pos) { 21 | MARISA_DEBUG_IF(louds_pos > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 22 | louds_pos_ = (UInt32)louds_pos; 23 | } 24 | void set_key_pos(std::size_t key_pos) { 25 | MARISA_DEBUG_IF(key_pos > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 26 | key_pos_ = (UInt32)key_pos; 27 | } 28 | void set_link_id(std::size_t link_id) { 29 | MARISA_DEBUG_IF(link_id > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 30 | link_id_ = (UInt32)link_id; 31 | } 32 | void set_key_id(std::size_t key_id) { 33 | MARISA_DEBUG_IF(key_id > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 34 | key_id_ = (UInt32)key_id; 35 | } 36 | 37 | std::size_t node_id() const { 38 | return node_id_; 39 | } 40 | std::size_t louds_pos() const { 41 | return louds_pos_; 42 | } 43 | std::size_t key_pos() const { 44 | return key_pos_; 45 | } 46 | std::size_t link_id() const { 47 | return link_id_; 48 | } 49 | std::size_t key_id() const { 50 | return key_id_; 51 | } 52 | 53 | private: 54 | UInt32 node_id_; 55 | UInt32 louds_pos_; 
56 | UInt32 key_pos_; 57 | UInt32 link_id_; 58 | UInt32 key_id_; 59 | }; 60 | 61 | } // namespace trie 62 | } // namespace grimoire 63 | } // namespace marisa 64 | 65 | #endif // MARISA_GRIMOIRE_TRIE_STATE_HISTORY_H_ 66 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/lib/marisa/grimoire/trie/louds-trie.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_GRIMOIRE_TRIE_LOUDS_TRIE_H_ 2 | #define MARISA_GRIMOIRE_TRIE_LOUDS_TRIE_H_ 3 | 4 | #include "marisa/keyset.h" 5 | #include "marisa/agent.h" 6 | #include "marisa/grimoire/vector.h" 7 | #include "marisa/grimoire/trie/config.h" 8 | #include "marisa/grimoire/trie/key.h" 9 | #include "marisa/grimoire/trie/tail.h" 10 | #include "marisa/grimoire/trie/cache.h" 11 | 12 | namespace marisa { 13 | namespace grimoire { 14 | namespace trie { 15 | 16 | class LoudsTrie { 17 | public: 18 | LoudsTrie(); 19 | ~LoudsTrie(); 20 | 21 | void build(Keyset &keyset, int flags); 22 | 23 | void map(Mapper &mapper); 24 | void read(Reader &reader); 25 | void write(Writer &writer) const; 26 | 27 | bool lookup(Agent &agent) const; 28 | void reverse_lookup(Agent &agent) const; 29 | bool common_prefix_search(Agent &agent) const; 30 | bool predictive_search(Agent &agent) const; 31 | 32 | std::size_t num_tries() const { 33 | return config_.num_tries(); 34 | } 35 | std::size_t num_keys() const { 36 | return size(); 37 | } 38 | std::size_t num_nodes() const { 39 | return (louds_.size() / 2) - 1; 40 | } 41 | 42 | CacheLevel cache_level() const { 43 | return config_.cache_level(); 44 | } 45 | TailMode tail_mode() const { 46 | return config_.tail_mode(); 47 | } 48 | NodeOrder node_order() const { 49 | return config_.node_order(); 50 | } 51 | 52 | bool empty() const { 53 | return size() == 0; 54 | } 55 | std::size_t size() const { 56 | return terminal_flags_.num_1s(); 57 | } 58 | std::size_t total_size() const; 59 | std::size_t io_size() const; 60 | 
61 | void clear(); 62 | void swap(LoudsTrie &rhs); 63 | 64 | private: 65 | BitVector louds_; 66 | BitVector terminal_flags_; 67 | BitVector link_flags_; 68 | Vector bases_; 69 | FlatVector extras_; 70 | Tail tail_; 71 | scoped_ptr next_trie_; 72 | Vector cache_; 73 | std::size_t cache_mask_; 74 | std::size_t num_l1_nodes_; 75 | Config config_; 76 | Mapper mapper_; 77 | 78 | void build_(Keyset &keyset, const Config &config); 79 | 80 | template 81 | void build_trie(Vector &keys, 82 | Vector *terminals, const Config &config, std::size_t trie_id); 83 | template 84 | void build_current_trie(Vector &keys, 85 | Vector *terminals, const Config &config, std::size_t trie_id); 86 | template 87 | void build_next_trie(Vector &keys, 88 | Vector *terminals, const Config &config, std::size_t trie_id); 89 | template 90 | void build_terminals(const Vector &keys, 91 | Vector *terminals) const; 92 | 93 | void reserve_cache(const Config &config, std::size_t trie_id, 94 | std::size_t num_keys); 95 | template 96 | void cache(std::size_t parent, std::size_t child, 97 | float weight, char label); 98 | void fill_cache(); 99 | 100 | void map_(Mapper &mapper); 101 | void read_(Reader &reader); 102 | void write_(Writer &writer) const; 103 | 104 | inline bool find_child(Agent &agent) const; 105 | inline bool predictive_find_child(Agent &agent) const; 106 | 107 | inline void restore(Agent &agent, std::size_t node_id) const; 108 | inline bool match(Agent &agent, std::size_t node_id) const; 109 | inline bool prefix_match(Agent &agent, std::size_t node_id) const; 110 | 111 | void restore_(Agent &agent, std::size_t node_id) const; 112 | bool match_(Agent &agent, std::size_t node_id) const; 113 | bool prefix_match_(Agent &agent, std::size_t node_id) const; 114 | 115 | inline std::size_t get_cache_id(std::size_t node_id, char label) const; 116 | inline std::size_t get_cache_id(std::size_t node_id) const; 117 | 118 | inline std::size_t get_link(std::size_t node_id) const; 119 | inline std::size_t 
get_link(std::size_t node_id, 120 | std::size_t link_id) const; 121 | 122 | inline std::size_t update_link_id(std::size_t link_id, 123 | std::size_t node_id) const; 124 | 125 | // Disallows copy and assignment. 126 | LoudsTrie(const LoudsTrie &); 127 | LoudsTrie &operator=(const LoudsTrie &); 128 | }; 129 | 130 | } // namespace trie 131 | } // namespace grimoire 132 | } // namespace marisa 133 | 134 | #endif // MARISA_GRIMOIRE_TRIE_LOUDS_TRIE_H_ 135 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/lib/marisa/grimoire/trie/range.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_GRIMOIRE_TRIE_RANGE_H_ 2 | #define MARISA_GRIMOIRE_TRIE_RANGE_H_ 3 | 4 | #include "marisa/base.h" 5 | 6 | namespace marisa { 7 | namespace grimoire { 8 | namespace trie { 9 | 10 | class Range { 11 | public: 12 | Range() : begin_(0), end_(0), key_pos_(0) {} 13 | 14 | void set_begin(std::size_t begin) { 15 | MARISA_DEBUG_IF(begin > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 16 | begin_ = static_cast(begin); 17 | } 18 | void set_end(std::size_t end) { 19 | MARISA_DEBUG_IF(end > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 20 | end_ = static_cast(end); 21 | } 22 | void set_key_pos(std::size_t key_pos) { 23 | MARISA_DEBUG_IF(key_pos > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 24 | key_pos_ = static_cast(key_pos); 25 | } 26 | 27 | std::size_t begin() const { 28 | return begin_; 29 | } 30 | std::size_t end() const { 31 | return end_; 32 | } 33 | std::size_t key_pos() const { 34 | return key_pos_; 35 | } 36 | 37 | private: 38 | UInt32 begin_; 39 | UInt32 end_; 40 | UInt32 key_pos_; 41 | }; 42 | 43 | inline Range make_range(std::size_t begin, std::size_t end, 44 | std::size_t key_pos) { 45 | Range range; 46 | range.set_begin(begin); 47 | range.set_end(end); 48 | range.set_key_pos(key_pos); 49 | return range; 50 | } 51 | 52 | class WeightedRange { 53 | public: 54 | WeightedRange() : range_(), weight_(0.0F) 
{} 55 | 56 | void set_range(const Range &range) { 57 | range_ = range; 58 | } 59 | void set_begin(std::size_t begin) { 60 | range_.set_begin(begin); 61 | } 62 | void set_end(std::size_t end) { 63 | range_.set_end(end); 64 | } 65 | void set_key_pos(std::size_t key_pos) { 66 | range_.set_key_pos(key_pos); 67 | } 68 | void set_weight(float weight) { 69 | weight_ = weight; 70 | } 71 | 72 | const Range &range() const { 73 | return range_; 74 | } 75 | std::size_t begin() const { 76 | return range_.begin(); 77 | } 78 | std::size_t end() const { 79 | return range_.end(); 80 | } 81 | std::size_t key_pos() const { 82 | return range_.key_pos(); 83 | } 84 | float weight() const { 85 | return weight_; 86 | } 87 | 88 | private: 89 | Range range_; 90 | float weight_; 91 | }; 92 | 93 | inline bool operator<(const WeightedRange &lhs, const WeightedRange &rhs) { 94 | return lhs.weight() < rhs.weight(); 95 | } 96 | 97 | inline bool operator>(const WeightedRange &lhs, const WeightedRange &rhs) { 98 | return lhs.weight() > rhs.weight(); 99 | } 100 | 101 | inline WeightedRange make_weighted_range(std::size_t begin, std::size_t end, 102 | std::size_t key_pos, float weight) { 103 | WeightedRange range; 104 | range.set_begin(begin); 105 | range.set_end(end); 106 | range.set_key_pos(key_pos); 107 | range.set_weight(weight); 108 | return range; 109 | } 110 | 111 | } // namespace trie 112 | } // namespace grimoire 113 | } // namespace marisa 114 | 115 | #endif // MARISA_GRIMOIRE_TRIE_RANGE_H_ 116 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/lib/marisa/grimoire/trie/state.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_GRIMOIRE_TRIE_STATE_H_ 2 | #define MARISA_GRIMOIRE_TRIE_STATE_H_ 3 | 4 | #include "marisa/grimoire/vector.h" 5 | #include "marisa/grimoire/trie/history.h" 6 | 7 | namespace marisa { 8 | namespace grimoire { 9 | namespace trie { 10 | 11 | // A search agent has its 
internal state and the status codes are defined 12 | // below. 13 | typedef enum StatusCode { 14 | MARISA_READY_TO_ALL, 15 | MARISA_READY_TO_COMMON_PREFIX_SEARCH, 16 | MARISA_READY_TO_PREDICTIVE_SEARCH, 17 | MARISA_END_OF_COMMON_PREFIX_SEARCH, 18 | MARISA_END_OF_PREDICTIVE_SEARCH, 19 | } StatusCode; 20 | 21 | class State { 22 | public: 23 | State() 24 | : key_buf_(), history_(), node_id_(0), query_pos_(0), 25 | history_pos_(0), status_code_(MARISA_READY_TO_ALL) {} 26 | 27 | void set_node_id(std::size_t node_id) { 28 | MARISA_DEBUG_IF(node_id > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 29 | node_id_ = (UInt32)node_id; 30 | } 31 | void set_query_pos(std::size_t query_pos) { 32 | MARISA_DEBUG_IF(query_pos > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 33 | query_pos_ = (UInt32)query_pos; 34 | } 35 | void set_history_pos(std::size_t history_pos) { 36 | MARISA_DEBUG_IF(history_pos > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 37 | history_pos_ = (UInt32)history_pos; 38 | } 39 | void set_status_code(StatusCode status_code) { 40 | status_code_ = status_code; 41 | } 42 | 43 | std::size_t node_id() const { 44 | return node_id_; 45 | } 46 | std::size_t query_pos() const { 47 | return query_pos_; 48 | } 49 | std::size_t history_pos() const { 50 | return history_pos_; 51 | } 52 | StatusCode status_code() const { 53 | return status_code_; 54 | } 55 | 56 | const Vector &key_buf() const { 57 | return key_buf_; 58 | } 59 | const Vector &history() const { 60 | return history_; 61 | } 62 | 63 | Vector &key_buf() { 64 | return key_buf_; 65 | } 66 | Vector &history() { 67 | return history_; 68 | } 69 | 70 | void reset() { 71 | status_code_ = MARISA_READY_TO_ALL; 72 | } 73 | 74 | void lookup_init() { 75 | node_id_ = 0; 76 | query_pos_ = 0; 77 | status_code_ = MARISA_READY_TO_ALL; 78 | } 79 | void reverse_lookup_init() { 80 | key_buf_.resize(0); 81 | key_buf_.reserve(32); 82 | status_code_ = MARISA_READY_TO_ALL; 83 | } 84 | void common_prefix_search_init() { 85 | node_id_ = 0; 86 | query_pos_ = 0; 87 
| status_code_ = MARISA_READY_TO_COMMON_PREFIX_SEARCH; 88 | } 89 | void predictive_search_init() { 90 | key_buf_.resize(0); 91 | key_buf_.reserve(64); 92 | history_.resize(0); 93 | history_.reserve(4); 94 | node_id_ = 0; 95 | query_pos_ = 0; 96 | history_pos_ = 0; 97 | status_code_ = MARISA_READY_TO_PREDICTIVE_SEARCH; 98 | } 99 | 100 | private: 101 | Vector key_buf_; 102 | Vector history_; 103 | UInt32 node_id_; 104 | UInt32 query_pos_; 105 | UInt32 history_pos_; 106 | StatusCode status_code_; 107 | 108 | // Disallows copy and assignment. 109 | State(const State &); 110 | State &operator=(const State &); 111 | }; 112 | 113 | } // namespace trie 114 | } // namespace grimoire 115 | } // namespace marisa 116 | 117 | #endif // MARISA_GRIMOIRE_TRIE_STATE_H_ 118 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/lib/marisa/grimoire/trie/tail.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_GRIMOIRE_TRIE_TAIL_H_ 2 | #define MARISA_GRIMOIRE_TRIE_TAIL_H_ 3 | 4 | #include "marisa/agent.h" 5 | #include "marisa/grimoire/vector.h" 6 | #include "marisa/grimoire/trie/entry.h" 7 | 8 | namespace marisa { 9 | namespace grimoire { 10 | namespace trie { 11 | 12 | class Tail { 13 | public: 14 | Tail(); 15 | 16 | void build(Vector &entries, Vector *offsets, 17 | TailMode mode); 18 | 19 | void map(Mapper &mapper); 20 | void read(Reader &reader); 21 | void write(Writer &writer) const; 22 | 23 | void restore(Agent &agent, std::size_t offset) const; 24 | bool match(Agent &agent, std::size_t offset) const; 25 | bool prefix_match(Agent &agent, std::size_t offset) const; 26 | 27 | const char &operator[](std::size_t offset) const { 28 | MARISA_DEBUG_IF(offset >= buf_.size(), MARISA_BOUND_ERROR); 29 | return buf_[offset]; 30 | } 31 | 32 | TailMode mode() const { 33 | return end_flags_.empty() ? 
MARISA_TEXT_TAIL : MARISA_BINARY_TAIL; 34 | } 35 | 36 | bool empty() const { 37 | return buf_.empty(); 38 | } 39 | std::size_t size() const { 40 | return buf_.size(); 41 | } 42 | std::size_t total_size() const { 43 | return buf_.total_size() + end_flags_.total_size(); 44 | } 45 | std::size_t io_size() const { 46 | return buf_.io_size() + end_flags_.io_size(); 47 | } 48 | 49 | void clear(); 50 | void swap(Tail &rhs); 51 | 52 | private: 53 | Vector buf_; 54 | BitVector end_flags_; 55 | 56 | void build_(Vector &entries, Vector *offsets, 57 | TailMode mode); 58 | 59 | void map_(Mapper &mapper); 60 | void read_(Reader &reader); 61 | void write_(Writer &writer) const; 62 | 63 | // Disallows copy and assignment. 64 | Tail(const Tail &); 65 | Tail &operator=(const Tail &); 66 | }; 67 | 68 | } // namespace trie 69 | } // namespace grimoire 70 | } // namespace marisa 71 | 72 | #endif // MARISA_GRIMOIRE_TRIE_TAIL_H_ 73 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/lib/marisa/grimoire/vector.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_GRIMOIRE_VECTOR_H_ 2 | #define MARISA_GRIMOIRE_VECTOR_H_ 3 | 4 | #include "marisa/grimoire/vector/vector.h" 5 | #include "marisa/grimoire/vector/flat-vector.h" 6 | #include "marisa/grimoire/vector/bit-vector.h" 7 | 8 | namespace marisa { 9 | namespace grimoire { 10 | 11 | using vector::Vector; 12 | typedef vector::FlatVector FlatVector; 13 | typedef vector::BitVector BitVector; 14 | 15 | } // namespace grimoire 16 | } // namespace marisa 17 | 18 | #endif // MARISA_GRIMOIRE_VECTOR_H_ 19 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/lib/marisa/grimoire/vector/Makefile.am: -------------------------------------------------------------------------------- 1 | MY_INCLUDE = -I$(top_srcdir)/include -I$(top_srcdir)/lib 2 | 3 | AM_CXXFLAGS = -Wall -Weffc++ -Wextra -Wconversion 
$(MY_INCLUDE) 4 | 5 | noinst_LTLIBRARIES = libvector.la 6 | 7 | libvector_la_LDFLAGS = -no-undefined 8 | 9 | libvector_la_SOURCES = \ 10 | bit-vector.cc 11 | 12 | noinst_HEADERS = \ 13 | pop-count.h \ 14 | rank-index.h \ 15 | vector.h \ 16 | flat-vector.h \ 17 | bit-vector.h 18 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/lib/marisa/grimoire/vector/bit-vector.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_GRIMOIRE_VECTOR_BIT_VECTOR_H_ 2 | #define MARISA_GRIMOIRE_VECTOR_BIT_VECTOR_H_ 3 | 4 | #include "marisa/grimoire/vector/rank-index.h" 5 | #include "marisa/grimoire/vector/vector.h" 6 | 7 | namespace marisa { 8 | namespace grimoire { 9 | namespace vector { 10 | 11 | class BitVector { 12 | public: 13 | #if MARISA_WORD_SIZE == 64 14 | typedef UInt64 Unit; 15 | #else // MARISA_WORD_SIZE == 64 16 | typedef UInt32 Unit; 17 | #endif // MARISA_WORD_SIZE == 64 18 | 19 | BitVector() 20 | : units_(), size_(0), num_1s_(0), ranks_(), select0s_(), select1s_() {} 21 | 22 | void build(bool enables_select0, bool enables_select1) { 23 | BitVector temp; 24 | temp.build_index(*this, enables_select0, enables_select1); 25 | units_.shrink(); 26 | temp.units_.swap(units_); 27 | swap(temp); 28 | } 29 | 30 | void map(Mapper &mapper) { 31 | BitVector temp; 32 | temp.map_(mapper); 33 | swap(temp); 34 | } 35 | void read(Reader &reader) { 36 | BitVector temp; 37 | temp.read_(reader); 38 | swap(temp); 39 | } 40 | void write(Writer &writer) const { 41 | write_(writer); 42 | } 43 | 44 | void disable_select0() { 45 | select0s_.clear(); 46 | } 47 | void disable_select1() { 48 | select1s_.clear(); 49 | } 50 | 51 | void push_back(bool bit) { 52 | MARISA_THROW_IF(size_ == MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 53 | if (size_ == (MARISA_WORD_SIZE * units_.size())) { 54 | units_.resize(units_.size() + (64 / MARISA_WORD_SIZE), 0); 55 | } 56 | if (bit) { 57 | units_[size_ / 
MARISA_WORD_SIZE] |= 58 | (Unit)1 << (size_ % MARISA_WORD_SIZE); 59 | ++num_1s_; 60 | } 61 | ++size_; 62 | } 63 | 64 | bool operator[](std::size_t i) const { 65 | MARISA_DEBUG_IF(i >= size_, MARISA_BOUND_ERROR); 66 | return (units_[i / MARISA_WORD_SIZE] 67 | & ((Unit)1 << (i % MARISA_WORD_SIZE))) != 0; 68 | } 69 | 70 | std::size_t rank0(std::size_t i) const { 71 | MARISA_DEBUG_IF(ranks_.empty(), MARISA_STATE_ERROR); 72 | MARISA_DEBUG_IF(i > size_, MARISA_BOUND_ERROR); 73 | return i - rank1(i); 74 | } 75 | std::size_t rank1(std::size_t i) const; 76 | 77 | std::size_t select0(std::size_t i) const; 78 | std::size_t select1(std::size_t i) const; 79 | 80 | std::size_t num_0s() const { 81 | return size_ - num_1s_; 82 | } 83 | std::size_t num_1s() const { 84 | return num_1s_; 85 | } 86 | 87 | bool empty() const { 88 | return size_ == 0; 89 | } 90 | std::size_t size() const { 91 | return size_; 92 | } 93 | std::size_t total_size() const { 94 | return units_.total_size() + ranks_.total_size() 95 | + select0s_.total_size() + select1s_.total_size(); 96 | } 97 | std::size_t io_size() const { 98 | return units_.io_size() + (sizeof(UInt32) * 2) + ranks_.io_size() 99 | + select0s_.io_size() + select1s_.io_size(); 100 | } 101 | 102 | void clear() { 103 | BitVector().swap(*this); 104 | } 105 | void swap(BitVector &rhs) { 106 | units_.swap(rhs.units_); 107 | marisa::swap(size_, rhs.size_); 108 | marisa::swap(num_1s_, rhs.num_1s_); 109 | ranks_.swap(rhs.ranks_); 110 | select0s_.swap(rhs.select0s_); 111 | select1s_.swap(rhs.select1s_); 112 | } 113 | 114 | private: 115 | Vector units_; 116 | std::size_t size_; 117 | std::size_t num_1s_; 118 | Vector ranks_; 119 | Vector select0s_; 120 | Vector select1s_; 121 | 122 | void build_index(const BitVector &bv, 123 | bool enables_select0, bool enables_select1); 124 | 125 | void map_(Mapper &mapper) { 126 | units_.map(mapper); 127 | { 128 | UInt32 temp_size; 129 | mapper.map(&temp_size); 130 | size_ = temp_size; 131 | } 132 | { 133 | UInt32 
temp_num_1s; 134 | mapper.map(&temp_num_1s); 135 | MARISA_THROW_IF(temp_num_1s > size_, MARISA_FORMAT_ERROR); 136 | num_1s_ = temp_num_1s; 137 | } 138 | ranks_.map(mapper); 139 | select0s_.map(mapper); 140 | select1s_.map(mapper); 141 | } 142 | 143 | void read_(Reader &reader) { 144 | units_.read(reader); 145 | { 146 | UInt32 temp_size; 147 | reader.read(&temp_size); 148 | size_ = temp_size; 149 | } 150 | { 151 | UInt32 temp_num_1s; 152 | reader.read(&temp_num_1s); 153 | MARISA_THROW_IF(temp_num_1s > size_, MARISA_FORMAT_ERROR); 154 | num_1s_ = temp_num_1s; 155 | } 156 | ranks_.read(reader); 157 | select0s_.read(reader); 158 | select1s_.read(reader); 159 | } 160 | 161 | void write_(Writer &writer) const { 162 | units_.write(writer); 163 | writer.write((UInt32)size_); 164 | writer.write((UInt32)num_1s_); 165 | ranks_.write(writer); 166 | select0s_.write(writer); 167 | select1s_.write(writer); 168 | } 169 | 170 | // Disallows copy and assignment. 171 | BitVector(const BitVector &); 172 | BitVector &operator=(const BitVector &); 173 | }; 174 | 175 | } // namespace vector 176 | } // namespace grimoire 177 | } // namespace marisa 178 | 179 | #endif // MARISA_GRIMOIRE_VECTOR_BIT_VECTOR_H_ 180 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/lib/marisa/grimoire/vector/pop-count.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_GRIMOIRE_VECTOR_POP_COUNT_H_ 2 | #define MARISA_GRIMOIRE_VECTOR_POP_COUNT_H_ 3 | 4 | #include "marisa/grimoire/intrin.h" 5 | 6 | namespace marisa { 7 | namespace grimoire { 8 | namespace vector { 9 | 10 | #if MARISA_WORD_SIZE == 64 11 | 12 | class PopCount { 13 | public: 14 | explicit PopCount(UInt64 x) : value_() { 15 | x = (x & 0x5555555555555555ULL) + ((x & 0xAAAAAAAAAAAAAAAAULL) >> 1); 16 | x = (x & 0x3333333333333333ULL) + ((x & 0xCCCCCCCCCCCCCCCCULL) >> 2); 17 | x = (x & 0x0F0F0F0F0F0F0F0FULL) + ((x & 0xF0F0F0F0F0F0F0F0ULL) >> 4); 18 
| x *= 0x0101010101010101ULL; 19 | value_ = x; 20 | } 21 | 22 | std::size_t lo8() const { 23 | return (std::size_t)(value_ & 0xFFU); 24 | } 25 | std::size_t lo16() const { 26 | return (std::size_t)((value_ >> 8) & 0xFFU); 27 | } 28 | std::size_t lo24() const { 29 | return (std::size_t)((value_ >> 16) & 0xFFU); 30 | } 31 | std::size_t lo32() const { 32 | return (std::size_t)((value_ >> 24) & 0xFFU); 33 | } 34 | std::size_t lo40() const { 35 | return (std::size_t)((value_ >> 32) & 0xFFU); 36 | } 37 | std::size_t lo48() const { 38 | return (std::size_t)((value_ >> 40) & 0xFFU); 39 | } 40 | std::size_t lo56() const { 41 | return (std::size_t)((value_ >> 48) & 0xFFU); 42 | } 43 | std::size_t lo64() const { 44 | return (std::size_t)((value_ >> 56) & 0xFFU); 45 | } 46 | 47 | static std::size_t count(UInt64 x) { 48 | #if defined(MARISA_X64) && defined(MARISA_USE_POPCNT) 49 | #ifdef _MSC_VER 50 | return __popcnt64(x); 51 | #else // _MSC_VER 52 | return _mm_popcnt_u64(x); 53 | #endif // _MSC_VER 54 | #else // defined(MARISA_X64) && defined(MARISA_USE_POPCNT) 55 | return PopCount(x).lo64(); 56 | #endif // defined(MARISA_X64) && defined(MARISA_USE_POPCNT) 57 | } 58 | 59 | private: 60 | UInt64 value_; 61 | }; 62 | 63 | #else // MARISA_WORD_SIZE == 64 64 | 65 | class PopCount { 66 | public: 67 | explicit PopCount(UInt32 x) : value_() { 68 | x = (x & 0x55555555U) + ((x & 0xAAAAAAAAU) >> 1); 69 | x = (x & 0x33333333U) + ((x & 0xCCCCCCCCU) >> 2); 70 | x = (x & 0x0F0F0F0FU) + ((x & 0xF0F0F0F0U) >> 4); 71 | x *= 0x01010101U; 72 | value_ = x; 73 | } 74 | 75 | std::size_t lo8() const { 76 | return value_ & 0xFFU; 77 | } 78 | std::size_t lo16() const { 79 | return (value_ >> 8) & 0xFFU; 80 | } 81 | std::size_t lo24() const { 82 | return (value_ >> 16) & 0xFFU; 83 | } 84 | std::size_t lo32() const { 85 | return (value_ >> 24) & 0xFFU; 86 | } 87 | 88 | static std::size_t count(UInt32 x) { 89 | #ifdef MARISA_USE_POPCNT 90 | #ifdef _MSC_VER 91 | return __popcnt(x); 92 | #else // _MSC_VER 93 
| return _mm_popcnt_u32(x); 94 | #endif // _MSC_VER 95 | #else // MARISA_USE_POPCNT 96 | return PopCount(x).lo32(); 97 | #endif // MARISA_USE_POPCNT 98 | } 99 | 100 | private: 101 | UInt32 value_; 102 | }; 103 | 104 | #endif // MARISA_WORD_SIZE == 64 105 | 106 | } // namespace vector 107 | } // namespace grimoire 108 | } // namespace marisa 109 | 110 | #endif // MARISA_GRIMOIRE_VECTOR_POP_COUNT_H_ 111 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/lib/marisa/grimoire/vector/rank-index.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_GRIMOIRE_VECTOR_RANK_INDEX_H_ 2 | #define MARISA_GRIMOIRE_VECTOR_RANK_INDEX_H_ 3 | 4 | #include "marisa/base.h" 5 | 6 | namespace marisa { 7 | namespace grimoire { 8 | namespace vector { 9 | 10 | class RankIndex { 11 | public: 12 | RankIndex() : abs_(0), rel_lo_(0), rel_hi_(0) {} 13 | 14 | void set_abs(std::size_t value) { 15 | MARISA_DEBUG_IF(value > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 16 | abs_ = (UInt32)value; 17 | } 18 | void set_rel1(std::size_t value) { 19 | MARISA_DEBUG_IF(value > 64, MARISA_RANGE_ERROR); 20 | rel_lo_ = (UInt32)((rel_lo_ & ~0x7FU) | (value & 0x7FU)); 21 | } 22 | void set_rel2(std::size_t value) { 23 | MARISA_DEBUG_IF(value > 128, MARISA_RANGE_ERROR); 24 | rel_lo_ = (UInt32)((rel_lo_ & ~(0xFFU << 7)) | ((value & 0xFFU) << 7)); 25 | } 26 | void set_rel3(std::size_t value) { 27 | MARISA_DEBUG_IF(value > 192, MARISA_RANGE_ERROR); 28 | rel_lo_ = (UInt32)((rel_lo_ & ~(0xFFU << 15)) | ((value & 0xFFU) << 15)); 29 | } 30 | void set_rel4(std::size_t value) { 31 | MARISA_DEBUG_IF(value > 256, MARISA_RANGE_ERROR); 32 | rel_lo_ = (UInt32)((rel_lo_ & ~(0x1FFU << 23)) | ((value & 0x1FFU) << 23)); 33 | } 34 | void set_rel5(std::size_t value) { 35 | MARISA_DEBUG_IF(value > 320, MARISA_RANGE_ERROR); 36 | rel_hi_ = (UInt32)((rel_hi_ & ~0x1FFU) | (value & 0x1FFU)); 37 | } 38 | void set_rel6(std::size_t value) { 39 | 
MARISA_DEBUG_IF(value > 384, MARISA_RANGE_ERROR); 40 | rel_hi_ = (UInt32)((rel_hi_ & ~(0x1FFU << 9)) | ((value & 0x1FFU) << 9)); 41 | } 42 | void set_rel7(std::size_t value) { 43 | MARISA_DEBUG_IF(value > 448, MARISA_RANGE_ERROR); 44 | rel_hi_ = (UInt32)((rel_hi_ & ~(0x1FFU << 18)) | ((value & 0x1FFU) << 18)); 45 | } 46 | 47 | std::size_t abs() const { 48 | return abs_; 49 | } 50 | std::size_t rel1() const { 51 | return rel_lo_ & 0x7FU; 52 | } 53 | std::size_t rel2() const { 54 | return (rel_lo_ >> 7) & 0xFFU; 55 | } 56 | std::size_t rel3() const { 57 | return (rel_lo_ >> 15) & 0xFFU; 58 | } 59 | std::size_t rel4() const { 60 | return (rel_lo_ >> 23) & 0x1FFU; 61 | } 62 | std::size_t rel5() const { 63 | return rel_hi_ & 0x1FFU; 64 | } 65 | std::size_t rel6() const { 66 | return (rel_hi_ >> 9) & 0x1FFU; 67 | } 68 | std::size_t rel7() const { 69 | return (rel_hi_ >> 18) & 0x1FFU; 70 | } 71 | 72 | private: 73 | UInt32 abs_; 74 | UInt32 rel_lo_; 75 | UInt32 rel_hi_; 76 | }; 77 | 78 | } // namespace vector 79 | } // namespace grimoire 80 | } // namespace marisa 81 | 82 | #endif // MARISA_GRIMOIRE_VECTOR_RANK_INDEX_H_ 83 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/m4/ltsugar.m4: -------------------------------------------------------------------------------- 1 | # ltsugar.m4 -- libtool m4 base layer. -*-Autoconf-*- 2 | # 3 | # Copyright (C) 2004-2005, 2007-2008, 2011-2015 Free Software 4 | # Foundation, Inc. 5 | # Written by Gary V. Vaughan, 2004 6 | # 7 | # This file is free software; the Free Software Foundation gives 8 | # unlimited permission to copy and/or distribute it, with or without 9 | # modifications, as long as this notice is preserved. 10 | 11 | # serial 6 ltsugar.m4 12 | 13 | # This is to help aclocal find these macros, as it can't see m4_define. 
14 | AC_DEFUN([LTSUGAR_VERSION], [m4_if([0.1])]) 15 | 16 | 17 | # lt_join(SEP, ARG1, [ARG2...]) 18 | # ----------------------------- 19 | # Produce ARG1SEPARG2...SEPARGn, omitting [] arguments and their 20 | # associated separator. 21 | # Needed until we can rely on m4_join from Autoconf 2.62, since all earlier 22 | # versions in m4sugar had bugs. 23 | m4_define([lt_join], 24 | [m4_if([$#], [1], [], 25 | [$#], [2], [[$2]], 26 | [m4_if([$2], [], [], [[$2]_])$0([$1], m4_shift(m4_shift($@)))])]) 27 | m4_define([_lt_join], 28 | [m4_if([$#$2], [2], [], 29 | [m4_if([$2], [], [], [[$1$2]])$0([$1], m4_shift(m4_shift($@)))])]) 30 | 31 | 32 | # lt_car(LIST) 33 | # lt_cdr(LIST) 34 | # ------------ 35 | # Manipulate m4 lists. 36 | # These macros are necessary as long as will still need to support 37 | # Autoconf-2.59, which quotes differently. 38 | m4_define([lt_car], [[$1]]) 39 | m4_define([lt_cdr], 40 | [m4_if([$#], 0, [m4_fatal([$0: cannot be called without arguments])], 41 | [$#], 1, [], 42 | [m4_dquote(m4_shift($@))])]) 43 | m4_define([lt_unquote], $1) 44 | 45 | 46 | # lt_append(MACRO-NAME, STRING, [SEPARATOR]) 47 | # ------------------------------------------ 48 | # Redefine MACRO-NAME to hold its former content plus 'SEPARATOR''STRING'. 49 | # Note that neither SEPARATOR nor STRING are expanded; they are appended 50 | # to MACRO-NAME as is (leaving the expansion for when MACRO-NAME is invoked). 51 | # No SEPARATOR is output if MACRO-NAME was previously undefined (different 52 | # than defined and empty). 53 | # 54 | # This macro is needed until we can rely on Autoconf 2.62, since earlier 55 | # versions of m4sugar mistakenly expanded SEPARATOR but not STRING. 
56 | m4_define([lt_append], 57 | [m4_define([$1], 58 | m4_ifdef([$1], [m4_defn([$1])[$3]])[$2])]) 59 | 60 | 61 | 62 | # lt_combine(SEP, PREFIX-LIST, INFIX, SUFFIX1, [SUFFIX2...]) 63 | # ---------------------------------------------------------- 64 | # Produce a SEP delimited list of all paired combinations of elements of 65 | # PREFIX-LIST with SUFFIX1 through SUFFIXn. Each element of the list 66 | # has the form PREFIXmINFIXSUFFIXn. 67 | # Needed until we can rely on m4_combine added in Autoconf 2.62. 68 | m4_define([lt_combine], 69 | [m4_if(m4_eval([$# > 3]), [1], 70 | [m4_pushdef([_Lt_sep], [m4_define([_Lt_sep], m4_defn([lt_car]))])]]dnl 71 | [[m4_foreach([_Lt_prefix], [$2], 72 | [m4_foreach([_Lt_suffix], 73 | ]m4_dquote(m4_dquote(m4_shift(m4_shift(m4_shift($@)))))[, 74 | [_Lt_sep([$1])[]m4_defn([_Lt_prefix])[$3]m4_defn([_Lt_suffix])])])])]) 75 | 76 | 77 | # lt_if_append_uniq(MACRO-NAME, VARNAME, [SEPARATOR], [UNIQ], [NOT-UNIQ]) 78 | # ----------------------------------------------------------------------- 79 | # Iff MACRO-NAME does not yet contain VARNAME, then append it (delimited 80 | # by SEPARATOR if supplied) and expand UNIQ, else NOT-UNIQ. 
81 | m4_define([lt_if_append_uniq], 82 | [m4_ifdef([$1], 83 | [m4_if(m4_index([$3]m4_defn([$1])[$3], [$3$2$3]), [-1], 84 | [lt_append([$1], [$2], [$3])$4], 85 | [$5])], 86 | [lt_append([$1], [$2], [$3])$4])]) 87 | 88 | 89 | # lt_dict_add(DICT, KEY, VALUE) 90 | # ----------------------------- 91 | m4_define([lt_dict_add], 92 | [m4_define([$1($2)], [$3])]) 93 | 94 | 95 | # lt_dict_add_subkey(DICT, KEY, SUBKEY, VALUE) 96 | # -------------------------------------------- 97 | m4_define([lt_dict_add_subkey], 98 | [m4_define([$1($2:$3)], [$4])]) 99 | 100 | 101 | # lt_dict_fetch(DICT, KEY, [SUBKEY]) 102 | # ---------------------------------- 103 | m4_define([lt_dict_fetch], 104 | [m4_ifval([$3], 105 | m4_ifdef([$1($2:$3)], [m4_defn([$1($2:$3)])]), 106 | m4_ifdef([$1($2)], [m4_defn([$1($2)])]))]) 107 | 108 | 109 | # lt_if_dict_fetch(DICT, KEY, [SUBKEY], VALUE, IF-TRUE, [IF-FALSE]) 110 | # ----------------------------------------------------------------- 111 | m4_define([lt_if_dict_fetch], 112 | [m4_if(lt_dict_fetch([$1], [$2], [$3]), [$4], 113 | [$5], 114 | [$6])]) 115 | 116 | 117 | # lt_dict_filter(DICT, [SUBKEY], VALUE, [SEPARATOR], KEY, [...]) 118 | # -------------------------------------------------------------- 119 | m4_define([lt_dict_filter], 120 | [m4_if([$5], [], [], 121 | [lt_join(m4_quote(m4_default([$4], [[, ]])), 122 | lt_unquote(m4_split(m4_normalize(m4_foreach(_Lt_key, lt_car([m4_shiftn(4, $@)]), 123 | [lt_if_dict_fetch([$1], _Lt_key, [$2], [$3], [_Lt_key ])])))))])[]dnl 124 | ]) 125 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/m4/ltversion.m4: -------------------------------------------------------------------------------- 1 | # ltversion.m4 -- version numbers -*- Autoconf -*- 2 | # 3 | # Copyright (C) 2004, 2011-2015 Free Software Foundation, Inc. 
4 | # Written by Scott James Remnant, 2004 5 | # 6 | # This file is free software; the Free Software Foundation gives 7 | # unlimited permission to copy and/or distribute it, with or without 8 | # modifications, as long as this notice is preserved. 9 | 10 | # @configure_input@ 11 | 12 | # serial 4179 ltversion.m4 13 | # This file is part of GNU Libtool 14 | 15 | m4_define([LT_PACKAGE_VERSION], [2.4.6]) 16 | m4_define([LT_PACKAGE_REVISION], [2.4.6]) 17 | 18 | AC_DEFUN([LTVERSION_VERSION], 19 | [macro_version='2.4.6' 20 | macro_revision='2.4.6' 21 | _LT_DECL(, macro_version, 0, [Which release of libtool.m4 was used?]) 22 | _LT_DECL(, macro_revision, 0) 23 | ]) 24 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/marisa.pc.in: -------------------------------------------------------------------------------- 1 | prefix=@prefix@ 2 | exec_prefix=@exec_prefix@ 3 | bindir=@bindir@ 4 | libdir=@libdir@ 5 | includedir=@includedir@ 6 | 7 | Name: Marisa 8 | Description: Matching Algorithm with Recursively Implemented StorAge 9 | Version: @VERSION@ 10 | Cflags: -I${includedir} 11 | Libs: -L${libdir} -lmarisa 12 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/tests/Makefile.am: -------------------------------------------------------------------------------- 1 | MY_INCLUDE = -I$(top_srcdir)/include -I$(top_srcdir)/lib 2 | MY_LIBS = $(top_srcdir)/lib/marisa/libmarisa.la 3 | 4 | AM_CXXFLAGS = -Wall -Weffc++ -Wextra -Wconversion -D_DEBUG $(MY_INCLUDE) 5 | 6 | TESTS = \ 7 | base-test \ 8 | io-test \ 9 | vector-test \ 10 | trie-test \ 11 | marisa-test 12 | 13 | check_PROGRAMS = $(TESTS) 14 | 15 | noinst_HEADERS = marisa-assert.h 16 | 17 | base_test_SOURCES = base-test.cc 18 | base_test_LDADD = $(MY_LIBS) 19 | 20 | io_test_SOURCES = io-test.cc 21 | io_test_LDADD = $(MY_LIBS) 22 | 23 | vector_test_SOURCES = vector-test.cc 24 | vector_test_LDADD = $(MY_LIBS) 25 | 26 | 
trie_test_SOURCES = trie-test.cc 27 | trie_test_LDADD = $(MY_LIBS) 28 | 29 | marisa_test_SOURCES = marisa-test.cc 30 | marisa_test_LDADD = $(MY_LIBS) 31 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/tests/marisa-assert.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_ASSERT_H_ 2 | #define MARISA_ASSERT_H_ 3 | 4 | #include 5 | #include 6 | 7 | #define ASSERT(cond) (void)((!!(cond)) || \ 8 | ((std::cout << __LINE__ << ": Assertion `" << #cond << "' failed." \ 9 | << std::endl), std::exit(-1), 0)) 10 | 11 | #define EXCEPT(code, expected_error_code) try { \ 12 | code; \ 13 | std::cout << __LINE__ << ": Exception `" << #code << "' failed." \ 14 | << std::endl; \ 15 | std::exit(-1); \ 16 | } catch (const marisa::Exception &ex) { \ 17 | ASSERT(ex.error_code() == expected_error_code); \ 18 | } 19 | 20 | #define TEST_START() \ 21 | (std::cout << __FILE__ << ":" << __LINE__ << ": " << __FUNCTION__ << "(): ") 22 | 23 | #define TEST_END() \ 24 | (std::cout << "ok" << std::endl) 25 | 26 | #endif // MARISA_ASSERT_H_ 27 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/tools/Makefile.am: -------------------------------------------------------------------------------- 1 | MY_INCLUDE = -I$(top_srcdir)/include -I$(top_srcdir)/lib 2 | MY_LIBS = $(top_srcdir)/lib/marisa/libmarisa.la libcmdopt.la 3 | 4 | AM_CXXFLAGS = -Wall -Weffc++ -Wextra -Wconversion $(MY_INCLUDE) 5 | 6 | noinst_LTLIBRARIES = libcmdopt.la 7 | 8 | libcmdopt_la_SOURCES = cmdopt.cc 9 | 10 | noinst_HEADERS = cmdopt.h 11 | 12 | bin_PROGRAMS = \ 13 | marisa-build \ 14 | marisa-lookup \ 15 | marisa-reverse-lookup \ 16 | marisa-common-prefix-search \ 17 | marisa-predictive-search \ 18 | marisa-dump \ 19 | marisa-benchmark 20 | 21 | marisa_build_SOURCES = marisa-build.cc 22 | marisa_build_LDADD = $(MY_LIBS) 23 | 24 | marisa_lookup_SOURCES = 
marisa-lookup.cc 25 | marisa_lookup_LDADD = $(MY_LIBS) 26 | 27 | marisa_reverse_lookup_SOURCES = marisa-reverse-lookup.cc 28 | marisa_reverse_lookup_LDADD = $(MY_LIBS) 29 | 30 | marisa_common_prefix_search_SOURCES = marisa-common-prefix-search.cc 31 | marisa_common_prefix_search_LDADD = $(MY_LIBS) 32 | 33 | marisa_predictive_search_SOURCES = marisa-predictive-search.cc 34 | marisa_predictive_search_LDADD = $(MY_LIBS) 35 | 36 | marisa_dump_SOURCES = marisa-dump.cc 37 | marisa_dump_LDADD = $(MY_LIBS) 38 | 39 | marisa_benchmark_SOURCES = marisa-benchmark.cc 40 | marisa_benchmark_LDADD = $(MY_LIBS) 41 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/tools/cmdopt.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_CMDOPT_H_ 2 | #define MARISA_CMDOPT_H_ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | typedef struct cmdopt_option_ { 9 | // `name' specifies the name of this option. 10 | // An array of options must be terminated with an option whose name == NULL. 11 | const char *name; 12 | 13 | // `has_arg' specifies whether an option takes an argument or not. 14 | // 0 specifies that this option does not have any argument. 15 | // 1 specifies that this option has an argument. 16 | // 2 specifies that this option may have an argument. 17 | int has_arg; 18 | 19 | // `flag' specifies an integer variable which is overwritten by cmdopt_get() 20 | // with its return value. 21 | int *flag; 22 | 23 | // `val' specifies a return value of cmdopt_get(). This value is returned 24 | // when cmdopt_get() finds this option. 25 | int val; 26 | } cmdopt_option; 27 | 28 | typedef struct cmdopt_t_ { 29 | // Command line arguments. 30 | int argc; 31 | char **argv; 32 | 33 | // Option settings. 34 | const cmdopt_option *longopts; 35 | const char *optstring; 36 | 37 | int optind; // Index of the next argument. 38 | char *nextchar; // Next character. 
39 | char *optarg; // Argument of the last option. 40 | int optopt; // Label of the last option. 41 | char *optlong; // Long option. 42 | int opterr; // Warning level (0: nothing, 1: warning, 2: all). 43 | int longindex; // Index of the last long option. 44 | int optnum; // Number of options. 45 | } cmdopt_t; 46 | 47 | // cmdopt_init() initializes a cmdopt_t for successive cmdopt_get() calls. 48 | void cmdopt_init(cmdopt_t *h, int argc, char **argv, 49 | const char *optstring, const cmdopt_option *longopts); 50 | 51 | // cmdopt_get() analyzes command line arguments and gets the next option. 52 | int cmdopt_get(cmdopt_t *h); 53 | 54 | #ifdef __cplusplus 55 | } // extern "C" 56 | #endif 57 | 58 | #endif // MARISA_CMDOPT_H_ 59 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/tools/marisa-common-prefix-search.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include 6 | 7 | #include "cmdopt.h" 8 | 9 | namespace { 10 | 11 | std::size_t max_num_results = 10; 12 | bool mmap_flag = true; 13 | 14 | void print_help(const char *cmd) { 15 | std::cerr << "Usage: " << cmd << " [OPTION]... 
DIC\n\n" 16 | "Options:\n" 17 | " -n, --max-num-results=[N] limit the number of results to N" 18 | " (default: 10)\n" 19 | " 0: no limit\n" 20 | " -m, --mmap-dictionary use memory-mapped I/O to load a dictionary" 21 | " (default)\n" 22 | " -r, --read-dictionary read an entire dictionary into memory\n" 23 | " -h, --help print this help\n" 24 | << std::endl; 25 | } 26 | 27 | int common_prefix_search(const char * const *args, std::size_t num_args) { 28 | if (num_args == 0) { 29 | std::cerr << "error: dictionary is not specified" << std::endl; 30 | return 10; 31 | } else if (num_args > 1) { 32 | std::cerr << "error: more than one dictionaries are specified" 33 | << std::endl; 34 | return 11; 35 | } 36 | 37 | marisa::Trie trie; 38 | if (mmap_flag) { 39 | try { 40 | trie.mmap(args[0]); 41 | } catch (const marisa::Exception &ex) { 42 | std::cerr << ex.what() << ": failed to mmap a dictionary file: " 43 | << args[0] << std::endl; 44 | return 20; 45 | } 46 | } else { 47 | try { 48 | trie.load(args[0]); 49 | } catch (const marisa::Exception &ex) { 50 | std::cerr << ex.what() << ": failed to load a dictionary file: " 51 | << args[0] << std::endl; 52 | return 21; 53 | } 54 | } 55 | 56 | marisa::Agent agent; 57 | marisa::Keyset keyset; 58 | std::string str; 59 | while (std::getline(std::cin, str)) { 60 | try { 61 | agent.set_query(str.c_str(), str.length()); 62 | while (trie.common_prefix_search(agent)) { 63 | keyset.push_back(agent.key()); 64 | } 65 | if (keyset.empty()) { 66 | std::cout << "not found" << std::endl; 67 | } else { 68 | std::cout << keyset.size() << " found" << std::endl; 69 | const std::size_t end = std::min(max_num_results, keyset.size()); 70 | for (std::size_t i = 0; i < end; ++i) { 71 | std::cout << keyset[i].id() << '\t'; 72 | std::cout.write(keyset[i].ptr(), keyset[i].length()) << '\t'; 73 | std::cout << str << '\n'; 74 | } 75 | } 76 | keyset.reset(); 77 | } catch (const marisa::Exception &ex) { 78 | std::cerr << ex.what() << ": common_prefix_search() 
failed: " 79 | << str << std::endl; 80 | return 30; 81 | } 82 | 83 | if (!std::cout) { 84 | std::cerr << "error: failed to write results to standard output" 85 | << std::endl; 86 | return 31; 87 | } 88 | } 89 | 90 | return 0; 91 | } 92 | 93 | } // namespace 94 | 95 | int main(int argc, char *argv[]) { 96 | std::ios::sync_with_stdio(false); 97 | 98 | ::cmdopt_option long_options[] = { 99 | { "max-num-results", 1, NULL, 'n' }, 100 | { "mmap-dictionary", 0, NULL, 'm' }, 101 | { "read-dictionary", 0, NULL, 'r' }, 102 | { "help", 0, NULL, 'h' }, 103 | { NULL, 0, NULL, 0 } 104 | }; 105 | ::cmdopt_t cmdopt; 106 | ::cmdopt_init(&cmdopt, argc, argv, "n:mrh", long_options); 107 | int label; 108 | while ((label = ::cmdopt_get(&cmdopt)) != -1) { 109 | switch (label) { 110 | case 'n': { 111 | char *end_of_value; 112 | const long value = std::strtol(cmdopt.optarg, &end_of_value, 10); 113 | if ((*end_of_value != '\0') || (value < 0)) { 114 | std::cerr << "error: option `-n' with an invalid argument: " 115 | << cmdopt.optarg << std::endl; 116 | } 117 | if ((value == 0) || ((unsigned long long)value > MARISA_SIZE_MAX)) { 118 | max_num_results = MARISA_SIZE_MAX; 119 | } else { 120 | max_num_results = (std::size_t)value; 121 | } 122 | break; 123 | } 124 | case 'm': { 125 | mmap_flag = true; 126 | break; 127 | } 128 | case 'r': { 129 | mmap_flag = false; 130 | break; 131 | } 132 | case 'h': { 133 | print_help(argv[0]); 134 | return 0; 135 | } 136 | default: { 137 | return 1; 138 | } 139 | } 140 | } 141 | return common_prefix_search(cmdopt.argv + cmdopt.optind, 142 | cmdopt.argc - cmdopt.optind); 143 | } 144 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/tools/marisa-dump.cc: -------------------------------------------------------------------------------- 1 | #ifdef _WIN32 2 | #include 3 | #include 4 | #include 5 | #endif // _WIN32 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | #include 12 | 13 | #include "cmdopt.h" 14 | 
15 | namespace { 16 | 17 | const char *delimiter = "\n"; 18 | bool mmap_flag = true; 19 | 20 | void print_help(const char *cmd) { 21 | std::cerr << "Usage: " << cmd << " [OPTION]... DIC...\n\n" 22 | "Options:\n" 23 | " -d, --delimiter=[S] specify the delimier (default: \"\\n\")\n" 24 | " -m, --mmap-dictionary use memory-mapped I/O to load a dictionary" 25 | " (default)\n" 26 | " -r, --read-dictionary read an entire dictionary into memory\n" 27 | " -h, --help print this help\n" 28 | << std::endl; 29 | } 30 | 31 | int dump(const marisa::Trie &trie) { 32 | std::size_t num_keys = 0; 33 | marisa::Agent agent; 34 | agent.set_query(""); 35 | try { 36 | while (trie.predictive_search(agent)) { 37 | std::cout.write(agent.key().ptr(), agent.key().length()) << delimiter; 38 | if (!std::cout) { 39 | std::cerr << "error: failed to write results to standard output" 40 | << std::endl; 41 | return 20; 42 | } 43 | ++num_keys; 44 | } 45 | } catch (const marisa::Exception &ex) { 46 | std::cerr << ex.what() << ": predictive_search() failed" << std::endl; 47 | return 21; 48 | } 49 | std::cerr << "#keys: " << num_keys << std::endl; 50 | return 0; 51 | } 52 | 53 | int dump(const char *filename) { 54 | marisa::Trie trie; 55 | if (filename != NULL) { 56 | std::cerr << "input: " << filename << std::endl; 57 | if (mmap_flag) { 58 | try { 59 | trie.mmap(filename); 60 | } catch (const marisa::Exception &ex) { 61 | std::cerr << ex.what() << ": failed to mmap a dictionary file: " 62 | << filename << std::endl; 63 | return 10; 64 | } 65 | } else { 66 | try { 67 | trie.load(filename); 68 | } catch (const marisa::Exception &ex) { 69 | std::cerr << ex.what() << ": failed to load a dictionary file: " 70 | << filename << std::endl; 71 | return 11; 72 | } 73 | } 74 | } else { 75 | std::cerr << "input: " << std::endl; 76 | #ifdef _WIN32 77 | const int stdin_fileno = ::_fileno(stdin); 78 | if (stdin_fileno < 0) { 79 | std::cerr << "error: failed to get the file descriptor of " 80 | "standard input" << 
std::endl; 81 | return 20; 82 | } 83 | if (::_setmode(stdin_fileno, _O_BINARY) == -1) { 84 | std::cerr << "error: failed to set binary mode" << std::endl; 85 | return 21; 86 | } 87 | #endif // _WIN32 88 | try { 89 | std::cin >> trie; 90 | } catch (const marisa::Exception &ex) { 91 | std::cerr << ex.what() 92 | << ": failed to read a dictionary from standard input" << std::endl; 93 | return 22; 94 | } 95 | } 96 | return dump(trie); 97 | } 98 | 99 | int dump(const char * const *args, std::size_t num_args) { 100 | if (num_args == 0) { 101 | return dump(NULL); 102 | } 103 | for (std::size_t i = 0; i < num_args; ++i) { 104 | const int result = dump(args[i]); 105 | if (result != 0) { 106 | return result; 107 | } 108 | } 109 | return 0; 110 | } 111 | 112 | } // namespace 113 | 114 | int main(int argc, char *argv[]) { 115 | std::ios::sync_with_stdio(false); 116 | 117 | ::cmdopt_option long_options[] = { 118 | { "delimiter", 1, NULL, 'd' }, 119 | { "mmap-dictionary", 0, NULL, 'm' }, 120 | { "read-dictionary", 0, NULL, 'r' }, 121 | { "help", 0, NULL, 'h' }, 122 | { NULL, 0, NULL, 0 } 123 | }; 124 | ::cmdopt_t cmdopt; 125 | ::cmdopt_init(&cmdopt, argc, argv, "d:mrh", long_options); 126 | int label; 127 | while ((label = ::cmdopt_get(&cmdopt)) != -1) { 128 | switch (label) { 129 | case 'd': { 130 | delimiter = cmdopt.optarg; 131 | break; 132 | } 133 | case 'm': { 134 | mmap_flag = true; 135 | break; 136 | } 137 | case 'r': { 138 | mmap_flag = false; 139 | break; 140 | } 141 | case 'h': { 142 | print_help(argv[0]); 143 | return 0; 144 | } 145 | default: { 146 | return 1; 147 | } 148 | } 149 | } 150 | return dump(cmdopt.argv + cmdopt.optind, cmdopt.argc - cmdopt.optind); 151 | } 152 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/tools/marisa-lookup.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | 6 | #include "cmdopt.h" 7 | 8 | namespace 
{ 9 | 10 | bool mmap_flag = true; 11 | 12 | void print_help(const char *cmd) { 13 | std::cerr << "Usage: " << cmd << " [OPTION]... DIC\n\n" 14 | "Options:\n" 15 | " -m, --mmap-dictionary use memory-mapped I/O to load a dictionary" 16 | " (default)\n" 17 | " -r, --read-dictionary read an entire dictionary into memory\n" 18 | " -h, --help print this help\n" 19 | << std::endl; 20 | } 21 | 22 | int lookup(const char * const *args, std::size_t num_args) { 23 | if (num_args == 0) { 24 | std::cerr << "error: dictionary is not specified" << std::endl; 25 | return 10; 26 | } else if (num_args > 1) { 27 | std::cerr << "error: more than one dictionaries are specified" 28 | << std::endl; 29 | return 11; 30 | } 31 | 32 | marisa::Trie trie; 33 | if (mmap_flag) { 34 | try { 35 | trie.mmap(args[0]); 36 | } catch (const marisa::Exception &ex) { 37 | std::cerr << ex.what() << ": failed to mmap a dictionary file: " 38 | << args[0] << std::endl; 39 | return 20; 40 | } 41 | } else { 42 | try { 43 | trie.load(args[0]); 44 | } catch (const marisa::Exception &ex) { 45 | std::cerr << ex.what() << ": failed to load a dictionary file: " 46 | << args[0] << std::endl; 47 | return 21; 48 | } 49 | } 50 | 51 | marisa::Agent agent; 52 | std::string str; 53 | while (std::getline(std::cin, str)) { 54 | try { 55 | agent.set_query(str.c_str(), str.length()); 56 | if (trie.lookup(agent)) { 57 | std::cout << agent.key().id() << '\t' << str << '\n'; 58 | } else { 59 | std::cout << "-1\t" << str << '\n'; 60 | } 61 | } catch (const marisa::Exception &ex) { 62 | std::cerr << ex.what() << ": lookup() failed: " << str << std::endl; 63 | return 30; 64 | } 65 | 66 | if (!std::cout) { 67 | std::cerr << "error: failed to write results to standard output" 68 | << std::endl; 69 | return 30; 70 | } 71 | } 72 | 73 | return 0; 74 | } 75 | 76 | } // namespace 77 | 78 | int main(int argc, char *argv[]) { 79 | std::ios::sync_with_stdio(false); 80 | 81 | ::cmdopt_option long_options[] = { 82 | { "mmap-dictionary", 0, 
NULL, 'm' }, 83 | { "read-dictionary", 0, NULL, 'r' }, 84 | { "help", 0, NULL, 'h' }, 85 | { NULL, 0, NULL, 0 } 86 | }; 87 | ::cmdopt_t cmdopt; 88 | ::cmdopt_init(&cmdopt, argc, argv, "mrh", long_options); 89 | int label; 90 | while ((label = ::cmdopt_get(&cmdopt)) != -1) { 91 | switch (label) { 92 | case 'm': { 93 | mmap_flag = true; 94 | break; 95 | } 96 | case 'r': { 97 | mmap_flag = false; 98 | break; 99 | } 100 | case 'h': { 101 | print_help(argv[0]); 102 | return 0; 103 | } 104 | default: { 105 | return 1; 106 | } 107 | } 108 | } 109 | return lookup(cmdopt.argv + cmdopt.optind, cmdopt.argc - cmdopt.optind); 110 | } 111 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/tools/marisa-predictive-search.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include 6 | 7 | #include "cmdopt.h" 8 | 9 | namespace { 10 | 11 | std::size_t max_num_results = 10; 12 | bool mmap_flag = true; 13 | 14 | void print_help(const char *cmd) { 15 | std::cerr << "Usage: " << cmd << " [OPTION]... 
DIC\n\n" 16 | "Options:\n" 17 | " -n, --max-num-results=[N] limit the number of outputs to N" 18 | " (default: 10)\n" 19 | " 0: no limit\n" 20 | " -m, --mmap-dictionary use memory-mapped I/O to load a dictionary" 21 | " (default)\n" 22 | " -r, --read-dictionary read an entire dictionary into memory\n" 23 | " -h, --help print this help\n" 24 | << std::endl; 25 | } 26 | 27 | int predictive_search(const char * const *args, std::size_t num_args) { 28 | if (num_args == 0) { 29 | std::cerr << "error: dictionary is not specified" << std::endl; 30 | return 10; 31 | } else if (num_args > 1) { 32 | std::cerr << "error: more than one dictionaries are specified" 33 | << std::endl; 34 | return 11; 35 | } 36 | 37 | marisa::Trie trie; 38 | if (mmap_flag) { 39 | try { 40 | trie.mmap(args[0]); 41 | } catch (const marisa::Exception &ex) { 42 | std::cerr << ex.what() << ": failed to mmap a dictionary file: " 43 | << args[0] << std::endl; 44 | return 20; 45 | } 46 | } else { 47 | try { 48 | trie.load(args[0]); 49 | } catch (const marisa::Exception &ex) { 50 | std::cerr << ex.what() << ": failed to load a dictionary file: " 51 | << args[0] << std::endl; 52 | return 21; 53 | } 54 | } 55 | 56 | marisa::Agent agent; 57 | marisa::Keyset keyset; 58 | std::string str; 59 | while (std::getline(std::cin, str)) { 60 | try { 61 | agent.set_query(str.c_str(), str.length()); 62 | while (trie.predictive_search(agent)) { 63 | keyset.push_back(agent.key()); 64 | } 65 | if (keyset.empty()) { 66 | std::cout << "not found" << std::endl; 67 | } else { 68 | std::cout << keyset.size() << " found" << std::endl; 69 | const std::size_t end = std::min(max_num_results, keyset.size()); 70 | for (std::size_t i = 0; i < end; ++i) { 71 | std::cout << keyset[i].id() << '\t'; 72 | std::cout.write(keyset[i].ptr(), keyset[i].length()) << '\t'; 73 | std::cout << str << '\n'; 74 | } 75 | } 76 | keyset.reset(); 77 | } catch (const marisa::Exception &ex) { 78 | std::cerr << ex.what() << ": predictive_search() failed: " 79 
| << str << std::endl; 80 | return 30; 81 | } 82 | 83 | if (!std::cout) { 84 | std::cerr << "error: failed to write results to standard output" 85 | << std::endl; 86 | return 31; 87 | } 88 | } 89 | 90 | return 0; 91 | } 92 | 93 | } // namespace 94 | 95 | int main(int argc, char *argv[]) { 96 | std::ios::sync_with_stdio(false); 97 | 98 | ::cmdopt_option long_options[] = { 99 | { "max-num-results", 1, NULL, 'n' }, 100 | { "mmap-dictionary", 0, NULL, 'm' }, 101 | { "read-dictionary", 0, NULL, 'r' }, 102 | { "help", 0, NULL, 'h' }, 103 | { NULL, 0, NULL, 0 } 104 | }; 105 | ::cmdopt_t cmdopt; 106 | ::cmdopt_init(&cmdopt, argc, argv, "n:mrh", long_options); 107 | int label; 108 | while ((label = ::cmdopt_get(&cmdopt)) != -1) { 109 | switch (label) { 110 | case 'n': { 111 | char *end_of_value; 112 | const long value = std::strtol(cmdopt.optarg, &end_of_value, 10); 113 | if ((*end_of_value != '\0') || (value < 0)) { 114 | std::cerr << "error: option `-n' with an invalid argument: " 115 | << cmdopt.optarg << std::endl; 116 | } 117 | if ((value == 0) || ((unsigned long long)value > MARISA_SIZE_MAX)) { 118 | max_num_results = MARISA_SIZE_MAX; 119 | } else { 120 | max_num_results = (std::size_t)value; 121 | } 122 | break; 123 | } 124 | case 'm': { 125 | mmap_flag = true; 126 | break; 127 | } 128 | case 'r': { 129 | mmap_flag = false; 130 | break; 131 | } 132 | case 'h': { 133 | print_help(argv[0]); 134 | return 0; 135 | } 136 | default: { 137 | return 1; 138 | } 139 | } 140 | } 141 | return predictive_search(cmdopt.argv + cmdopt.optind, 142 | cmdopt.argc - cmdopt.optind); 143 | } 144 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/tools/marisa-reverse-lookup.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | 6 | #include "cmdopt.h" 7 | 8 | namespace { 9 | 10 | bool mmap_flag = true; 11 | 12 | void print_help(const char *cmd) { 13 | 
std::cerr << "Usage: " << cmd << " [OPTION]... DIC\n\n" 14 | "Options:\n" 15 | " -m, --mmap-dictionary use memory-mapped I/O to load a dictionary" 16 | " (default)\n" 17 | " -r, --read-dictionary read an entire dictionary into memory\n" 18 | " -h, --help print this help\n" 19 | << std::endl; 20 | } 21 | 22 | int reverse_lookup(const char * const *args, std::size_t num_args) { 23 | if (num_args == 0) { 24 | std::cerr << "error: dictionary is not specified" << std::endl; 25 | return 10; 26 | } else if (num_args > 1) { 27 | std::cerr << "error: more than one dictionaries are specified" 28 | << std::endl; 29 | return 11; 30 | } 31 | 32 | marisa::Trie trie; 33 | if (mmap_flag) { 34 | try { 35 | trie.mmap(args[0]); 36 | } catch (const marisa::Exception &ex) { 37 | std::cerr << ex.what() << ": failed to mmap a dictionary file: " 38 | << args[0] << std::endl; 39 | return 20; 40 | } 41 | } else { 42 | try { 43 | trie.load(args[0]); 44 | } catch (const marisa::Exception &ex) { 45 | std::cerr << ex.what() << ": failed to load a dictionary file: " 46 | << args[0] << std::endl; 47 | return 21; 48 | } 49 | } 50 | 51 | marisa::Agent agent; 52 | std::size_t key_id; 53 | while (std::cin >> key_id) { 54 | try { 55 | agent.set_query(key_id); 56 | trie.reverse_lookup(agent); 57 | std::cout << agent.key().id() << '\t'; 58 | std::cout.write(agent.key().ptr(), agent.key().length()) << '\n'; 59 | } catch (const marisa::Exception &ex) { 60 | std::cerr << ex.what() << ": reverse_lookup() failed: " 61 | << key_id << std::endl; 62 | return 30; 63 | } 64 | 65 | if (!std::cout) { 66 | std::cerr << "error: failed to write results to standard output" 67 | << std::endl; 68 | return 30; 69 | } 70 | } 71 | 72 | return 0; 73 | } 74 | 75 | } // namespace 76 | 77 | int main(int argc, char *argv[]) { 78 | std::ios::sync_with_stdio(false); 79 | 80 | ::cmdopt_option long_options[] = { 81 | { "mmap-dictionary", 0, NULL, 'm' }, 82 | { "read-dictionary", 0, NULL, 'r' }, 83 | { "help", 0, NULL, 'h' }, 84 | { 
NULL, 0, NULL, 0 } 85 | }; 86 | ::cmdopt_t cmdopt; 87 | ::cmdopt_init(&cmdopt, argc, argv, "mrh", long_options); 88 | int label; 89 | while ((label = ::cmdopt_get(&cmdopt)) != -1) { 90 | switch (label) { 91 | case 'm': { 92 | mmap_flag = true; 93 | break; 94 | } 95 | case 'r': { 96 | mmap_flag = false; 97 | break; 98 | } 99 | case 'h': { 100 | print_help(argv[0]); 101 | return 0; 102 | } 103 | default: { 104 | return 1; 105 | } 106 | } 107 | } 108 | return reverse_lookup(cmdopt.argv + cmdopt.optind, 109 | cmdopt.argc - cmdopt.optind); 110 | } 111 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/vs2008/base-test/base-test.vcproj: -------------------------------------------------------------------------------- 1 | 2 | 11 | 12 | 15 | 16 | 17 | 18 | 19 | 26 | 29 | 32 | 35 | 38 | 41 | 53 | 56 | 59 | 62 | 70 | 73 | 76 | 79 | 82 | 85 | 88 | 91 | 92 | 100 | 103 | 106 | 109 | 112 | 115 | 127 | 130 | 133 | 136 | 145 | 148 | 151 | 154 | 157 | 160 | 163 | 166 | 167 | 168 | 169 | 170 | 171 | 176 | 179 | 180 | 181 | 186 | 189 | 190 | 191 | 196 | 197 | 198 | 199 | 200 | 201 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/vs2008/io-test/io-test.vcproj: -------------------------------------------------------------------------------- 1 | 2 | 11 | 12 | 15 | 16 | 17 | 18 | 19 | 26 | 29 | 32 | 35 | 38 | 41 | 53 | 56 | 59 | 62 | 69 | 72 | 75 | 78 | 81 | 84 | 87 | 90 | 91 | 99 | 102 | 105 | 108 | 111 | 114 | 126 | 129 | 132 | 135 | 144 | 147 | 150 | 153 | 156 | 159 | 162 | 165 | 166 | 167 | 168 | 169 | 170 | 175 | 178 | 179 | 180 | 185 | 188 | 189 | 190 | 195 | 196 | 197 | 198 | 199 | 200 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/vs2008/marisa-build/marisa-build.vcproj: -------------------------------------------------------------------------------- 1 | 2 | 11 | 12 | 15 | 16 | 17 | 18 | 19 | 26 | 29 | 32 | 35 
| 38 | 41 | 53 | 56 | 59 | 62 | 69 | 72 | 75 | 78 | 81 | 84 | 87 | 90 | 91 | 99 | 102 | 105 | 108 | 111 | 114 | 126 | 129 | 132 | 135 | 144 | 147 | 150 | 153 | 156 | 159 | 162 | 165 | 166 | 167 | 168 | 169 | 170 | 175 | 178 | 179 | 182 | 183 | 184 | 189 | 192 | 193 | 194 | 199 | 200 | 201 | 202 | 203 | 204 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/vs2008/marisa-dump/marisa-dump.vcproj: -------------------------------------------------------------------------------- 1 | 2 | 11 | 12 | 15 | 16 | 17 | 18 | 19 | 26 | 29 | 32 | 35 | 38 | 41 | 53 | 56 | 59 | 62 | 69 | 72 | 75 | 78 | 81 | 84 | 87 | 90 | 91 | 99 | 102 | 105 | 108 | 111 | 114 | 126 | 129 | 132 | 135 | 144 | 147 | 150 | 153 | 156 | 159 | 162 | 165 | 166 | 167 | 168 | 169 | 170 | 175 | 178 | 179 | 182 | 183 | 184 | 189 | 192 | 193 | 194 | 199 | 200 | 201 | 202 | 203 | 204 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/vs2008/marisa-lookup/marisa-lookup.vcproj: -------------------------------------------------------------------------------- 1 | 2 | 11 | 12 | 15 | 16 | 17 | 18 | 19 | 26 | 29 | 32 | 35 | 38 | 41 | 53 | 56 | 59 | 62 | 69 | 72 | 75 | 78 | 81 | 84 | 87 | 90 | 91 | 99 | 102 | 105 | 108 | 111 | 114 | 126 | 129 | 132 | 135 | 144 | 147 | 150 | 153 | 156 | 159 | 162 | 165 | 166 | 167 | 168 | 169 | 170 | 175 | 178 | 179 | 182 | 183 | 184 | 189 | 192 | 193 | 194 | 199 | 200 | 201 | 202 | 203 | 204 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/vs2008/marisa-test/marisa-test.vcproj: -------------------------------------------------------------------------------- 1 | 2 | 11 | 12 | 15 | 16 | 17 | 18 | 19 | 26 | 29 | 32 | 35 | 38 | 41 | 53 | 56 | 59 | 62 | 69 | 72 | 75 | 78 | 81 | 84 | 87 | 90 | 91 | 99 | 102 | 105 | 108 | 111 | 114 | 126 | 129 | 132 | 135 | 144 | 147 | 150 | 153 | 156 | 159 | 162 | 165 | 166 | 167 | 168 | 169 | 
170 | 175 | 178 | 179 | 180 | 185 | 188 | 189 | 190 | 195 | 196 | 197 | 198 | 199 | 200 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/vs2008/trie-test/trie-test.vcproj: -------------------------------------------------------------------------------- 1 | 2 | 11 | 12 | 15 | 16 | 17 | 18 | 19 | 26 | 29 | 32 | 35 | 38 | 41 | 53 | 56 | 59 | 62 | 69 | 72 | 75 | 78 | 81 | 84 | 87 | 90 | 91 | 99 | 102 | 105 | 108 | 111 | 114 | 126 | 129 | 132 | 135 | 144 | 147 | 150 | 153 | 156 | 159 | 162 | 165 | 166 | 167 | 168 | 169 | 170 | 175 | 178 | 179 | 180 | 185 | 188 | 189 | 190 | 195 | 196 | 197 | 198 | 199 | 200 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/vs2008/vector-test/vector-test.vcproj: -------------------------------------------------------------------------------- 1 | 2 | 11 | 12 | 15 | 16 | 17 | 18 | 19 | 26 | 29 | 32 | 35 | 38 | 41 | 53 | 56 | 59 | 62 | 69 | 72 | 75 | 78 | 81 | 84 | 87 | 90 | 91 | 99 | 102 | 105 | 108 | 111 | 114 | 126 | 129 | 132 | 135 | 144 | 147 | 150 | 153 | 156 | 159 | 162 | 165 | 166 | 167 | 168 | 169 | 170 | 175 | 178 | 179 | 180 | 185 | 188 | 189 | 190 | 195 | 196 | 197 | 198 | 199 | 200 | -------------------------------------------------------------------------------- /benchmark/marisa-0.2.5/vs2008/vs2008.suo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yhirose/cpp-fstlib/89b47e6300fe0457e5160a1dfb25fe93bd2efcd4/benchmark/marisa-0.2.5/vs2008/vs2008.suo -------------------------------------------------------------------------------- /benchmark/ux-trie/bitVec.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2010 Daisuke Okanohara 3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions 6 | * are 
met: 7 | * 8 | * 1. Redistributions of source code must retain the above Copyright 9 | * notice, this list of conditions and the following disclaimer. 10 | * 11 | * 2. Redistributions in binary form must reproduce the above Copyright 12 | * notice, this list of conditions and the following disclaimer in the 13 | * documentation and/or other materials provided with the distribution. 14 | * 15 | * 3. Neither the name of the authors nor the names of its contributors 16 | * may be used to endorse or promote products derived from this 17 | * software without specific prior written permission. 18 | */ 19 | 20 | #include 21 | #include 22 | #include "bitVec.hpp" 23 | 24 | using namespace std; 25 | 26 | namespace ux { 27 | 28 | BitVec::BitVec() : size_(0){ 29 | } 30 | 31 | BitVec::~BitVec(){ 32 | } 33 | 34 | 35 | void BitVec::push_back(const uint8_t b){ 36 | if (size_ / S_BLOCK >= B_.size()) { 37 | B_.push_back(0); 38 | } 39 | 40 | if (b) { 41 | B_[size_ / S_BLOCK] |= (1ULL << (size_ % S_BLOCK)); 42 | } 43 | ++size_; 44 | } 45 | 46 | void BitVec::push_back_with_len(const uint64_t x, const uint64_t len){ 47 | size_t offset = size_ % S_BLOCK; 48 | if ((size_ + len - 1) / S_BLOCK >= B_.size()){ 49 | B_.push_back(0); 50 | } 51 | 52 | B_[size_ / S_BLOCK] |= (x << offset); 53 | if (offset + len - 1 >= S_BLOCK){ 54 | B_[size_ / S_BLOCK + 1] |= (x >> (S_BLOCK - offset)); 55 | } 56 | size_ += len; 57 | } 58 | 59 | void BitVec::setBit(const uint64_t pos, const uint8_t b){ 60 | if (b == 0) return; 61 | B_[pos / S_BLOCK] = 1LLU << (pos % S_BLOCK); 62 | } 63 | 64 | uint8_t BitVec::getBit(const uint64_t pos) const{ 65 | return (B_[pos/S_BLOCK] >> (pos % S_BLOCK)) & 1; 66 | } 67 | 68 | uint64_t BitVec::getBits(const uint64_t pos, const uint64_t len) const{ 69 | uint64_t blockInd1 = pos / S_BLOCK; 70 | uint64_t blockOffset1 = pos % S_BLOCK; 71 | if (blockOffset1 + len <= S_BLOCK){ 72 | return mask(B_[blockInd1] >> blockOffset1, len); 73 | } else { 74 | uint64_t blockInd2 = ((pos + len - 1) 
/ S_BLOCK); 75 | return mask((B_[blockInd1] >> blockOffset1) + (B_[blockInd2] << (S_BLOCK - blockOffset1)), len); 76 | } 77 | } 78 | 79 | void BitVec::save(ostream& os) const { 80 | os.write((const char*)&size_, sizeof(size_)); 81 | os.write((const char*)&B_[0], sizeof(B_[0])*B_.size()); 82 | } 83 | 84 | void BitVec::load(istream& ifs) { 85 | ifs.read((char*)&size_, sizeof(size_)); 86 | B_.resize((size_ + S_BLOCK - 1) / S_BLOCK); 87 | ifs.read((char*)&B_[0], sizeof(B_[0])*B_.size()); 88 | } 89 | 90 | size_t BitVec::size() const { 91 | return size_; 92 | } 93 | 94 | void BitVec::clear() { 95 | B_.clear(); 96 | size_ = 0; 97 | } 98 | 99 | void BitVec::print() const { 100 | for (size_t i = 0; i < size_; ++i){ 101 | if (getBit(i)) cout << "1"; 102 | else cout << "0"; 103 | if ((i+1)%8 == 0){ 104 | cout << " "; 105 | if ((i+1)%64 == 0) cout << endl; 106 | } 107 | } 108 | } 109 | 110 | size_t BitVec::getAllocSize() const { 111 | return B_.size() * sizeof(B_[0]); 112 | } 113 | 114 | uint64_t BitVec::lookupBlock(const size_t ind) const{ 115 | return B_[ind]; 116 | } 117 | 118 | 119 | } 120 | -------------------------------------------------------------------------------- /benchmark/ux-trie/bitVec.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2010 Daisuke Okanohara 3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions 6 | * are met: 7 | * 8 | * 1. Redistributions of source code must retain the above Copyright 9 | * notice, this list of conditions and the following disclaimer. 10 | * 11 | * 2. Redistributions in binary form must reproduce the above Copyright 12 | * notice, this list of conditions and the following disclaimer in the 13 | * documentation and/or other materials provided with the distribution. 14 | * 15 | * 3. 
Neither the name of the authors nor the names of its contributors 16 | * may be used to endorse or promote products derived from this 17 | * software without specific prior written permission. 18 | */ 19 | 20 | #ifndef BIT_VEC_HPP__ 21 | #define BIT_VEC_HPP__ 22 | 23 | #include 24 | #include 25 | #include 26 | #include "uxUtil.hpp" 27 | 28 | namespace ux { 29 | 30 | static const uint64_t L_SHIFT = 9; 31 | static const uint64_t L_BLOCK = 1LLU << L_SHIFT; 32 | static const uint64_t S_SHIFT = 6; 33 | static const uint64_t S_BLOCK = 1LLU << S_SHIFT; 34 | static const uint64_t S_RATIO = L_BLOCK / S_BLOCK; 35 | 36 | class BitVec { 37 | public: 38 | BitVec(); 39 | ~BitVec(); 40 | 41 | void push_back(const uint8_t b); 42 | void push_back_with_len(const uint64_t x, const uint64_t len); 43 | 44 | void setBit(const uint64_t pos, const uint8_t b); 45 | uint8_t getBit(const uint64_t pos) const; 46 | uint64_t getBits(const uint64_t pos, const uint64_t len) const; 47 | void save(std::ostream& os) const; 48 | void load(std::istream& is); 49 | size_t size() const; 50 | void clear(); 51 | void print() const; 52 | size_t getAllocSize() const; 53 | uint64_t lookupBlock(const size_t ind) const; 54 | 55 | private: 56 | size_t size_; 57 | std::vector B_; 58 | }; 59 | 60 | } 61 | 62 | 63 | 64 | #endif // BIT_VEC_HPP__ 65 | -------------------------------------------------------------------------------- /benchmark/ux-trie/bitVecTest.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2010 Daisuke Okanohara 3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions 6 | * are met: 7 | * 8 | * 1. Redistributions of source code must retain the above Copyright 9 | * notice, this list of conditions and the following disclaimer. 10 | * 11 | * 2. 
Redistributions in binary form must reproduce the above Copyright 12 | * notice, this list of conditions and the following disclaimer in the 13 | * documentation and/or other materials provided with the distribution. 14 | * 15 | * 3. Neither the name of the authors nor the names of its contributors 16 | * may be used to endorse or promote products derived from this 17 | * software without specific prior written permission. 18 | */ 19 | 20 | #include 21 | #include 22 | #include 23 | #include "bitVec.hpp" 24 | #include "rsDic.hpp" 25 | #include "uxUtil.hpp" 26 | 27 | using namespace std; 28 | using namespace ux; 29 | 30 | TEST(bitvec, popcount){ 31 | uint64_t x = 0; 32 | for (uint64_t i = 0; i < 64; ++i){ 33 | ASSERT_EQ(i, popCount(x)); 34 | x |= (1LLU << i); 35 | } 36 | } 37 | 38 | TEST(bitvec, selectblock){ 39 | uint64_t x = 0; 40 | 41 | for (uint64_t i = 0; i < 64; ++i){ 42 | ASSERT_EQ(i, selectBlock(i+1, x, 0)); 43 | } 44 | 45 | for (uint64_t i = 0; i < 64; ++i){ 46 | x |= (1LLU << i); 47 | } 48 | 49 | for (uint64_t i = 0; i < 64; ++i){ 50 | ASSERT_EQ(i, selectBlock(i+1, x, 1)); 51 | } 52 | } 53 | 54 | TEST(bitvec, trivial_zero){ 55 | BitVec bv; 56 | for (int i = 0; i < 1000; ++i){ 57 | bv.push_back(0); 58 | } 59 | 60 | RSDic rs; 61 | rs.build(bv); 62 | ASSERT_EQ(1000, rs.size()); 63 | for (size_t i = 0; i < rs.size(); ++i){ 64 | ASSERT_EQ(0 , bv.getBit(i)); 65 | ASSERT_EQ(i+1, rs.rank(i, 0)); 66 | ASSERT_EQ(i , rs.select(i+1, 0)); 67 | } 68 | } 69 | 70 | TEST(bitvec, trivial_one){ 71 | BitVec bv; 72 | for (int i = 0; i < 1000; ++i){ 73 | bv.push_back(1); 74 | } 75 | 76 | RSDic rs; 77 | rs.build(bv); 78 | ASSERT_EQ(1000, rs.size()); 79 | for (size_t i = 0; i < rs.size(); ++i){ 80 | ASSERT_EQ(1 , rs.getBit(i)); 81 | ASSERT_EQ(i+1, rs.rank(i, 1)); 82 | ASSERT_EQ(i , rs.select(i+1, 1)); 83 | } 84 | } 85 | 86 | /* 87 | TEST(bitvec, trivial_interleave){ 88 | RSDic bv; 89 | for (int i = 0; i < 1000; ++i){ 90 | bv.push_back((i+1)%2); 91 | } 92 | bv.build(); 93 | 
ASSERT_EQ(1000, bv.size()); 94 | for (size_t i = 0; i < bv.size(); ++i){ 95 | ASSERT_EQ(i/2 + 1, bv.rank2(i)); 96 | } 97 | } 98 | */ 99 | 100 | 101 | TEST(bitvec, random){ 102 | BitVec bv; 103 | vector B; 104 | for (int i = 0; i < 100000; ++i){ 105 | int b = rand() % 2; 106 | bv.push_back(b); 107 | B.push_back(b); 108 | } 109 | 110 | RSDic rs; 111 | rs.build(bv); 112 | ASSERT_EQ(100000, rs.size()); 113 | int sum = 0; 114 | for (size_t i = 0; i < rs.size(); ++i){ 115 | ASSERT_EQ(B[i] , bv.getBit(i)); 116 | sum += B[i]; 117 | if (B[i]){ 118 | ASSERT_EQ(sum, rs.rank(i, 1)); 119 | ASSERT_EQ(i, rs.select(sum, 1)); 120 | } else { 121 | ASSERT_EQ(i - sum + 1, rs.rank(i, 0)); 122 | ASSERT_EQ(i, rs.select(i-sum+1, 0)); 123 | } 124 | } 125 | } 126 | 127 | TEST(bitvec, vacuum){ 128 | BitVec bv; 129 | vector B; 130 | for (int i = 0; i < 100000; ++i){ 131 | int b = rand() % 2; 132 | bv.push_back(b); 133 | B.push_back(b); 134 | } 135 | 136 | RSDic rs; 137 | rs.build(bv); 138 | } 139 | 140 | 141 | 142 | 143 | 144 | -------------------------------------------------------------------------------- /benchmark/ux-trie/rsDic.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2010 Daisuke Okanohara 3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions 6 | * are met: 7 | * 8 | * 1. Redistributions of source code must retain the above Copyright 9 | * notice, this list of conditions and the following disclaimer. 10 | * 11 | * 2. Redistributions in binary form must reproduce the above Copyright 12 | * notice, this list of conditions and the following disclaimer in the 13 | * documentation and/or other materials provided with the distribution. 14 | * 15 | * 3. 
Neither the name of the authors nor the names of its contributors 16 | * may be used to endorse or promote products derived from this 17 | * software without specific prior written permission. 18 | */ 19 | 20 | #include 21 | #include 22 | #include "rsDic.hpp" 23 | 24 | using namespace std; 25 | 26 | namespace ux { 27 | 28 | RSDic::RSDic() : size_(0) { 29 | } 30 | 31 | RSDic::~RSDic() { 32 | } 33 | 34 | void RSDic::build(BitVec& bv){ 35 | size_ = bv.size(); 36 | swap(bitVec_, bv); 37 | L_.resize((size_ + L_BLOCK-1) / L_BLOCK); 38 | size_t sum = 0; 39 | for (uint64_t il = 0; il < size_; il += L_BLOCK){ 40 | L_[il/L_BLOCK] = sum; 41 | for (uint64_t is = 0; is < L_BLOCK && il + is < size_; is += S_BLOCK){ 42 | sum += popCount(bitVec_.lookupBlock((il + is)/S_BLOCK)); 43 | } 44 | } 45 | L_.push_back(sum); 46 | } 47 | 48 | uint64_t RSDic::rank(const uint64_t pos, const uint8_t b) const{ 49 | uint64_t pos1 = pos+1; 50 | uint64_t rank1 = L_[pos1 >> L_SHIFT]; 51 | uint64_t bpos = (pos1 >> L_SHIFT) << (L_SHIFT - S_SHIFT); 52 | uint64_t epos = pos1 >> S_SHIFT; 53 | for (uint64_t i = bpos; i < epos; ++i){ 54 | rank1 += popCount(bitVec_.lookupBlock(i)); 55 | } 56 | rank1 += popCountMasked(bitVec_.lookupBlock(epos), pos1 % S_BLOCK); 57 | 58 | if (b == 1) return rank1; 59 | else return pos1 - rank1; 60 | } 61 | 62 | uint64_t RSDic::select(const uint64_t pos, const uint8_t b) const{ 63 | uint64_t retPos = 0; 64 | uint64_t posS = selectOverL(pos, b, retPos); 65 | return posS * S_BLOCK + selectBlock(retPos, bitVec_.lookupBlock(posS), b); 66 | } 67 | 68 | uint64_t RSDic::selectOverL(const uint64_t pos, const uint8_t b, uint64_t& retPos) const { 69 | uint64_t left = 0; 70 | uint64_t right = L_.size(); 71 | 72 | retPos = pos; 73 | while (left < right){ 74 | uint64_t mid = (left + right)/2; 75 | assert(mid < L_.size()); 76 | if (getBitNum(L_[mid], L_BLOCK * mid, b) < retPos) left = mid+1; 77 | else right = mid; 78 | } 79 | uint64_t posL = (left != 0) ? 
left - 1 : 0; 80 | uint64_t posS = posL * S_RATIO; 81 | 82 | assert(retPos >= getBitNum(L_[posL], L_BLOCK * posL, b)); 83 | 84 | retPos -= getBitNum(L_[posL], L_BLOCK * posL, b); 85 | for (;;posS++){ 86 | if (posS >= bitVec_.size()) break; 87 | uint64_t num = getBitNum(popCount(bitVec_.lookupBlock(posS)), S_BLOCK, b); 88 | if (retPos <= num) break; 89 | retPos -= num; 90 | } 91 | return posS; 92 | } 93 | 94 | void RSDic::save(ostream& ofs) const{ 95 | bitVec_.save(ofs); 96 | } 97 | 98 | void RSDic::load(istream& ifs) { 99 | bitVec_.load(ifs); 100 | build(bitVec_); 101 | } 102 | 103 | size_t RSDic::getAllocSize() const { 104 | return bitVec_.getAllocSize() + sizeof(L_[0]) * L_.size(); 105 | } 106 | 107 | uint8_t RSDic::getBit(const uint64_t pos) const{ 108 | return bitVec_.getBit(pos); 109 | } 110 | 111 | size_t RSDic::size() const { 112 | return bitVec_.size(); 113 | } 114 | 115 | void RSDic::clear() { 116 | bitVec_.clear(); 117 | L_.clear(); 118 | size_ = 0; 119 | } 120 | 121 | } 122 | -------------------------------------------------------------------------------- /benchmark/ux-trie/rsDic.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2010 Daisuke Okanohara 3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions 6 | * are met: 7 | * 8 | * 1. Redistributions of source code must retain the above Copyright 9 | * notice, this list of conditions and the following disclaimer. 10 | * 11 | * 2. Redistributions in binary form must reproduce the above Copyright 12 | * notice, this list of conditions and the following disclaimer in the 13 | * documentation and/or other materials provided with the distribution. 14 | * 15 | * 3. 
Neither the name of the authors nor the names of its contributors 16 | * may be used to endorse or promote products derived from this 17 | * software without specific prior written permission. 18 | */ 19 | 20 | #ifndef RSDIC_HPP__ 21 | #define RSDIC_HPP__ 22 | 23 | #include 24 | #include 25 | #include 26 | #include "bitVec.hpp" 27 | #include "uxUtil.hpp" 28 | 29 | namespace ux { 30 | 31 | class RSDic { 32 | public: 33 | RSDic(); 34 | ~RSDic(); 35 | 36 | void build(BitVec& bv); 37 | uint64_t rank(uint64_t pos, uint8_t b) const; 38 | uint64_t select(uint64_t pos, uint8_t b) const; 39 | 40 | void save(std::ostream& os) const; 41 | void load(std::istream& is); 42 | size_t getAllocSize() const; 43 | uint8_t getBit(uint64_t pos) const; 44 | size_t size() const; 45 | void clear(); 46 | 47 | private: 48 | uint64_t selectOverL(uint64_t pos, uint8_t b, uint64_t& retPos) const; 49 | 50 | BitVec bitVec_; 51 | std::vector L_; 52 | size_t size_; 53 | }; 54 | 55 | } 56 | 57 | #endif // RSDIC_HPP__ 58 | -------------------------------------------------------------------------------- /benchmark/ux-trie/ux.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2010 Daisuke Okanohara 3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions 6 | * are met: 7 | * 8 | * 1. Redistributions of source code must retain the above Copyright 9 | * notice, this list of conditions and the following disclaimer. 10 | * 11 | * 2. Redistributions in binary form must reproduce the above Copyright 12 | * notice, this list of conditions and the following disclaimer in the 13 | * documentation and/or other materials provided with the distribution. 14 | * 15 | * 3. Neither the name of the authors nor the names of its contributors 16 | * may be used to endorse or promote products derived from this 17 | * software without specific prior written permission. 
18 | */ 19 | 20 | #ifndef UX_HPP__ 21 | #define UX_HPP__ 22 | 23 | #include "uxTrie.hpp" 24 | #include "uxMap.hpp" 25 | 26 | #endif // UX_HPP__ 27 | -------------------------------------------------------------------------------- /benchmark/ux-trie/uxMap.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yhirose/cpp-fstlib/89b47e6300fe0457e5160a1dfb25fe93bd2efcd4/benchmark/ux-trie/uxMap.cpp -------------------------------------------------------------------------------- /benchmark/ux-trie/uxMapTest.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2010 Daisuke Okanohara 3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions 6 | * are met: 7 | * 8 | * 1. Redistributions of source code must retain the above Copyright 9 | * notice, this list of conditions and the following disclaimer. 10 | * 11 | * 2. Redistributions in binary form must reproduce the above Copyright 12 | * notice, this list of conditions and the following disclaimer in the 13 | * documentation and/or other materials provided with the distribution. 14 | * 15 | * 3. Neither the name of the authors nor the names of its contributors 16 | * may be used to endorse or promote products derived from this 17 | * software without specific prior written permission. 
18 | */ 19 | 20 | #include 21 | #include 22 | #include 23 | #include "uxMap.hpp" 24 | 25 | using namespace std; 26 | 27 | TEST(uxmap, trivial){ 28 | ux::Map uxm; 29 | 30 | vector wordList; 31 | vector valueList; 32 | uxm.build(wordList); 33 | ASSERT_EQ(0, uxm.size()); 34 | } 35 | 36 | 37 | TEST(uxmap, simple){ 38 | vector wordList; 39 | vector valueList; 40 | wordList.push_back("i"); 41 | valueList.push_back(1); 42 | wordList.push_back("in"); 43 | valueList.push_back(2); 44 | wordList.push_back("to"); 45 | valueList.push_back(3); 46 | wordList.push_back("we"); 47 | valueList.push_back(4); 48 | wordList.push_back("inn"); 49 | valueList.push_back(5); 50 | wordList.push_back("tea"); 51 | valueList.push_back(6); 52 | wordList.push_back("ten"); 53 | valueList.push_back(7); 54 | 55 | vector origWordList = wordList; 56 | ux::Map uxm; 57 | uxm.build(wordList); 58 | 59 | for (size_t i = 0; i < origWordList.size(); ++i){ 60 | string key = origWordList[i]; 61 | ASSERT_EQ(0, uxm.set(key.c_str(), key.size(), valueList[i])); 62 | } 63 | 64 | for (size_t i = 0; i < origWordList.size(); ++i){ 65 | string key = origWordList[i]; 66 | int ret = -1; 67 | ASSERT_EQ(0, uxm.get(key.c_str(), key.size(), ret)); 68 | ASSERT_EQ(valueList[i], ret); 69 | } 70 | } 71 | 72 | TEST(uxmap, pair){ 73 | vector > kvs; 74 | kvs.push_back(make_pair("i", 1)); 75 | kvs.push_back(make_pair("in", 2)); 76 | kvs.push_back(make_pair("to", 3)); 77 | kvs.push_back(make_pair("we", 4)); 78 | kvs.push_back(make_pair("inn", 5)); 79 | kvs.push_back(make_pair("tea", 6)); 80 | kvs.push_back(make_pair("ten", 7)); 81 | 82 | ux::Map uxm; 83 | uxm.build(kvs); 84 | 85 | for (size_t i = 0; i < kvs.size(); ++i){ 86 | int ret = -1; 87 | string key = kvs[i].first; 88 | ASSERT_EQ(0, uxm.get(key.c_str(), key.size(), ret)); 89 | ASSERT_EQ(kvs[i].second, ret); 90 | } 91 | } 92 | 93 | TEST(uxmap, map){ 94 | map kvs; 95 | kvs[string("i")] = 1; 96 | kvs[string("in")] = 2; 97 | kvs[string("to")] = 3; 98 | kvs[string("we")] = 4; 99 | 
kvs[string("inn")] = 5; 100 | kvs[string("tea")] = 6; 101 | kvs[string("ten")] = 7; 102 | 103 | ux::Map uxm; 104 | uxm.build(kvs); 105 | for (map::const_iterator it = kvs.begin(); 106 | it != kvs.end(); ++it){ 107 | string key = it->first; 108 | int ret = -1; 109 | ASSERT_EQ(0, uxm.get(key.c_str(), key.size(), ret)); 110 | ASSERT_EQ(it->second, ret); 111 | } 112 | } 113 | 114 | TEST(uxmap, save){ 115 | map kvs; 116 | kvs[string("i")] = 1; 117 | kvs[string("in")] = 2; 118 | kvs[string("to")] = 3; 119 | kvs[string("we")] = 4; 120 | kvs[string("inn")] = 5; 121 | kvs[string("tea")] = 6; 122 | kvs[string("ten")] = 7; 123 | 124 | ux::Map uxm; 125 | uxm.build(kvs); 126 | 127 | ostringstream os; 128 | ASSERT_EQ(0, uxm.save(os)); 129 | istringstream is(os.str()); 130 | ux::Map uxm_load; 131 | ASSERT_EQ(0, uxm_load.load(is)); 132 | for (map::const_iterator it = kvs.begin(); 133 | it != kvs.end(); ++it){ 134 | string key = it->first; 135 | int ret = -1; 136 | ASSERT_EQ(0, uxm_load.get(key.c_str(), key.size(), ret)); 137 | ASSERT_EQ(it->second, ret); 138 | } 139 | } 140 | -------------------------------------------------------------------------------- /benchmark/ux-trie/uxUtil.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2010 Daisuke Okanohara 3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions 6 | * are met: 7 | * 8 | * 1. Redistributions of source code must retain the above Copyright 9 | * notice, this list of conditions and the following disclaimer. 10 | * 11 | * 2. Redistributions in binary form must reproduce the above Copyright 12 | * notice, this list of conditions and the following disclaimer in the 13 | * documentation and/or other materials provided with the distribution. 14 | * 15 | * 3. 
// Bit-twiddling helpers for the ux succinct structures; the matching
// declarations are in uxUtil.hpp.
#include <stdint.h>

namespace ux {

// Number of bits needed to represent x: 0 for x == 0, otherwise the index of
// the highest set bit plus one (so 64 when bit 63 is set).
uint64_t lg2(const uint64_t x) {
  uint64_t ret = 0;
  // Bound the shift amount: shifting a 64-bit value by 64 is undefined.
  while (ret < 64 && (x >> ret)) {
    ++ret;
  }
  return ret;
}

// Keeps the lowest `pos` bits of x. pos >= 64 returns x unchanged.
uint64_t mask(uint64_t x, uint64_t pos) {
  // 1 << 64 is undefined, so the full-width case is handled explicitly.
  if (pos >= 64) return x;
  return x & ((1LLU << pos) - 1);
}

// Number of set bits in r, by parallel summation of 2-, 4- and 8-bit fields.
uint64_t popCount(uint64_t r) {
  r = (r & 0x5555555555555555ULL) +
      ((r >> 1) & 0x5555555555555555ULL);
  r = (r & 0x3333333333333333ULL) +
      ((r >> 2) & 0x3333333333333333ULL);
  r = (r + (r >> 4)) & 0x0f0f0f0f0f0f0f0fULL;
  r = r + (r >> 8);
  r = r + (r >> 16);
  r = r + (r >> 32);
  return (uint64_t)(r & 0x7f);
}

// Number of set bits among the lowest `pos` bits of x.
uint64_t popCountMasked(uint64_t x, uint64_t pos) {
  return popCount(mask(x, pos));
}

// Bit index (0-based) of the r-th (1-based) bit equal to b inside x.
// Returns 64 when x holds fewer than r such bits.
uint64_t selectBlock(uint64_t r, uint64_t x, uint8_t b) {
  if (!b) x = ~x;  // selecting zeros == selecting ones in the complement
  // x1 / x2 / x3 hold per-2-bit, per-4-bit and per-byte population counts.
  const uint64_t x1 = x - ((x & 0xAAAAAAAAAAAAAAAALLU) >> 1);
  const uint64_t x2 = (x1 & 0x3333333333333333LLU) + ((x1 >> 2) & 0x3333333333333333LLU);
  const uint64_t x3 = (x2 + (x2 >> 4)) & 0x0F0F0F0F0F0F0F0FLLU;

  // Locate the byte containing the target bit. The scan stops at bit 64 so an
  // out-of-range rank can neither shift by 64 nor loop forever.
  uint64_t pos = 0;
  for (; pos < 64; pos += 8) {
    const uint64_t byteCount = (x3 >> pos) & 0xFFLLU;
    if (r <= byteCount) break;
    r -= byteCount;
  }
  if (pos >= 64) return 64;

  // Narrow down: nibble, then bit pair, then single bit.
  const uint64_t v2 = (x2 >> pos) & 0xFLLU;
  if (r > v2) {
    r -= v2;
    pos += 4;
  }

  const uint64_t v1 = (x1 >> pos) & 0x3LLU;
  if (r > v1) {
    r -= v1;
    pos += 2;
  }

  const uint64_t v0 = (x >> pos) & 0x1LLU;
  if (v0 < r) {
    r -= v0;
    pos += 1;
  }

  return pos;
}

// Given oneNum set bits among num bits, returns how many bits equal `bit`.
uint64_t getBitNum(uint64_t oneNum, uint64_t num, uint8_t bit) {
  if (bit) return oneNum;
  else return num - oneNum;
}

}
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2010 Daisuke Okanohara 3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions 6 | * are met: 7 | * 8 | * 1. Redistributions of source code must retain the above Copyright 9 | * notice, this list of conditions and the following disclaimer. 10 | * 11 | * 2. Redistributions in binary form must reproduce the above Copyright 12 | * notice, this list of conditions and the following disclaimer in the 13 | * documentation and/or other materials provided with the distribution. 14 | * 15 | * 3. Neither the name of the authors nor the names of its contributors 16 | * may be used to endorse or promote products derived from this 17 | * software without specific prior written permission. 18 | */ 19 | 20 | #ifndef UX_UTIL_HPP__ 21 | #define UX_UTIL_HPP__ 22 | 23 | #include 24 | 25 | namespace ux { 26 | uint64_t lg2(uint64_t x); 27 | uint64_t mask(uint64_t x, uint64_t pos); 28 | uint64_t popCount(uint64_t r); 29 | uint64_t popCountMasked(uint64_t x, uint64_t pos); 30 | uint64_t selectBlock(uint64_t pos, uint64_t x, uint8_t b); 31 | uint64_t getBitNum(uint64_t oneNum, uint64_t num, uint8_t bit); 32 | } 33 | 34 | #endif // UX_UTIL_HPP__ 35 | 36 | -------------------------------------------------------------------------------- /benchmark/ux-trie/wscript: -------------------------------------------------------------------------------- 1 | APPNAME= 'ux' 2 | 3 | top = '.' 
4 | out = 'bin' 5 | 6 | def options(ctx): 7 | pass 8 | 9 | def configure(ctx): 10 | pass 11 | 12 | def build(bld): 13 | bld.shlib( 14 | source = 'uxTrie.cpp bitVec.cpp rsDic.cpp uxUtil.cpp uxMap.cpp', 15 | target = 'ux', 16 | name = 'UX', 17 | includes = '.') 18 | bld.program( 19 | source = 'uxMain.cpp', 20 | target = 'ux', 21 | includes = '.', 22 | use = 'UX') 23 | bld.program( 24 | features = 'gtest', 25 | source = 'uxTest.cpp', 26 | target = 'ux_test', 27 | use = 'UX' , 28 | includes = '.') 29 | bld.program( 30 | features = 'gtest', 31 | source = 'bitVecTest.cpp', 32 | target = 'bitvec_test', 33 | use = 'UX', 34 | includes = '.') 35 | 36 | bld.program( 37 | features = 'gtest', 38 | source = 'uxMapTest.cpp', 39 | target = 'uxmap_test', 40 | use = 'UX', 41 | includes = '.') 42 | 43 | bld.install_files('${PREFIX}/include/ux', bld.path.ant_glob('*.hpp')) 44 | -------------------------------------------------------------------------------- /cmd/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.14) 2 | project(cmd) 3 | 4 | add_executable(fst main.cc) 5 | 6 | target_include_directories(fst PRIVATE ..) 
7 | -------------------------------------------------------------------------------- /fstlib.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yhirose/cpp-fstlib/89b47e6300fe0457e5160a1dfb25fe93bd2efcd4/fstlib.pdf -------------------------------------------------------------------------------- /test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.14) 2 | project(test) 3 | 4 | include(FetchContent) 5 | FetchContent_Declare( 6 | googletest 7 | URL https://github.com/google/googletest/archive/609281088cfefc76f9d0ce82e1ff6c30cc3591e5.zip 8 | ) 9 | # For Windows: Prevent overriding the parent project's compiler/linker settings 10 | set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) 11 | FetchContent_MakeAvailable(googletest) 12 | 13 | enable_testing() 14 | 15 | add_executable(test-main test.cc) 16 | 17 | target_include_directories(test-main PRIVATE ..) 18 | target_link_libraries(test-main PRIVATE gtest_main) 19 | 20 | include(GoogleTest) 21 | gtest_discover_tests(test-main) 22 | --------------------------------------------------------------------------------