├── LICENSE ├── README.md ├── releases ├── rime-table-decompiler_linux-64.7z └── rime-table-decompiler_win.7z └── src ├── CMakeLists.txt ├── common.h ├── darts.h ├── main.cpp ├── mapped_file.cc ├── mapped_file.h ├── marisa.h ├── marisa ├── agent.cc ├── agent.h ├── base.h ├── exception.h ├── grimoire │ ├── algorithm.h │ ├── algorithm │ │ └── sort.h │ ├── intrin.h │ ├── io.h │ ├── io │ │ ├── mapper.cc │ │ ├── mapper.h │ │ ├── reader.cc │ │ ├── reader.h │ │ ├── writer.cc │ │ └── writer.h │ ├── trie.h │ ├── trie │ │ ├── cache.h │ │ ├── config.h │ │ ├── entry.h │ │ ├── header.h │ │ ├── history.h │ │ ├── key.h │ │ ├── louds-trie.cc │ │ ├── louds-trie.h │ │ ├── range.h │ │ ├── state.h │ │ ├── tail.cc │ │ └── tail.h │ ├── vector.h │ └── vector │ │ ├── bit-vector.cc │ │ ├── bit-vector.h │ │ ├── flat-vector.h │ │ ├── pop-count.h │ │ ├── rank-index.h │ │ └── vector.h ├── iostream.h ├── key.h ├── keyset.cc ├── keyset.h ├── mstdio.h ├── query.h ├── scoped-array.h ├── scoped-ptr.h ├── trie.cc └── trie.h ├── no_logging.h ├── spelling.h ├── string_table.cc ├── string_table.h ├── table.cc ├── table.h ├── thirdparty └── marisa-trie │ ├── .gitignore │ ├── AUTHORS │ ├── COPYING │ ├── ChangeLog │ ├── INSTALL │ ├── Makefile │ ├── Makefile.am │ ├── Makefile.in │ ├── NEWS │ ├── README │ ├── aclocal.m4 │ ├── bindings │ ├── Makefile │ ├── marisa-swig.cxx │ ├── marisa-swig.h │ ├── marisa-swig.i │ ├── perl │ │ ├── Makefile.PL │ │ ├── marisa-swig.cxx │ │ ├── marisa-swig.h │ │ ├── marisa-swig_wrap.cxx │ │ ├── marisa.pm │ │ ├── sample.dic │ │ └── sample.pl │ ├── python │ │ ├── marisa-swig.cxx │ │ ├── marisa-swig.h │ │ ├── marisa-swig_wrap.cxx │ │ ├── marisa.py │ │ ├── sample.dic │ │ ├── sample.py │ │ └── setup.py │ └── ruby │ │ ├── extconf.rb │ │ ├── marisa-swig.cxx │ │ ├── marisa-swig.h │ │ ├── marisa-swig_wrap.cxx │ │ ├── sample.dic │ │ └── sample.rb │ ├── config.guess │ ├── config.log │ ├── config.status │ ├── config.sub │ ├── configure │ ├── configure.ac │ ├── depcomp │ ├── docs │ ├── 
readme.en.html │ ├── readme.ja.html │ └── style.css │ ├── install-sh │ ├── lib │ ├── .libs │ │ ├── agent.o │ │ ├── bit-vector.o │ │ ├── keyset.o │ │ ├── libmarisa.0.dylib │ │ ├── libmarisa.a │ │ ├── libmarisa.dylib │ │ ├── libmarisa.la │ │ ├── libmarisa.lai │ │ ├── louds-trie.o │ │ ├── mapper.o │ │ ├── reader.o │ │ ├── tail.o │ │ ├── trie.o │ │ └── writer.o │ ├── Makefile │ ├── Makefile.am │ ├── Makefile.in │ ├── agent.lo │ ├── agent.o │ ├── bit-vector.lo │ ├── bit-vector.o │ ├── keyset.lo │ ├── keyset.o │ ├── libmarisa.la │ ├── louds-trie.lo │ ├── louds-trie.o │ ├── mapper.lo │ ├── mapper.o │ ├── marisa.h │ ├── marisa │ │ ├── agent.cc │ │ ├── agent.h │ │ ├── base.h │ │ ├── exception.h │ │ ├── grimoire │ │ │ ├── algorithm.h │ │ │ ├── algorithm │ │ │ │ └── sort.h │ │ │ ├── intrin.h │ │ │ ├── io.h │ │ │ ├── io │ │ │ │ ├── mapper.cc │ │ │ │ ├── mapper.h │ │ │ │ ├── reader.cc │ │ │ │ ├── reader.h │ │ │ │ ├── writer.cc │ │ │ │ └── writer.h │ │ │ ├── trie.h │ │ │ ├── trie │ │ │ │ ├── cache.h │ │ │ │ ├── config.h │ │ │ │ ├── entry.h │ │ │ │ ├── header.h │ │ │ │ ├── history.h │ │ │ │ ├── key.h │ │ │ │ ├── louds-trie.cc │ │ │ │ ├── louds-trie.h │ │ │ │ ├── range.h │ │ │ │ ├── state.h │ │ │ │ ├── tail.cc │ │ │ │ └── tail.h │ │ │ ├── vector.h │ │ │ └── vector │ │ │ │ ├── bit-vector.cc │ │ │ │ ├── bit-vector.h │ │ │ │ ├── flat-vector.h │ │ │ │ ├── pop-count.h │ │ │ │ ├── rank-index.h │ │ │ │ └── vector.h │ │ ├── iostream.h │ │ ├── key.h │ │ ├── keyset.cc │ │ ├── keyset.h │ │ ├── mstdio.h │ │ ├── query.h │ │ ├── scoped-array.h │ │ ├── scoped-ptr.h │ │ ├── trie.cc │ │ └── trie.h │ ├── reader.lo │ ├── reader.o │ ├── tail.lo │ ├── tail.o │ ├── trie.lo │ ├── trie.o │ ├── writer.lo │ └── writer.o │ ├── libtool │ ├── ltmain.sh │ ├── m4 │ ├── libtool.m4 │ ├── ltoptions.m4 │ ├── ltsugar.m4 │ ├── ltversion.m4 │ └── lt~obsolete.m4 │ ├── marisa.pc │ ├── marisa.pc.in │ ├── missing │ ├── tests │ ├── Makefile │ ├── Makefile.am │ ├── Makefile.in │ ├── base-test.cc │ ├── io-test.cc │ ├── 
marisa-assert.h │ ├── marisa-test.cc │ ├── trie-test.cc │ └── vector-test.cc │ ├── tools │ ├── .libs │ │ ├── cmdopt.o │ │ ├── libcmdopt.a │ │ ├── libcmdopt.la │ │ ├── marisa-benchmark │ │ ├── marisa-build │ │ ├── marisa-common-prefix-search │ │ ├── marisa-dump │ │ ├── marisa-lookup │ │ ├── marisa-predictive-search │ │ └── marisa-reverse-lookup │ ├── Makefile │ ├── Makefile.am │ ├── Makefile.in │ ├── cmdopt.cc │ ├── cmdopt.h │ ├── cmdopt.lo │ ├── cmdopt.o │ ├── libcmdopt.la │ ├── marisa-benchmark │ ├── marisa-benchmark.cc │ ├── marisa-benchmark.o │ ├── marisa-build │ ├── marisa-build.cc │ ├── marisa-build.o │ ├── marisa-common-prefix-search │ ├── marisa-common-prefix-search.cc │ ├── marisa-common-prefix-search.o │ ├── marisa-dump │ ├── marisa-dump.cc │ ├── marisa-dump.o │ ├── marisa-lookup │ ├── marisa-lookup.cc │ ├── marisa-lookup.o │ ├── marisa-predictive-search │ ├── marisa-predictive-search.cc │ ├── marisa-predictive-search.o │ ├── marisa-reverse-lookup │ ├── marisa-reverse-lookup.cc │ └── marisa-reverse-lookup.o │ ├── vs2008 │ ├── base-test │ │ └── base-test.vcproj │ ├── io-test │ │ └── io-test.vcproj │ ├── libmarisa │ │ └── libmarisa.vcproj │ ├── marisa-benchmark │ │ └── marisa-benchmark.vcproj │ ├── marisa-build │ │ └── marisa-build.vcproj │ ├── marisa-common-prefix-search │ │ └── marisa-common-prefix-search.vcproj │ ├── marisa-dump │ │ └── marisa-dump.vcproj │ ├── marisa-lookup │ │ └── marisa-lookup.vcproj │ ├── marisa-predictive-search │ │ └── marisa-predictive-search.vcproj │ ├── marisa-reverse-lookup │ │ └── marisa-reverse-lookup.vcproj │ ├── marisa-test │ │ └── marisa-test.vcproj │ ├── trie-test │ │ └── trie-test.vcproj │ ├── vector-test │ │ └── vector-test.vcproj │ └── vs2008.sln │ ├── vs2010 │ ├── base-test │ │ ├── base-test.vcxproj │ │ └── base-test.vcxproj.filters │ ├── io-test │ │ ├── io-test.vcxproj │ │ └── io-test.vcxproj.filters │ ├── libmarisa │ │ ├── libmarisa.vcxproj │ │ └── libmarisa.vcxproj.filters │ ├── marisa-benchmark │ │ ├── 
marisa-benchmark.vcxproj │ │ └── marisa-benchmark.vcxproj.filters │ ├── marisa-build │ │ ├── marisa-build.vcxproj │ │ └── marisa-build.vcxproj.filters │ ├── marisa-common-prefix-search │ │ ├── marisa-common-prefix-search.vcxproj │ │ └── marisa-common-prefix-search.vcxproj.filters │ ├── marisa-dump │ │ ├── marisa-dump.vcxproj │ │ └── marisa-dump.vcxproj.filters │ ├── marisa-lookup │ │ ├── marisa-lookup.vcxproj │ │ └── marisa-lookup.vcxproj.filters │ ├── marisa-predictive-search │ │ ├── marisa-predictive-search.vcxproj │ │ └── marisa-predictive-search.vcxproj.filters │ ├── marisa-reverse-lookup │ │ ├── marisa-reverse-lookup.vcxproj │ │ └── marisa-reverse-lookup.vcxproj.filters │ ├── marisa-test │ │ ├── marisa-test.vcxproj │ │ └── marisa-test.vcxproj.filters │ ├── trie-test │ │ ├── trie-test.vcxproj │ │ └── trie-test.vcxproj.filters │ ├── vector-test │ │ ├── vector-test.vcxproj │ │ └── vector-test.vcxproj.filters │ └── vs2010.sln │ ├── vs2013 │ ├── base-test │ │ ├── base-test.vcxproj │ │ └── base-test.vcxproj.filters │ ├── io-test │ │ ├── io-test.vcxproj │ │ └── io-test.vcxproj.filters │ ├── libmarisa │ │ ├── libmarisa.vcxproj │ │ └── libmarisa.vcxproj.filters │ ├── marisa-benchmark │ │ ├── marisa-benchmark.vcxproj │ │ └── marisa-benchmark.vcxproj.filters │ ├── marisa-build │ │ ├── marisa-build.vcxproj │ │ └── marisa-build.vcxproj.filters │ ├── marisa-common-prefix-search │ │ ├── marisa-common-prefix-search.vcxproj │ │ └── marisa-common-prefix-search.vcxproj.filters │ ├── marisa-dump │ │ ├── marisa-dump.vcxproj │ │ └── marisa-dump.vcxproj.filters │ ├── marisa-lookup │ │ ├── marisa-lookup.vcxproj │ │ └── marisa-lookup.vcxproj.filters │ ├── marisa-predictive-search │ │ ├── marisa-predictive-search.vcxproj │ │ └── marisa-predictive-search.vcxproj.filters │ ├── marisa-reverse-lookup │ │ ├── marisa-reverse-lookup.vcxproj │ │ └── marisa-reverse-lookup.vcxproj.filters │ ├── marisa-test │ │ ├── marisa-test.vcxproj │ │ └── marisa-test.vcxproj.filters │ ├── trie-test │ │ ├── 
trie-test.vcxproj │ │ └── trie-test.vcxproj.filters │ ├── vector-test │ │ ├── vector-test.vcxproj │ │ └── vector-test.vcxproj.filters │ └── vs2013.sln │ └── vs2015 │ ├── base-test │ ├── base-test.vcxproj │ └── base-test.vcxproj.filters │ ├── io-test │ ├── io-test.vcxproj │ └── io-test.vcxproj.filters │ ├── libmarisa │ ├── libmarisa.vcxproj │ └── libmarisa.vcxproj.filters │ ├── marisa-benchmark │ ├── marisa-benchmark.vcxproj │ └── marisa-benchmark.vcxproj.filters │ ├── marisa-build │ ├── marisa-build.vcxproj │ └── marisa-build.vcxproj.filters │ ├── marisa-common-prefix-search │ ├── marisa-common-prefix-search.vcxproj │ └── marisa-common-prefix-search.vcxproj.filters │ ├── marisa-dump │ ├── marisa-dump.vcxproj │ └── marisa-dump.vcxproj.filters │ ├── marisa-lookup │ ├── marisa-lookup.vcxproj │ └── marisa-lookup.vcxproj.filters │ ├── marisa-predictive-search │ ├── marisa-predictive-search.vcxproj │ └── marisa-predictive-search.vcxproj.filters │ ├── marisa-reverse-lookup │ ├── marisa-reverse-lookup.vcxproj │ └── marisa-reverse-lookup.vcxproj.filters │ ├── marisa-test │ ├── marisa-test.vcxproj │ └── marisa-test.vcxproj.filters │ ├── trie-test │ ├── trie-test.vcxproj │ └── trie-test.vcxproj.filters │ ├── vector-test │ ├── vector-test.vcxproj │ └── vector-test.vcxproj.filters │ └── vs2015.sln ├── utf8.h ├── vocabulary.cc └── vocabulary.h /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2019, Aj-Ash 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. 
Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # rime-table-decompiler 2 | 3 | 反编译 Rime 的 ``xxx.table.bin`` 二进制词典文件。 4 | 5 | ## 简介 6 | 7 | 本项目 Fork 自 [whjiang/rime_table_bin_decompiler]。 8 | 9 | 相对于原项目,本项目有以下的更改。 10 | 11 | + 修复了在 Linux 系统上的编译错误 12 | + 更易读的 README 13 | 14 | 本项目用于简单地反编译 Rime 的 ``xxx.table.bin`` 二进制词典文件, 15 | 以生成 ``xxx.dict.yaml`` 源纯文本词典文件。 16 | 17 | 需要注意的是,由于 ``xxx.table.bin`` 二进制词库文件没有元数据 18 | 信息,所以生成的 ``xxx.dict.yaml`` 源纯文本词典文件的文件头中 19 | 的元数据信息是根据常见的元数据信息填补进去的,所以可能是错误的, 20 | 需自行修正。 21 | 22 | [whjiang/rime_table_bin_decompiler]: https://github.com/whjiang/rime_table_bin_decompiler 23 | 24 | ## 下载 25 | 26 | 如果你懒得编译,可以下载已编译好的程序 (支持 Linux、Windows)。 27 | 28 | + [GitHub Releases] 29 | 30 | [GitHub Releases]: https://github.com/aj-ash/rime-table-decompiler/tree/master/releases 31 | 32 | ## 编译 33 | 34 | ### 工具链 35 | 36 | + GNU toolchain 37 | + Boost 38 | + CMake 39 | 40 | ### 步骤 41 | 42 | 1. ``cd rime-table-decompiler/src`` 43 | 2. ``mkdir build && cd build`` 44 | 3. ``cmake ..`` 45 | 4.
``make`` 46 | 47 | ## 使用 48 | 49 | 反编译二进制词库并标准输出。 50 | 51 | ```Bash 52 | ./rime-table-decompiler xxx.table.bin 53 | ``` 54 | 55 | 反编译二进制词库并输出到纯文本词库文件中。 56 | 57 | ```Bash 58 | ./rime-table-decompiler xxx.table.bin > xxx.dict.yaml 59 | ``` 60 | -------------------------------------------------------------------------------- /releases/rime-table-decompiler_linux-64.7z: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aqen/rime-table-decompiler/f3131bfc5d3c5834b9721b44a78e398c7d90dbee/releases/rime-table-decompiler_linux-64.7z -------------------------------------------------------------------------------- /releases/rime-table-decompiler_win.7z: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aqen/rime-table-decompiler/f3131bfc5d3c5834b9721b44a78e398c7d90dbee/releases/rime-table-decompiler_win.7z -------------------------------------------------------------------------------- /src/common.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright RIME Developers 3 | // Distributed under the BSD License 4 | // 5 | // 2011-03-14 GONG Chen 6 | // 7 | #ifndef RIME_COMMON_H_ 8 | #define RIME_COMMON_H_ 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #define BOOST_BIND_NO_PLACEHOLDERS 23 | #ifdef BOOST_SIGNALS2 24 | #include 25 | #else 26 | #include 27 | #include 28 | #endif 29 | 30 | #ifdef RIME_ENABLE_LOGGING 31 | #include 32 | #else 33 | #include "no_logging.h" 34 | #endif // RIME_ENABLE_LOGGING 35 | 36 | // call a pointer to member function on this 37 | #define RIME_THIS_CALL(f) (this->*(f)) 38 | 39 | #define RIME_THIS_CALL_AS(T, f) ((T*)this->*(f)) 40 | 41 | #define RIME_API 42 | 43 | namespace rime { 44 | 45 | using std::function; 46 | using std::list; 47 | using 
std::make_pair; 48 | using std::map; 49 | using std::pair; 50 | using std::set; 51 | using std::string; 52 | using std::vector; 53 | using boost::optional; 54 | 55 | template 56 | using hash_map = std::unordered_map; 57 | template 58 | using hash_set = std::unordered_set; 59 | 60 | template 61 | using the = std::unique_ptr; 62 | template 63 | using an = std::shared_ptr; 64 | template 65 | using of = an; 66 | template 67 | using weak = std::weak_ptr; 68 | 69 | template 70 | inline an As(const an& ptr) { 71 | return std::dynamic_pointer_cast(ptr); 72 | } 73 | 74 | template 75 | inline bool Is(const an& ptr) { 76 | return bool(As(ptr)); 77 | } 78 | 79 | template 80 | inline an New(Args&&... args) { 81 | return std::make_shared(std::forward(args)...); 82 | } 83 | 84 | #ifdef BOOST_SIGNALS2 85 | using boost::signals::connection; 86 | using boost::signal; 87 | #else 88 | using boost::signals2::connection; 89 | using boost::signals2::signal; 90 | #endif 91 | 92 | } // namespace rime 93 | 94 | #endif // RIME_COMMON_H_ 95 | -------------------------------------------------------------------------------- /src/marisa.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_H_ 2 | #define MARISA_H_ 3 | 4 | // "mstdio.h" includes <cstdio> for I/O using std::FILE. 5 | #include "mstdio.h" 6 | 7 | // "marisa/iostream.h" includes <iosfwd> for I/O using std::iostream. 8 | #include "marisa/iostream.h" 9 | 10 | // You can use <marisa/trie.h> instead of <marisa.h> if you don't need the 11 | // above I/O interfaces and don't want to include the above I/O headers.
12 | #include "marisa/trie.h" 13 | 14 | #endif // MARISA_H_ 15 | -------------------------------------------------------------------------------- /src/marisa/agent.cc: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "agent.h" 4 | #include "grimoire/trie.h" 5 | 6 | namespace marisa { 7 | 8 | Agent::Agent() : query_(), key_(), state_() {} 9 | 10 | Agent::~Agent() {} 11 | 12 | void Agent::set_query(const char *str) { 13 | MARISA_THROW_IF(str == NULL, MARISA_NULL_ERROR); 14 | if (state_.get() != NULL) { 15 | state_->reset(); 16 | } 17 | query_.set_str(str); 18 | } 19 | 20 | void Agent::set_query(const char *ptr, std::size_t length) { 21 | MARISA_THROW_IF((ptr == NULL) && (length != 0), MARISA_NULL_ERROR); 22 | if (state_.get() != NULL) { 23 | state_->reset(); 24 | } 25 | query_.set_str(ptr, length); 26 | } 27 | 28 | void Agent::set_query(std::size_t key_id) { 29 | if (state_.get() != NULL) { 30 | state_->reset(); 31 | } 32 | query_.set_id(key_id); 33 | } 34 | 35 | void Agent::init_state() { 36 | MARISA_THROW_IF(state_.get() != NULL, MARISA_STATE_ERROR); 37 | state_.reset(new (std::nothrow) grimoire::State); 38 | MARISA_THROW_IF(state_.get() == NULL, MARISA_MEMORY_ERROR); 39 | } 40 | 41 | void Agent::clear() { 42 | Agent().swap(*this); 43 | } 44 | 45 | void Agent::swap(Agent &rhs) { 46 | query_.swap(rhs.query_); 47 | key_.swap(rhs.key_); 48 | state_.swap(rhs.state_); 49 | } 50 | 51 | } // namespace marisa 52 | -------------------------------------------------------------------------------- /src/marisa/agent.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_AGENT_H_ 2 | #define MARISA_AGENT_H_ 3 | 4 | #include "key.h" 5 | #include "query.h" 6 | 7 | namespace marisa { 8 | namespace grimoire { 9 | namespace trie { 10 | 11 | class State; 12 | 13 | } // namespace trie 14 | } // namespace grimoire 15 | 16 | class Agent { 17 | public: 18 | Agent(); 19 | ~Agent(); 
20 | 21 | const Query &query() const { 22 | return query_; 23 | } 24 | const Key &key() const { 25 | return key_; 26 | } 27 | 28 | void set_query(const char *str); 29 | void set_query(const char *ptr, std::size_t length); 30 | void set_query(std::size_t key_id); 31 | 32 | const grimoire::trie::State &state() const { 33 | return *state_; 34 | } 35 | grimoire::trie::State &state() { 36 | return *state_; 37 | } 38 | 39 | void set_key(const char *str) { 40 | MARISA_DEBUG_IF(str == NULL, MARISA_NULL_ERROR); 41 | key_.set_str(str); 42 | } 43 | void set_key(const char *ptr, std::size_t length) { 44 | MARISA_DEBUG_IF((ptr == NULL) && (length != 0), MARISA_NULL_ERROR); 45 | MARISA_DEBUG_IF(length > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 46 | key_.set_str(ptr, length); 47 | } 48 | void set_key(std::size_t id) { 49 | MARISA_DEBUG_IF(id > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 50 | key_.set_id(id); 51 | } 52 | 53 | bool has_state() const { 54 | return state_.get() != NULL; 55 | } 56 | void init_state(); 57 | 58 | void clear(); 59 | void swap(Agent &rhs); 60 | 61 | private: 62 | Query query_; 63 | Key key_; 64 | scoped_ptr state_; 65 | 66 | // Disallows copy and assignment. 67 | Agent(const Agent &); 68 | Agent &operator=(const Agent &); 69 | }; 70 | 71 | } // namespace marisa 72 | 73 | #endif // MARISA_AGENT_H_ 74 | -------------------------------------------------------------------------------- /src/marisa/exception.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_EXCEPTION_H_ 2 | #define MARISA_EXCEPTION_H_ 3 | 4 | #include 5 | 6 | #include "base.h" 7 | 8 | namespace marisa { 9 | 10 | // An exception object keeps a filename, a line number, an error code and an 11 | // error message. 
The message format is as follows: 12 | // "__FILE__:__LINE__: error_code: error_message" 13 | class Exception : public std::exception { 14 | public: 15 | Exception(const char *filename, int line, 16 | ErrorCode error_code, const char *error_message) 17 | : std::exception(), filename_(filename), line_(line), 18 | error_code_(error_code), error_message_(error_message) {} 19 | Exception(const Exception &ex) 20 | : std::exception(), filename_(ex.filename_), line_(ex.line_), 21 | error_code_(ex.error_code_), error_message_(ex.error_message_) {} 22 | virtual ~Exception() throw() {} 23 | 24 | Exception &operator=(const Exception &rhs) { 25 | filename_ = rhs.filename_; 26 | line_ = rhs.line_; 27 | error_code_ = rhs.error_code_; 28 | error_message_ = rhs.error_message_; 29 | return *this; 30 | } 31 | 32 | const char *filename() const { 33 | return filename_; 34 | } 35 | int line() const { 36 | return line_; 37 | } 38 | ErrorCode error_code() const { 39 | return error_code_; 40 | } 41 | const char *error_message() const { 42 | return error_message_; 43 | } 44 | 45 | virtual const char *what() const throw() { 46 | return error_message_; 47 | } 48 | 49 | private: 50 | const char *filename_; 51 | int line_; 52 | ErrorCode error_code_; 53 | const char *error_message_; 54 | }; 55 | 56 | // These macros are used to convert a line number to a string constant. 57 | #define MARISA_INT_TO_STR(value) #value 58 | #define MARISA_LINE_TO_STR(line) MARISA_INT_TO_STR(line) 59 | #define MARISA_LINE_STR MARISA_LINE_TO_STR(__LINE__) 60 | 61 | // MARISA_THROW throws an exception with a filename, a line number, an error 62 | // code and an error message. 
The message format is as follows: 63 | // "__FILE__:__LINE__: error_code: error_message" 64 | #define MARISA_THROW(error_code, error_message) \ 65 | (throw marisa::Exception(__FILE__, __LINE__, error_code, \ 66 | __FILE__ ":" MARISA_LINE_STR ": " #error_code ": " error_message)) 67 | 68 | // MARISA_THROW_IF throws an exception if `condition' is true. 69 | #define MARISA_THROW_IF(condition, error_code) \ 70 | (void)((!(condition)) || (MARISA_THROW(error_code, #condition), 0)) 71 | 72 | // MARISA_DEBUG_IF is ignored if _DEBUG is undefined. So, it is useful for 73 | // debugging time-critical codes. 74 | #ifdef _DEBUG 75 | #define MARISA_DEBUG_IF(cond, error_code) MARISA_THROW_IF(cond, error_code) 76 | #else 77 | #define MARISA_DEBUG_IF(cond, error_code) 78 | #endif 79 | 80 | } // namespace marisa 81 | 82 | #endif // MARISA_EXCEPTION_H_ 83 | -------------------------------------------------------------------------------- /src/marisa/grimoire/algorithm.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_GRIMOIRE_ALGORITHM_H_ 2 | #define MARISA_GRIMOIRE_ALGORITHM_H_ 3 | 4 | #include "algorithm/sort.h" 5 | 6 | namespace marisa { 7 | namespace grimoire { 8 | 9 | class Algorithm { 10 | public: 11 | Algorithm() {} 12 | 13 | template 14 | std::size_t sort(Iterator begin, Iterator end) const { 15 | return algorithm::sort(begin, end); 16 | } 17 | 18 | private: 19 | Algorithm(const Algorithm &); 20 | Algorithm &operator=(const Algorithm &); 21 | }; 22 | 23 | } // namespace grimoire 24 | } // namespace marisa 25 | 26 | #endif // MARISA_GRIMOIRE_ALGORITHM_H_ 27 | -------------------------------------------------------------------------------- /src/marisa/grimoire/intrin.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_GRIMOIRE_INTRIN_H_ 2 | #define MARISA_GRIMOIRE_INTRIN_H_ 3 | 4 | #include "../base.h" 5 | 6 | #if defined(__x86_64__) || defined(_M_X64) 7 | #define MARISA_X64 8 
| #elif defined(__i386__) || defined(_M_IX86) 9 | #define MARISA_X86 10 | #else // defined(__i386__) || defined(_M_IX86) 11 | #ifdef MARISA_USE_POPCNT 12 | #undef MARISA_USE_POPCNT 13 | #endif // MARISA_USE_POPCNT 14 | #ifdef MARISA_USE_SSE4A 15 | #undef MARISA_USE_SSE4A 16 | #endif // MARISA_USE_SSE4A 17 | #ifdef MARISA_USE_SSE4 18 | #undef MARISA_USE_SSE4 19 | #endif // MARISA_USE_SSE4 20 | #ifdef MARISA_USE_SSE4_2 21 | #undef MARISA_USE_SSE4_2 22 | #endif // MARISA_USE_SSE4_2 23 | #ifdef MARISA_USE_SSE4_1 24 | #undef MARISA_USE_SSE4_1 25 | #endif // MARISA_USE_SSE4_1 26 | #ifdef MARISA_USE_SSSE3 27 | #undef MARISA_USE_SSSE3 28 | #endif // MARISA_USE_SSSE3 29 | #ifdef MARISA_USE_SSE3 30 | #undef MARISA_USE_SSE3 31 | #endif // MARISA_USE_SSE3 32 | #ifdef MARISA_USE_SSE2 33 | #undef MARISA_USE_SSE2 34 | #endif // MARISA_USE_SSE2 35 | #endif // defined(__i386__) || defined(_M_IX86) 36 | 37 | #ifdef MARISA_USE_POPCNT 38 | #ifndef MARISA_USE_SSE3 39 | #define MARISA_USE_SSE3 40 | #endif // MARISA_USE_SSE3 41 | #ifdef _MSC_VER 42 | #include 43 | #else // _MSC_VER 44 | #include 45 | #endif // _MSC_VER 46 | #endif // MARISA_USE_POPCNT 47 | 48 | #ifdef MARISA_USE_SSE4A 49 | #ifndef MARISA_USE_SSE3 50 | #define MARISA_USE_SSE3 51 | #endif // MARISA_USE_SSE3 52 | #ifndef MARISA_USE_POPCNT 53 | #define MARISA_USE_POPCNT 54 | #endif // MARISA_USE_POPCNT 55 | #endif // MARISA_USE_SSE4A 56 | 57 | #ifdef MARISA_USE_SSE4 58 | #ifndef MARISA_USE_SSE4_2 59 | #define MARISA_USE_SSE4_2 60 | #endif // MARISA_USE_SSE4_2 61 | #endif // MARISA_USE_SSE4 62 | 63 | #ifdef MARISA_USE_SSE4_2 64 | #ifndef MARISA_USE_SSE4_1 65 | #define MARISA_USE_SSE4_1 66 | #endif // MARISA_USE_SSE4_1 67 | #ifndef MARISA_USE_POPCNT 68 | #define MARISA_USE_POPCNT 69 | #endif // MARISA_USE_POPCNT 70 | #endif // MARISA_USE_SSE4_2 71 | 72 | #ifdef MARISA_USE_SSE4_1 73 | #ifndef MARISA_USE_SSSE3 74 | #define MARISA_USE_SSSE3 75 | #endif // MARISA_USE_SSSE3 76 | #endif // MARISA_USE_SSE4_1 77 | 78 | #ifdef 
MARISA_USE_SSSE3 79 | #ifndef MARISA_USE_SSE3 80 | #define MARISA_USE_SSE3 81 | #endif // MARISA_USE_SSE3 82 | #ifdef MARISA_X64 83 | #define MARISA_X64_SSSE3 84 | #else // MARISA_X64 85 | #define MARISA_X86_SSSE3 86 | #endif // MARISA_X64 87 | #include 88 | #endif // MARISA_USE_SSSE3 89 | 90 | #ifdef MARISA_USE_SSE3 91 | #ifndef MARISA_USE_SSE2 92 | #define MARISA_USE_SSE2 93 | #endif // MARISA_USE_SSE2 94 | #endif // MARISA_USE_SSE3 95 | 96 | #ifdef MARISA_USE_SSE2 97 | #ifdef MARISA_X64 98 | #define MARISA_X64_SSE2 99 | #else // MARISA_X64 100 | #define MARISA_X86_SSE2 101 | #endif // MARISA_X64 102 | #include 103 | #endif // MARISA_USE_SSE2 104 | 105 | #ifdef _MSC_VER 106 | #if MARISA_WORD_SIZE == 64 107 | #include 108 | #pragma intrinsic(_BitScanForward64) 109 | #else // MARISA_WORD_SIZE == 64 110 | #include 111 | #pragma intrinsic(_BitScanForward) 112 | #endif // MARISA_WORD_SIZE == 64 113 | #endif // _MSC_VER 114 | 115 | #endif // MARISA_GRIMOIRE_INTRIN_H_ 116 | -------------------------------------------------------------------------------- /src/marisa/grimoire/io.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_GRIMOIRE_IO_H_ 2 | #define MARISA_GRIMOIRE_IO_H_ 3 | 4 | #include "io/mapper.h" 5 | #include "io/reader.h" 6 | #include "io/writer.h" 7 | 8 | namespace marisa { 9 | namespace grimoire { 10 | 11 | using io::Mapper; 12 | using io::Reader; 13 | using io::Writer; 14 | 15 | } // namespace grimoire 16 | } // namespace marisa 17 | 18 | #endif // MARISA_GRIMOIRE_IO_H_ 19 | -------------------------------------------------------------------------------- /src/marisa/grimoire/io/mapper.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_GRIMOIRE_IO_MAPPER_H_ 2 | #define MARISA_GRIMOIRE_IO_MAPPER_H_ 3 | 4 | #include 5 | 6 | #include "../../base.h" 7 | 8 | namespace marisa { 9 | namespace grimoire { 10 | namespace io { 11 | 12 | class Mapper { 13 | public: 14
| Mapper(); 15 | ~Mapper(); 16 | 17 | void open(const char *filename); 18 | void open(const void *ptr, std::size_t size); 19 | 20 | template 21 | void map(T *obj) { 22 | MARISA_THROW_IF(obj == NULL, MARISA_NULL_ERROR); 23 | *obj = *static_cast(map_data(sizeof(T))); 24 | } 25 | 26 | template 27 | void map(const T **objs, std::size_t num_objs) { 28 | MARISA_THROW_IF((objs == NULL) && (num_objs != 0), MARISA_NULL_ERROR); 29 | MARISA_THROW_IF(num_objs > (MARISA_SIZE_MAX / sizeof(T)), 30 | MARISA_SIZE_ERROR); 31 | *objs = static_cast(map_data(sizeof(T) * num_objs)); 32 | } 33 | 34 | void seek(std::size_t size); 35 | 36 | bool is_open() const; 37 | 38 | void clear(); 39 | void swap(Mapper &rhs); 40 | 41 | private: 42 | const void *ptr_; 43 | void *origin_; 44 | std::size_t avail_; 45 | std::size_t size_; 46 | #if (defined _WIN32) || (defined _WIN64) 47 | void *file_; 48 | void *map_; 49 | #else // (defined _WIN32) || (defined _WIN64) 50 | int fd_; 51 | #endif // (defined _WIN32) || (defined _WIN64) 52 | 53 | void open_(const char *filename); 54 | void open_(const void *ptr, std::size_t size); 55 | 56 | const void *map_data(std::size_t size); 57 | 58 | // Disallows copy and assignment. 
59 | Mapper(const Mapper &); 60 | Mapper &operator=(const Mapper &); 61 | }; 62 | 63 | } // namespace io 64 | } // namespace grimoire 65 | } // namespace marisa 66 | 67 | #endif // MARISA_GRIMOIRE_IO_MAPPER_H_ 68 | -------------------------------------------------------------------------------- /src/marisa/grimoire/io/reader.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_GRIMOIRE_IO_READER_H_ 2 | #define MARISA_GRIMOIRE_IO_READER_H_ 3 | 4 | #include 5 | #include 6 | 7 | #include "../../base.h" 8 | 9 | namespace marisa { 10 | namespace grimoire { 11 | namespace io { 12 | 13 | class Reader { 14 | public: 15 | Reader(); 16 | ~Reader(); 17 | 18 | void open(const char *filename); 19 | void open(std::FILE *file); 20 | void open(int fd); 21 | void open(std::istream &stream); 22 | 23 | template 24 | void read(T *obj) { 25 | MARISA_THROW_IF(obj == NULL, MARISA_NULL_ERROR); 26 | read_data(obj, sizeof(T)); 27 | } 28 | 29 | template 30 | void read(T *objs, std::size_t num_objs) { 31 | MARISA_THROW_IF((objs == NULL) && (num_objs != 0), MARISA_NULL_ERROR); 32 | MARISA_THROW_IF(num_objs > (MARISA_SIZE_MAX / sizeof(T)), 33 | MARISA_SIZE_ERROR); 34 | read_data(objs, sizeof(T) * num_objs); 35 | } 36 | 37 | void seek(std::size_t size); 38 | 39 | bool is_open() const; 40 | 41 | void clear(); 42 | void swap(Reader &rhs); 43 | 44 | private: 45 | std::FILE *file_; 46 | int fd_; 47 | std::istream *stream_; 48 | bool needs_fclose_; 49 | 50 | void open_(const char *filename); 51 | void open_(std::FILE *file); 52 | void open_(int fd); 53 | void open_(std::istream &stream); 54 | 55 | void read_data(void *buf, std::size_t size); 56 | 57 | // Disallows copy and assignment. 
58 | Reader(const Reader &); 59 | Reader &operator=(const Reader &); 60 | }; 61 | 62 | } // namespace io 63 | } // namespace grimoire 64 | } // namespace marisa 65 | 66 | #endif // MARISA_GRIMOIRE_IO_READER_H_ 67 | -------------------------------------------------------------------------------- /src/marisa/grimoire/io/writer.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_GRIMOIRE_IO_WRITER_H_ 2 | #define MARISA_GRIMOIRE_IO_WRITER_H_ 3 | 4 | #include 5 | #include 6 | 7 | #include "../../base.h" 8 | 9 | namespace marisa { 10 | namespace grimoire { 11 | namespace io { 12 | 13 | class Writer { 14 | public: 15 | Writer(); 16 | ~Writer(); 17 | 18 | void open(const char *filename); 19 | void open(std::FILE *file); 20 | void open(int fd); 21 | void open(std::ostream &stream); 22 | 23 | template 24 | void write(const T &obj) { 25 | write_data(&obj, sizeof(T)); 26 | } 27 | 28 | template 29 | void write(const T *objs, std::size_t num_objs) { 30 | MARISA_THROW_IF((objs == NULL) && (num_objs != 0), MARISA_NULL_ERROR); 31 | MARISA_THROW_IF(num_objs > (MARISA_SIZE_MAX / sizeof(T)), 32 | MARISA_SIZE_ERROR); 33 | write_data(objs, sizeof(T) * num_objs); 34 | } 35 | 36 | void seek(std::size_t size); 37 | 38 | bool is_open() const; 39 | 40 | void clear(); 41 | void swap(Writer &rhs); 42 | 43 | private: 44 | std::FILE *file_; 45 | int fd_; 46 | std::ostream *stream_; 47 | bool needs_fclose_; 48 | 49 | void open_(const char *filename); 50 | void open_(std::FILE *file); 51 | void open_(int fd); 52 | void open_(std::ostream &stream); 53 | 54 | void write_data(const void *data, std::size_t size); 55 | 56 | // Disallows copy and assignment. 
57 | Writer(const Writer &); 58 | Writer &operator=(const Writer &); 59 | }; 60 | 61 | } // namespace io 62 | } // namespace grimoire 63 | } // namespace marisa 64 | 65 | #endif // MARISA_GRIMOIRE_IO_WRITER_H_ 66 | -------------------------------------------------------------------------------- /src/marisa/grimoire/trie.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_GRIMOIRE_TRIE_H_ 2 | #define MARISA_GRIMOIRE_TRIE_H_ 3 | 4 | #include "trie/state.h" 5 | #include "trie/louds-trie.h" 6 | 7 | namespace marisa { 8 | namespace grimoire { 9 | 10 | using trie::State; 11 | using trie::LoudsTrie; 12 | 13 | } // namespace grimoire 14 | } // namespace marisa 15 | 16 | #endif // MARISA_GRIMOIRE_TRIE_H_ 17 | -------------------------------------------------------------------------------- /src/marisa/grimoire/trie/cache.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_GRIMOIRE_TRIE_CACHE_H_ 2 | #define MARISA_GRIMOIRE_TRIE_CACHE_H_ 3 | 4 | #include 5 | 6 | #include "../../base.h" 7 | 8 | namespace marisa { 9 | namespace grimoire { 10 | namespace trie { 11 | 12 | class Cache { 13 | public: 14 | Cache() : parent_(0), child_(0), union_() { 15 | union_.weight = FLT_MIN; 16 | } 17 | Cache(const Cache &cache) 18 | : parent_(cache.parent_), child_(cache.child_), union_(cache.union_) {} 19 | 20 | Cache &operator=(const Cache &cache) { 21 | parent_ = cache.parent_; 22 | child_ = cache.child_; 23 | union_ = cache.union_; 24 | return *this; 25 | } 26 | 27 | void set_parent(std::size_t parent) { 28 | MARISA_DEBUG_IF(parent > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 29 | parent_ = (UInt32)parent; 30 | } 31 | void set_child(std::size_t child) { 32 | MARISA_DEBUG_IF(child > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 33 | child_ = (UInt32)child; 34 | } 35 | void set_base(UInt8 base) { 36 | union_.link = (union_.link & ~0xFFU) | base; 37 | } 38 | void set_extra(std::size_t extra) { 39 | 
MARISA_DEBUG_IF(extra > (MARISA_UINT32_MAX >> 8), MARISA_SIZE_ERROR); 40 | union_.link = (UInt32)((union_.link & 0xFFU) | (extra << 8)); 41 | } 42 | void set_weight(float weight) { 43 | union_.weight = weight; 44 | } 45 | 46 | std::size_t parent() const { 47 | return parent_; 48 | } 49 | std::size_t child() const { 50 | return child_; 51 | } 52 | UInt8 base() const { 53 | return (UInt8)(union_.link & 0xFFU); 54 | } 55 | std::size_t extra() const { 56 | return union_.link >> 8; 57 | } 58 | /* label() is base() reinterpreted as a char. */ char label() const { 59 | return (char)base(); 60 | } 61 | std::size_t link() const { 62 | return union_.link; 63 | } 64 | float weight() const { 65 | return union_.weight; 66 | } 67 | 68 | private: 69 | UInt32 parent_; 70 | UInt32 child_; 71 | union Union { 72 | UInt32 link; 73 | float weight; 74 | } union_; 75 | }; 76 | 77 | } // namespace trie 78 | } // namespace grimoire 79 | } // namespace marisa 80 | 81 | #endif // MARISA_GRIMOIRE_TRIE_CACHE_H_ 82 | -------------------------------------------------------------------------------- /src/marisa/grimoire/trie/entry.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_GRIMOIRE_TRIE_ENTRY_H_ 2 | #define MARISA_GRIMOIRE_TRIE_ENTRY_H_ 3 | 4 | #include "../../base.h" 5 | 6 | namespace marisa { 7 | namespace grimoire { 8 | namespace trie { 9 | 10 | /* Entry: a string reference addressed from its end. ptr_ points at the LAST character (set_str stores ptr + length - 1), so operator[](i) returns the i-th character counted backwards and ptr() recomputes the start of the string. The characters are not copied. */ class Entry { 11 | public: 12 | Entry() 13 | : ptr_(static_cast(NULL) - 1), length_(0), id_(0) {} 14 | Entry(const Entry &entry) 15 | : ptr_(entry.ptr_), length_(entry.length_), id_(entry.id_) {} 16 | 17 | Entry &operator=(const Entry &entry) { 18 | ptr_ = entry.ptr_; 19 | length_ = entry.length_; 20 | id_ = entry.id_; 21 | return *this; 22 | } 23 | 24 | char operator[](std::size_t i) const { 25 | MARISA_DEBUG_IF(i >= length_, MARISA_BOUND_ERROR); 26 | return *(ptr_ - i); 27 | } 28 | 29 | void set_str(const char *ptr, std::size_t length) { 30 | MARISA_DEBUG_IF((ptr == NULL) && (length != 0), MARISA_NULL_ERROR); 31 | MARISA_DEBUG_IF(length >
MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 32 | ptr_ = ptr + length - 1; 33 | length_ = (UInt32)length; 34 | } 35 | void set_id(std::size_t id) { 36 | MARISA_DEBUG_IF(id > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 37 | id_ = (UInt32)id; 38 | } 39 | 40 | const char *ptr() const { 41 | return ptr_ - length_ + 1; 42 | } 43 | std::size_t length() const { 44 | return length_; 45 | } 46 | std::size_t id() const { 47 | return id_; 48 | } 49 | 50 | /* StringComparer: returns true when lhs compares greater than rhs, reading both entries through operator[] (i.e. from the last character backwards) and comparing bytes as unsigned. An entry also compares greater than any entry that is a proper prefix of it in that reading. */ class StringComparer { 51 | public: 52 | bool operator()(const Entry &lhs, const Entry &rhs) const { 53 | for (std::size_t i = 0; i < lhs.length(); ++i) { 54 | if (i == rhs.length()) { 55 | return true; 56 | } 57 | if (lhs[i] != rhs[i]) { 58 | return (UInt8)lhs[i] > (UInt8)rhs[i]; 59 | } 60 | } 61 | return lhs.length() > rhs.length(); 62 | } 63 | }; 64 | 65 | /* IDComparer: ascending order by id. */ class IDComparer { 66 | public: 67 | bool operator()(const Entry &lhs, const Entry &rhs) const { 68 | return lhs.id_ < rhs.id_; 69 | } 70 | }; 71 | 72 | private: 73 | const char *ptr_; 74 | UInt32 length_; 75 | UInt32 id_; 76 | }; 77 | 78 | } // namespace trie 79 | } // namespace grimoire 80 | } // namespace marisa 81 | 82 | #endif // MARISA_GRIMOIRE_TRIE_ENTRY_H_ 83 | -------------------------------------------------------------------------------- /src/marisa/grimoire/trie/header.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_GRIMOIRE_TRIE_HEADER_H_ 2 | #define MARISA_GRIMOIRE_TRIE_HEADER_H_ 3 | 4 | #include "../io.h" 5 | 6 | namespace marisa { 7 | namespace grimoire { 8 | namespace trie { 9 | 10 | /* Header: fixed HEADER_SIZE (16) byte signature. map() and read() fetch 16 bytes and report MARISA_FORMAT_ERROR via MARISA_THROW_IF when they differ from get_header(); write() emits the signature. */ class Header { 11 | public: 12 | enum { 13 | HEADER_SIZE = 16 14 | }; 15 | 16 | Header() {} 17 | 18 | void map(Mapper &mapper) { 19 | const char *ptr; 20 | mapper.map(&ptr, HEADER_SIZE); 21 | MARISA_THROW_IF(!test_header(ptr), MARISA_FORMAT_ERROR); 22 | } 23 | void read(Reader &reader) { 24 | char buf[HEADER_SIZE]; 25 | reader.read(buf, HEADER_SIZE); 26 | MARISA_THROW_IF(!test_header(buf), MARISA_FORMAT_ERROR); 27 | } 28 | void
write(Writer &writer) const { 29 | writer.write(get_header(), HEADER_SIZE); 30 | } 31 | 32 | std::size_t io_size() const { 33 | return HEADER_SIZE; 34 | } 35 | 36 | private: 37 | 38 | /* The signature is the 15 visible characters plus the terminating NUL, i.e. exactly HEADER_SIZE bytes. */ static const char *get_header() { 39 | static const char buf[HEADER_SIZE] = "We love Marisa."; 40 | return buf; 41 | } 42 | 43 | /* Byte-wise comparison of HEADER_SIZE bytes at ptr against get_header(). */ static bool test_header(const char *ptr) { 44 | for (std::size_t i = 0; i < HEADER_SIZE; ++i) { 45 | if (ptr[i] != get_header()[i]) { 46 | return false; 47 | } 48 | } 49 | return true; 50 | } 51 | 52 | // Disallows copy and assignment. 53 | Header(const Header &); 54 | Header &operator=(const Header &); 55 | }; 56 | 57 | } // namespace trie 58 | } // namespace grimoire 59 | } // namespace marisa 60 | 61 | #endif // MARISA_GRIMOIRE_TRIE_HEADER_H_ 62 | -------------------------------------------------------------------------------- /src/marisa/grimoire/trie/history.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_GRIMOIRE_TRIE_STATE_HISTORY_H_ 2 | #define MARISA_GRIMOIRE_TRIE_STATE_HISTORY_H_ 3 | 4 | #include "../../base.h" 5 | 6 | namespace marisa { 7 | namespace grimoire { 8 | namespace trie { 9 | 10 | /* History: record of five UInt32 fields (node id, LOUDS position, key position, link id, key id). The positions start at 0; the link and key ids start at the MARISA_INVALID_LINK_ID / MARISA_INVALID_KEY_ID sentinels. Setters take std::size_t and check the UInt32 range with MARISA_DEBUG_IF before narrowing. */ class History { 11 | public: 12 | History() 13 | : node_id_(0), louds_pos_(0), key_pos_(0), 14 | link_id_(MARISA_INVALID_LINK_ID), key_id_(MARISA_INVALID_KEY_ID) {} 15 | 16 | void set_node_id(std::size_t node_id) { 17 | MARISA_DEBUG_IF(node_id > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 18 | node_id_ = (UInt32)node_id; 19 | } 20 | void set_louds_pos(std::size_t louds_pos) { 21 | MARISA_DEBUG_IF(louds_pos > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 22 | louds_pos_ = (UInt32)louds_pos; 23 | } 24 | void set_key_pos(std::size_t key_pos) { 25 | MARISA_DEBUG_IF(key_pos > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 26 | key_pos_ = (UInt32)key_pos; 27 | } 28 | void set_link_id(std::size_t link_id) { 29 | MARISA_DEBUG_IF(link_id > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 30 | link_id_ = (UInt32)link_id; 31 | } 32 | void
set_key_id(std::size_t key_id) { 33 | MARISA_DEBUG_IF(key_id > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 34 | key_id_ = (UInt32)key_id; 35 | } 36 | 37 | std::size_t node_id() const { 38 | return node_id_; 39 | } 40 | std::size_t louds_pos() const { 41 | return louds_pos_; 42 | } 43 | std::size_t key_pos() const { 44 | return key_pos_; 45 | } 46 | std::size_t link_id() const { 47 | return link_id_; 48 | } 49 | std::size_t key_id() const { 50 | return key_id_; 51 | } 52 | 53 | private: 54 | UInt32 node_id_; 55 | UInt32 louds_pos_; 56 | UInt32 key_pos_; 57 | UInt32 link_id_; 58 | UInt32 key_id_; 59 | }; 60 | 61 | } // namespace trie 62 | } // namespace grimoire 63 | } // namespace marisa 64 | 65 | #endif // MARISA_GRIMOIRE_TRIE_STATE_HISTORY_H_ 66 | -------------------------------------------------------------------------------- /src/marisa/grimoire/trie/range.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_GRIMOIRE_TRIE_RANGE_H_ 2 | #define MARISA_GRIMOIRE_TRIE_RANGE_H_ 3 | 4 | #include "../../base.h" 5 | 6 | namespace marisa { 7 | namespace grimoire { 8 | namespace trie { 9 | 10 | /* Range: triple of begin, end and key position, each stored as UInt32 and zero-initialized. NOTE(review): the setters assign std::size_t to UInt32 without the explicit (UInt32) cast used by the other classes in this code base; the range is only checked by MARISA_DEBUG_IF. */ class Range { 11 | public: 12 | Range() : begin_(0), end_(0), key_pos_(0) {} 13 | 14 | void set_begin(std::size_t begin) { 15 | MARISA_DEBUG_IF(begin > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 16 | begin_ = begin; 17 | } 18 | void set_end(std::size_t end) { 19 | MARISA_DEBUG_IF(end > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 20 | end_ = end; 21 | } 22 | void set_key_pos(std::size_t key_pos) { 23 | MARISA_DEBUG_IF(key_pos > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 24 | key_pos_ = key_pos; 25 | } 26 | 27 | std::size_t begin() const { 28 | return begin_; 29 | } 30 | std::size_t end() const { 31 | return end_; 32 | } 33 | std::size_t key_pos() const { 34 | return key_pos_; 35 | } 36 | 37 | private: 38 | UInt32 begin_; 39 | UInt32 end_; 40 | UInt32 key_pos_; 41 | }; 42 | 43 | /* make_range: builds a Range from its three fields via the setters. */ inline Range make_range(std::size_t begin, std::size_t end, 44 |
std::size_t key_pos) { 45 | Range range; 46 | range.set_begin(begin); 47 | range.set_end(end); 48 | range.set_key_pos(key_pos); 49 | return range; 50 | } 51 | 52 | /* WeightedRange: a Range paired with a float weight (0.0F by default). The begin/end/key_pos accessors forward to the embedded Range. */ class WeightedRange { 53 | public: 54 | WeightedRange() : range_(), weight_(0.0F) {} 55 | 56 | void set_range(const Range &range) { 57 | range_ = range; 58 | } 59 | void set_begin(std::size_t begin) { 60 | range_.set_begin(begin); 61 | } 62 | void set_end(std::size_t end) { 63 | range_.set_end(end); 64 | } 65 | void set_key_pos(std::size_t key_pos) { 66 | range_.set_key_pos(key_pos); 67 | } 68 | void set_weight(float weight) { 69 | weight_ = weight; 70 | } 71 | 72 | const Range &range() const { 73 | return range_; 74 | } 75 | std::size_t begin() const { 76 | return range_.begin(); 77 | } 78 | std::size_t end() const { 79 | return range_.end(); 80 | } 81 | std::size_t key_pos() const { 82 | return range_.key_pos(); 83 | } 84 | float weight() const { 85 | return weight_; 86 | } 87 | 88 | private: 89 | Range range_; 90 | float weight_; 91 | }; 92 | 93 | /* Ordering of WeightedRange looks at the weight only; the range fields do not take part. */ inline bool operator<(const WeightedRange &lhs, const WeightedRange &rhs) { 94 | return lhs.weight() < rhs.weight(); 95 | } 96 | 97 | inline bool operator>(const WeightedRange &lhs, const WeightedRange &rhs) { 98 | return lhs.weight() > rhs.weight(); 99 | } 100 | 101 | /* make_weighted_range: builds a WeightedRange from its four fields via the setters. */ inline WeightedRange make_weighted_range(std::size_t begin, std::size_t end, 102 | std::size_t key_pos, float weight) { 103 | WeightedRange range; 104 | range.set_begin(begin); 105 | range.set_end(end); 106 | range.set_key_pos(key_pos); 107 | range.set_weight(weight); 108 | return range; 109 | } 110 | 111 | } // namespace trie 112 | } // namespace grimoire 113 | } // namespace marisa 114 | 115 | #endif // MARISA_GRIMOIRE_TRIE_RANGE_H_ 116 | -------------------------------------------------------------------------------- /src/marisa/grimoire/trie/state.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_GRIMOIRE_TRIE_STATE_H_ 2 | #define
MARISA_GRIMOIRE_TRIE_STATE_H_ 3 | 4 | #include "../vector.h" 5 | #include "history.h" 6 | 7 | namespace marisa { 8 | namespace grimoire { 9 | namespace trie { 10 | 11 | // A search agent has its internal state and the status codes are defined 12 | // below. 13 | typedef enum StatusCode { 14 | MARISA_READY_TO_ALL, 15 | MARISA_READY_TO_COMMON_PREFIX_SEARCH, 16 | MARISA_READY_TO_PREDICTIVE_SEARCH, 17 | MARISA_END_OF_COMMON_PREFIX_SEARCH, 18 | MARISA_END_OF_PREDICTIVE_SEARCH, 19 | } StatusCode; 20 | 21 | /* State: search state made of a key buffer, a history vector, three UInt32 positions (node id, query position, history position) and a StatusCode. A new State is MARISA_READY_TO_ALL with all positions at 0. Copying and assignment are disallowed. */ class State { 22 | public: 23 | State() 24 | : key_buf_(), history_(), node_id_(0), query_pos_(0), 25 | history_pos_(0), status_code_(MARISA_READY_TO_ALL) {} 26 | 27 | void set_node_id(std::size_t node_id) { 28 | MARISA_DEBUG_IF(node_id > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 29 | node_id_ = (UInt32)node_id; 30 | } 31 | void set_query_pos(std::size_t query_pos) { 32 | MARISA_DEBUG_IF(query_pos > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 33 | query_pos_ = (UInt32)query_pos; 34 | } 35 | void set_history_pos(std::size_t history_pos) { 36 | MARISA_DEBUG_IF(history_pos > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 37 | history_pos_ = (UInt32)history_pos; 38 | } 39 | void set_status_code(StatusCode status_code) { 40 | status_code_ = status_code; 41 | } 42 | 43 | std::size_t node_id() const { 44 | return node_id_; 45 | } 46 | std::size_t query_pos() const { 47 | return query_pos_; 48 | } 49 | std::size_t history_pos() const { 50 | return history_pos_; 51 | } 52 | StatusCode status_code() const { 53 | return status_code_; 54 | } 55 | 56 | const Vector &key_buf() const { 57 | return key_buf_; 58 | } 59 | const Vector &history() const { 60 | return history_; 61 | } 62 | 63 | Vector &key_buf() { 64 | return key_buf_; 65 | } 66 | Vector &history() { 67 | return history_; 68 | } 69 | 70 | /* reset() only changes the status code; buffers and positions are left as they are. */ void reset() { 71 | status_code_ = MARISA_READY_TO_ALL; 72 | } 73 | 74 | /* lookup_init() rewinds node id and query position and leaves the buffers untouched. */ void lookup_init() { 75 | node_id_ = 0; 76 | query_pos_ = 0; 77 | status_code_ = MARISA_READY_TO_ALL; 78 | } 79 | void
reverse_lookup_init() { 80 | key_buf_.resize(0); 81 | key_buf_.reserve(32); 82 | status_code_ = MARISA_READY_TO_ALL; 83 | } 84 | void common_prefix_search_init() { 85 | node_id_ = 0; 86 | query_pos_ = 0; 87 | status_code_ = MARISA_READY_TO_COMMON_PREFIX_SEARCH; 88 | } 89 | /* predictive_search_init(): empties both buffers, reserves capacity (64 for the key buffer, 4 for the history), rewinds all three positions and sets MARISA_READY_TO_PREDICTIVE_SEARCH. */ void predictive_search_init() { 90 | key_buf_.resize(0); 91 | key_buf_.reserve(64); 92 | history_.resize(0); 93 | history_.reserve(4); 94 | node_id_ = 0; 95 | query_pos_ = 0; 96 | history_pos_ = 0; 97 | status_code_ = MARISA_READY_TO_PREDICTIVE_SEARCH; 98 | } 99 | 100 | private: 101 | Vector key_buf_; 102 | Vector history_; 103 | UInt32 node_id_; 104 | UInt32 query_pos_; 105 | UInt32 history_pos_; 106 | StatusCode status_code_; 107 | 108 | // Disallows copy and assignment. 109 | State(const State &); 110 | State &operator=(const State &); 111 | }; 112 | 113 | } // namespace trie 114 | } // namespace grimoire 115 | } // namespace marisa 116 | 117 | #endif // MARISA_GRIMOIRE_TRIE_STATE_H_ 118 | -------------------------------------------------------------------------------- /src/marisa/grimoire/trie/tail.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_GRIMOIRE_TRIE_TAIL_H_ 2 | #define MARISA_GRIMOIRE_TRIE_TAIL_H_ 3 | 4 | #include "../../agent.h" 5 | #include "../vector.h" 6 | #include "entry.h" 7 | 8 | namespace marisa { 9 | namespace grimoire { 10 | namespace trie { 11 | 12 | /* Tail: a character buffer (buf_) plus a bit vector of end flags (end_flags_), with build/map/read/write entry points and offset-based restore/match operations on an Agent. Copying and assignment are disallowed. */ class Tail { 13 | public: 14 | Tail(); 15 | 16 | void build(Vector &entries, Vector *offsets, 17 | TailMode mode); 18 | 19 | void map(Mapper &mapper); 20 | void read(Reader &reader); 21 | void write(Writer &writer) const; 22 | 23 | void restore(Agent &agent, std::size_t offset) const; 24 | bool match(Agent &agent, std::size_t offset) const; 25 | bool prefix_match(Agent &agent, std::size_t offset) const; 26 | 27 | /* Unchecked in release builds: the bound test is a MARISA_DEBUG_IF. */ const char &operator[](std::size_t offset) const { 28 | MARISA_DEBUG_IF(offset >= buf_.size(), MARISA_BOUND_ERROR); 29 | return buf_[offset]; 30 | } 31 | 32 |
/* mode(): reports MARISA_TEXT_TAIL when no end flags are stored, MARISA_BINARY_TAIL otherwise. */ TailMode mode() const { 33 | return end_flags_.empty() ? MARISA_TEXT_TAIL : MARISA_BINARY_TAIL; 34 | } 35 | 36 | bool empty() const { 37 | return buf_.empty(); 38 | } 39 | std::size_t size() const { 40 | return buf_.size(); 41 | } 42 | /* total_size() and io_size() are the sums over both members (buffer and end flags). */ std::size_t total_size() const { 43 | return buf_.total_size() + end_flags_.total_size(); 44 | } 45 | std::size_t io_size() const { 46 | return buf_.io_size() + end_flags_.io_size(); 47 | } 48 | 49 | void clear(); 50 | void swap(Tail &rhs); 51 | 52 | private: 53 | Vector buf_; 54 | BitVector end_flags_; 55 | 56 | void build_(Vector &entries, Vector *offsets, 57 | TailMode mode); 58 | 59 | void map_(Mapper &mapper); 60 | void read_(Reader &reader); 61 | void write_(Writer &writer) const; 62 | 63 | // Disallows copy and assignment. 64 | Tail(const Tail &); 65 | Tail &operator=(const Tail &); 66 | }; 67 | 68 | } // namespace trie 69 | } // namespace grimoire 70 | } // namespace marisa 71 | 72 | #endif // MARISA_GRIMOIRE_TRIE_TAIL_H_ 73 | -------------------------------------------------------------------------------- /src/marisa/grimoire/vector.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_GRIMOIRE_VECTOR_H_ 2 | #define MARISA_GRIMOIRE_VECTOR_H_ 3 | 4 | #include "vector/vector.h" 5 | #include "vector/flat-vector.h" 6 | #include "vector/bit-vector.h" 7 | 8 | namespace marisa { 9 | namespace grimoire { 10 | 11 | /* Re-export the vector types into marisa::grimoire. */ using vector::Vector; 12 | typedef vector::FlatVector FlatVector; 13 | typedef vector::BitVector BitVector; 14 | 15 | } // namespace grimoire 16 | } // namespace marisa 17 | 18 | #endif // MARISA_GRIMOIRE_VECTOR_H_ 19 | -------------------------------------------------------------------------------- /src/marisa/grimoire/vector/pop-count.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_GRIMOIRE_VECTOR_POP_COUNT_H_ 2 | #define MARISA_GRIMOIRE_VECTOR_POP_COUNT_H_ 3 | 4 | #include "../intrin.h" 5 | 6 | namespace marisa { 7 |
namespace grimoire { 8 | namespace vector { 9 | 10 | #if MARISA_WORD_SIZE == 64 11 | 12 | /* PopCount (64-bit variant): the constructor first reduces x to one bit count per byte (three mask-and-add steps), then multiplies by 0x0101010101010101 so that byte k of value_ holds the running total of bytes 0..k, i.e. the number of set bits in the low 8*(k+1) bits of x. lo8() .. lo64() read bytes 0 .. 7 of value_. */ class PopCount { 13 | public: 14 | explicit PopCount(UInt64 x) : value_() { 15 | x = (x & 0x5555555555555555ULL) + ((x & 0xAAAAAAAAAAAAAAAAULL) >> 1); 16 | x = (x & 0x3333333333333333ULL) + ((x & 0xCCCCCCCCCCCCCCCCULL) >> 2); 17 | x = (x & 0x0F0F0F0F0F0F0F0FULL) + ((x & 0xF0F0F0F0F0F0F0F0ULL) >> 4); 18 | x *= 0x0101010101010101ULL; 19 | value_ = x; 20 | } 21 | 22 | std::size_t lo8() const { 23 | return (std::size_t)(value_ & 0xFFU); 24 | } 25 | std::size_t lo16() const { 26 | return (std::size_t)((value_ >> 8) & 0xFFU); 27 | } 28 | std::size_t lo24() const { 29 | return (std::size_t)((value_ >> 16) & 0xFFU); 30 | } 31 | std::size_t lo32() const { 32 | return (std::size_t)((value_ >> 24) & 0xFFU); 33 | } 34 | std::size_t lo40() const { 35 | return (std::size_t)((value_ >> 32) & 0xFFU); 36 | } 37 | std::size_t lo48() const { 38 | return (std::size_t)((value_ >> 40) & 0xFFU); 39 | } 40 | std::size_t lo56() const { 41 | return (std::size_t)((value_ >> 48) & 0xFFU); 42 | } 43 | std::size_t lo64() const { 44 | return (std::size_t)((value_ >> 56) & 0xFFU); 45 | } 46 | 47 | /* count(): total number of set bits in x. Uses the popcnt intrinsic when both MARISA_X64 and MARISA_USE_POPCNT are defined, otherwise the software path above. */ static std::size_t count(UInt64 x) { 48 | #if defined(MARISA_X64) && defined(MARISA_USE_POPCNT) 49 | #ifdef _MSC_VER 50 | return __popcnt64(x); 51 | #else // _MSC_VER 52 | return _mm_popcnt_u64(x); 53 | #endif // _MSC_VER 54 | #else // defined(MARISA_X64) && defined(MARISA_USE_POPCNT) 55 | return PopCount(x).lo64(); 56 | #endif // defined(MARISA_X64) && defined(MARISA_USE_POPCNT) 57 | } 58 | 59 | private: 60 | UInt64 value_; 61 | }; 62 | 63 | #else // MARISA_WORD_SIZE == 64 64 | 65 | class PopCount { 66 | public: 67 | explicit PopCount(UInt32 x) : value_() { 68 | x = (x & 0x55555555U) + ((x & 0xAAAAAAAAU) >> 1); 69 | x = (x & 0x33333333U) + ((x & 0xCCCCCCCCU) >> 2); 70 | x = (x & 0x0F0F0F0FU) + ((x & 0xF0F0F0F0U) >> 4); 71 | x *= 0x01010101U; 72 | value_ = x; 73 | } 74 | 75 | std::size_t lo8() const { 76 | return
value_ & 0xFFU; 77 | } 78 | std::size_t lo16() const { 79 | return (value_ >> 8) & 0xFFU; 80 | } 81 | std::size_t lo24() const { 82 | return (value_ >> 16) & 0xFFU; 83 | } 84 | std::size_t lo32() const { 85 | return (value_ >> 24) & 0xFFU; 86 | } 87 | 88 | static std::size_t count(UInt32 x) { 89 | #ifdef MARISA_USE_POPCNT 90 | #ifdef _MSC_VER 91 | return __popcnt(x); 92 | #else // _MSC_VER 93 | return _mm_popcnt_u32(x); 94 | #endif // _MSC_VER 95 | #else // MARISA_USE_POPCNT 96 | return PopCount(x).lo32(); 97 | #endif // MARISA_USE_POPCNT 98 | } 99 | 100 | private: 101 | UInt32 value_; 102 | }; 103 | 104 | #endif // MARISA_WORD_SIZE == 64 105 | 106 | } // namespace vector 107 | } // namespace grimoire 108 | } // namespace marisa 109 | 110 | #endif // MARISA_GRIMOIRE_VECTOR_POP_COUNT_H_ 111 | -------------------------------------------------------------------------------- /src/marisa/grimoire/vector/rank-index.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_GRIMOIRE_VECTOR_RANK_INDEX_H_ 2 | #define MARISA_GRIMOIRE_VECTOR_RANK_INDEX_H_ 3 | 4 | #include "../../base.h" 5 | 6 | namespace marisa { 7 | namespace grimoire { 8 | namespace vector { 9 | 10 | /* RankIndex: one 32-bit absolute value plus seven relative values packed into two further UInt32 words. rel_lo_ holds rel1 (7 bits at bit 0), rel2 (8 bits at bit 7), rel3 (8 bits at bit 15) and rel4 (9 bits at bit 23); rel_hi_ holds rel5, rel6 and rel7 (9 bits each at bits 0, 9 and 18). Each setter rewrites only its own bit field. */ class RankIndex { 11 | public: 12 | RankIndex() : abs_(0), rel_lo_(0), rel_hi_(0) {} 13 | 14 | void set_abs(std::size_t value) { 15 | MARISA_DEBUG_IF(value > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 16 | abs_ = (UInt32)value; 17 | } 18 | void set_rel1(std::size_t value) { 19 | MARISA_DEBUG_IF(value > 64, MARISA_RANGE_ERROR); 20 | rel_lo_ = (UInt32)((rel_lo_ & ~0x7FU) | (value & 0x7FU)); 21 | } 22 | void set_rel2(std::size_t value) { 23 | MARISA_DEBUG_IF(value > 128, MARISA_RANGE_ERROR); 24 | rel_lo_ = (UInt32)((rel_lo_ & ~(0xFFU << 7)) | ((value & 0xFFU) << 7)); 25 | } 26 | void set_rel3(std::size_t value) { 27 | MARISA_DEBUG_IF(value > 192, MARISA_RANGE_ERROR); 28 | rel_lo_ = (UInt32)((rel_lo_ & ~(0xFFU << 15)) | ((value & 0xFFU) << 15)); 29 | } 30 | void
set_rel4(std::size_t value) { 31 | MARISA_DEBUG_IF(value > 256, MARISA_RANGE_ERROR); 32 | rel_lo_ = (UInt32)((rel_lo_ & ~(0x1FFU << 23)) | ((value & 0x1FFU) << 23)); 33 | } 34 | /* rel5, rel6 and rel7 are 9-bit fields of rel_hi_ at bit offsets 0, 9 and 18. */ void set_rel5(std::size_t value) { 35 | MARISA_DEBUG_IF(value > 320, MARISA_RANGE_ERROR); 36 | rel_hi_ = (UInt32)((rel_hi_ & ~0x1FFU) | (value & 0x1FFU)); 37 | } 38 | void set_rel6(std::size_t value) { 39 | MARISA_DEBUG_IF(value > 384, MARISA_RANGE_ERROR); 40 | rel_hi_ = (UInt32)((rel_hi_ & ~(0x1FFU << 9)) | ((value & 0x1FFU) << 9)); 41 | } 42 | void set_rel7(std::size_t value) { 43 | MARISA_DEBUG_IF(value > 448, MARISA_RANGE_ERROR); 44 | rel_hi_ = (UInt32)((rel_hi_ & ~(0x1FFU << 18)) | ((value & 0x1FFU) << 18)); 45 | } 46 | 47 | /* The getters mirror the setters: same shift, same mask. */ std::size_t abs() const { 48 | return abs_; 49 | } 50 | std::size_t rel1() const { 51 | return rel_lo_ & 0x7FU; 52 | } 53 | std::size_t rel2() const { 54 | return (rel_lo_ >> 7) & 0xFFU; 55 | } 56 | std::size_t rel3() const { 57 | return (rel_lo_ >> 15) & 0xFFU; 58 | } 59 | std::size_t rel4() const { 60 | return (rel_lo_ >> 23) & 0x1FFU; 61 | } 62 | std::size_t rel5() const { 63 | return rel_hi_ & 0x1FFU; 64 | } 65 | std::size_t rel6() const { 66 | return (rel_hi_ >> 9) & 0x1FFU; 67 | } 68 | std::size_t rel7() const { 69 | return (rel_hi_ >> 18) & 0x1FFU; 70 | } 71 | 72 | private: 73 | UInt32 abs_; 74 | UInt32 rel_lo_; 75 | UInt32 rel_hi_; 76 | }; 77 | 78 | } // namespace vector 79 | } // namespace grimoire 80 | } // namespace marisa 81 | 82 | #endif // MARISA_GRIMOIRE_VECTOR_RANK_INDEX_H_ 83 | -------------------------------------------------------------------------------- /src/marisa/iostream.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_IOSTREAM_H_ 2 | #define MARISA_IOSTREAM_H_ 3 | 4 | #include 5 | 6 | namespace marisa { 7 | 8 | class Trie; 9 | 10 | /* Stream I/O for Trie: read() takes the trie by pointer, write() by const reference; both return the stream they were given as a reference type. */ std::istream &read(std::istream &stream, Trie *trie); 11 | std::ostream &write(std::ostream &stream, const Trie &trie); 12 | 13 | std::istream
&operator>>(std::istream &stream, Trie &trie); 14 | std::ostream &operator<<(std::ostream &stream, const Trie &trie); 15 | 16 | } // namespace marisa 17 | 18 | #endif // MARISA_IOSTREAM_H_ 19 | -------------------------------------------------------------------------------- /src/marisa/key.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_KEY_H_ 2 | #define MARISA_KEY_H_ 3 | 4 | #include "base.h" 5 | 6 | namespace marisa { 7 | 8 | /* Key: a string reference (pointer + UInt32 length) together with a 32-bit slot that is read either as an id or as a float weight. set_str() stores the caller's pointer; the characters are not copied. */ class Key { 9 | public: 10 | Key() : ptr_(NULL), length_(0), union_() { 11 | union_.id = 0; 12 | } 13 | Key(const Key &key) 14 | : ptr_(key.ptr_), length_(key.length_), union_(key.union_) {} 15 | 16 | Key &operator=(const Key &key) { 17 | ptr_ = key.ptr_; 18 | length_ = key.length_; 19 | union_ = key.union_; 20 | return *this; 21 | } 22 | 23 | char operator[](std::size_t i) const { 24 | MARISA_DEBUG_IF(i >= length_, MARISA_BOUND_ERROR); 25 | return ptr_[i]; 26 | } 27 | 28 | /* NUL-terminated overload: the length is found by scanning for the terminating NUL character. */ void set_str(const char *str) { 29 | MARISA_DEBUG_IF(str == NULL, MARISA_NULL_ERROR); 30 | std::size_t length = 0; 31 | while (str[length] != '\0') { 32 | ++length; 33 | } 34 | MARISA_DEBUG_IF(length > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 35 | ptr_ = str; 36 | length_ = (UInt32)length; 37 | } 38 | /* Explicit-length overload: a NULL pointer is accepted only together with length 0. */ void set_str(const char *ptr, std::size_t length) { 39 | MARISA_DEBUG_IF((ptr == NULL) && (length != 0), MARISA_NULL_ERROR); 40 | MARISA_DEBUG_IF(length > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 41 | ptr_ = ptr; 42 | length_ = (UInt32)length; 43 | } 44 | void set_id(std::size_t id) { 45 | MARISA_DEBUG_IF(id > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 46 | union_.id = (UInt32)id; 47 | } 48 | void set_weight(float weight) { 49 | union_.weight = weight; 50 | } 51 | 52 | const char *ptr() const { 53 | return ptr_; 54 | } 55 | std::size_t length() const { 56 | return length_; 57 | } 58 | std::size_t id() const { 59 | return union_.id; 60 | } 61 | float weight() const { 62 | return union_.weight; 63 | } 64 | 65 | void clear() { 66
| Key().swap(*this); 67 | } 68 | /* swap(): exchanges pointer, length and the 32-bit slot. Only union_.id is swapped — NOTE(review): this moves the weight too only because id and weight share the same storage; confirm both members have the same size on the target. */ void swap(Key &rhs) { 69 | marisa::swap(ptr_, rhs.ptr_); 70 | marisa::swap(length_, rhs.length_); 71 | marisa::swap(union_.id, rhs.union_.id); 72 | } 73 | 74 | private: 75 | const char *ptr_; 76 | UInt32 length_; 77 | union Union { 78 | UInt32 id; 79 | float weight; 80 | } union_; 81 | }; 82 | 83 | } // namespace marisa 84 | 85 | #endif // MARISA_KEY_H_ 86 | -------------------------------------------------------------------------------- /src/marisa/keyset.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_KEYSET_H_ 2 | #define MARISA_KEYSET_H_ 3 | 4 | #include "key.h" 5 | 6 | namespace marisa { 7 | 8 | /* Keyset: a growable collection of Key objects. Keys are addressed through fixed-size key blocks: element i lives in key_blocks_[i / KEY_BLOCK_SIZE][i % KEY_BLOCK_SIZE]. Copying and assignment are disallowed. */ class Keyset { 9 | public: 10 | enum { 11 | BASE_BLOCK_SIZE = 4096, 12 | EXTRA_BLOCK_SIZE = 1024, 13 | KEY_BLOCK_SIZE = 256 14 | }; 15 | 16 | Keyset(); 17 | 18 | void push_back(const Key &key); 19 | void push_back(const Key &key, char end_marker); 20 | 21 | void push_back(const char *str); 22 | void push_back(const char *ptr, std::size_t length, float weight = 1.0); 23 | 24 | /* Index is range-checked only by MARISA_DEBUG_IF. */ const Key &operator[](std::size_t i) const { 25 | MARISA_DEBUG_IF(i >= size_, MARISA_BOUND_ERROR); 26 | return key_blocks_[i / KEY_BLOCK_SIZE][i % KEY_BLOCK_SIZE]; 27 | } 28 | Key &operator[](std::size_t i) { 29 | MARISA_DEBUG_IF(i >= size_, MARISA_BOUND_ERROR); 30 | return key_blocks_[i / KEY_BLOCK_SIZE][i % KEY_BLOCK_SIZE]; 31 | } 32 | 33 | /* num_keys() and size() both return size_. */ std::size_t num_keys() const { 34 | return size_; 35 | } 36 | 37 | bool empty() const { 38 | return size_ == 0; 39 | } 40 | std::size_t size() const { 41 | return size_; 42 | } 43 | std::size_t total_length() const { 44 | return total_length_; 45 | } 46 | 47 | void reset(); 48 | 49 | void clear(); 50 | void swap(Keyset &rhs); 51 | 52 | private: 53 | scoped_array > base_blocks_; 54 | std::size_t base_blocks_size_; 55 | std::size_t base_blocks_capacity_; 56 | scoped_array > extra_blocks_; 57 | std::size_t extra_blocks_size_; 58 | std::size_t extra_blocks_capacity_; 59 |
scoped_array > key_blocks_; 60 | std::size_t key_blocks_size_; 61 | std::size_t key_blocks_capacity_; 62 | char *ptr_; 63 | std::size_t avail_; 64 | std::size_t size_; 65 | std::size_t total_length_; 66 | 67 | char *reserve(std::size_t size); 68 | 69 | void append_base_block(); 70 | void append_extra_block(std::size_t size); 71 | void append_key_block(); 72 | 73 | // Disallows copy and assignment. 74 | Keyset(const Keyset &); 75 | Keyset &operator=(const Keyset &); 76 | }; 77 | 78 | } // namespace marisa 79 | 80 | #endif // MARISA_KEYSET_H_ 81 | -------------------------------------------------------------------------------- /src/marisa/mstdio.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_STDIO_H_ 2 | #define MARISA_STDIO_H_ 3 | 4 | #include 5 | 6 | namespace marisa { 7 | 8 | class Trie; 9 | 10 | /* std::FILE based I/O for Trie: fread() takes the trie by pointer, fwrite() by const reference. */ void fread(std::FILE *file, Trie *trie); 11 | void fwrite(std::FILE *file, const Trie &trie); 12 | 13 | } // namespace marisa 14 | 15 | #endif // MARISA_STDIO_H_ 16 | -------------------------------------------------------------------------------- /src/marisa/query.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_QUERY_H_ 2 | #define MARISA_QUERY_H_ 3 | 4 | #include "base.h" 5 | 6 | namespace marisa { 7 | 8 | /* Query: a string reference (pointer + std::size_t length) and a std::size_t id. set_str() stores the caller's pointer; the characters are not copied. Unlike Key, length and id are full std::size_t values, so the setters carry no UInt32 range check. */ class Query { 9 | public: 10 | Query() : ptr_(NULL), length_(0), id_(0) {} 11 | Query(const Query &query) 12 | : ptr_(query.ptr_), length_(query.length_), id_(query.id_) {} 13 | 14 | Query &operator=(const Query &query) { 15 | ptr_ = query.ptr_; 16 | length_ = query.length_; 17 | id_ = query.id_; 18 | return *this; 19 | } 20 | 21 | char operator[](std::size_t i) const { 22 | MARISA_DEBUG_IF(i >= length_, MARISA_BOUND_ERROR); 23 | return ptr_[i]; 24 | } 25 | 26 | /* NUL-terminated overload: the length is found by scanning for the terminating NUL character. */ void set_str(const char *str) { 27 | MARISA_DEBUG_IF(str == NULL, MARISA_NULL_ERROR); 28 | std::size_t length = 0; 29 | while (str[length] != '\0') { 30 | ++length; 31 | } 32 | ptr_ = str; 33 |
length_ = length; 34 | } 35 | void set_str(const char *ptr, std::size_t length) { 36 | MARISA_DEBUG_IF((ptr == NULL) && (length != 0), MARISA_NULL_ERROR); 37 | ptr_ = ptr; 38 | length_ = length; 39 | } 40 | void set_id(std::size_t id) { 41 | id_ = id; 42 | } 43 | 44 | const char *ptr() const { 45 | return ptr_; 46 | } 47 | std::size_t length() const { 48 | return length_; 49 | } 50 | std::size_t id() const { 51 | return id_; 52 | } 53 | 54 | /* clear(): swaps with a default-constructed Query, leaving NULL / 0 / 0. */ void clear() { 55 | Query().swap(*this); 56 | } 57 | void swap(Query &rhs) { 58 | marisa::swap(ptr_, rhs.ptr_); 59 | marisa::swap(length_, rhs.length_); 60 | marisa::swap(id_, rhs.id_); 61 | } 62 | 63 | private: 64 | const char *ptr_; 65 | std::size_t length_; 66 | std::size_t id_; 67 | }; 68 | 69 | } // namespace marisa 70 | 71 | #endif // MARISA_QUERY_H_ 72 | -------------------------------------------------------------------------------- /src/marisa/scoped-array.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_SCOPED_ARRAY_H_ 2 | #define MARISA_SCOPED_ARRAY_H_ 3 | 4 | #include "base.h" 5 | 6 | namespace marisa { 7 | 8 | /* scoped_array: holds a pointer to an array and releases it with delete [] in the destructor. Copying and assignment are disallowed; ownership can only be exchanged through swap(). */ template 9 | class scoped_array { 10 | public: 11 | scoped_array() : array_(NULL) {} 12 | explicit scoped_array(T *array) : array_(array) {} 13 | 14 | ~scoped_array() { 15 | delete [] array_; 16 | } 17 | 18 | /* reset(): takes over `array` and releases the previously held one (the temporary that receives it is destroyed at the end of the statement). Resetting to the non-NULL pointer already held is reported as MARISA_RESET_ERROR via MARISA_THROW_IF. */ void reset(T *array = NULL) { 19 | MARISA_THROW_IF((array != NULL) && (array == array_), MARISA_RESET_ERROR); 20 | scoped_array(array).swap(*this); 21 | } 22 | 23 | /* No bounds check; only a NULL array is detected, and only by MARISA_DEBUG_IF. */ T &operator[](std::size_t i) const { 24 | MARISA_DEBUG_IF(array_ == NULL, MARISA_STATE_ERROR); 25 | return array_[i]; 26 | } 27 | T *get() const { 28 | return array_; 29 | } 30 | 31 | void clear() { 32 | scoped_array().swap(*this); 33 | } 34 | void swap(scoped_array &rhs) { 35 | marisa::swap(array_, rhs.array_); 36 | } 37 | 38 | private: 39 | T *array_; 40 | 41 | // Disallows copy and assignment.
42 | scoped_array(const scoped_array &); 43 | scoped_array &operator=(const scoped_array &); 44 | }; 45 | 46 | } // namespace marisa 47 | 48 | #endif // MARISA_SCOPED_ARRAY_H_ 49 | -------------------------------------------------------------------------------- /src/marisa/scoped-ptr.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_SCOPED_PTR_H_ 2 | #define MARISA_SCOPED_PTR_H_ 3 | 4 | #include "base.h" 5 | 6 | namespace marisa { 7 | 8 | /* scoped_ptr: holds a pointer to a single object and releases it with delete in the destructor. Copying and assignment are disallowed; ownership can only be exchanged through swap(). */ template 9 | class scoped_ptr { 10 | public: 11 | scoped_ptr() : ptr_(NULL) {} 12 | explicit scoped_ptr(T *ptr) : ptr_(ptr) {} 13 | 14 | ~scoped_ptr() { 15 | delete ptr_; 16 | } 17 | 18 | /* reset(): takes over `ptr` and releases the previously held object (the temporary that receives it is destroyed at the end of the statement). Resetting to the non-NULL pointer already held is reported as MARISA_RESET_ERROR via MARISA_THROW_IF. */ void reset(T *ptr = NULL) { 19 | MARISA_THROW_IF((ptr != NULL) && (ptr == ptr_), MARISA_RESET_ERROR); 20 | scoped_ptr(ptr).swap(*this); 21 | } 22 | 23 | /* Dereferencing a NULL scoped_ptr is detected only by MARISA_DEBUG_IF. */ T &operator*() const { 24 | MARISA_DEBUG_IF(ptr_ == NULL, MARISA_STATE_ERROR); 25 | return *ptr_; 26 | } 27 | T *operator->() const { 28 | MARISA_DEBUG_IF(ptr_ == NULL, MARISA_STATE_ERROR); 29 | return ptr_; 30 | } 31 | T *get() const { 32 | return ptr_; 33 | } 34 | 35 | void clear() { 36 | scoped_ptr().swap(*this); 37 | } 38 | void swap(scoped_ptr &rhs) { 39 | marisa::swap(ptr_, rhs.ptr_); 40 | } 41 | 42 | private: 43 | T *ptr_; 44 | 45 | // Disallows copy and assignment.
46 | scoped_ptr(const scoped_ptr &); 47 | scoped_ptr &operator=(const scoped_ptr &); 48 | }; 49 | 50 | } // namespace marisa 51 | 52 | #endif // MARISA_SCOPED_PTR_H_ 53 | -------------------------------------------------------------------------------- /src/marisa/trie.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_TRIE_H_ 2 | #define MARISA_TRIE_H_ 3 | 4 | #include "keyset.h" 5 | #include "agent.h" 6 | 7 | namespace marisa { 8 | namespace grimoire { 9 | namespace trie { 10 | 11 | class LoudsTrie; 12 | 13 | } // namespace trie 14 | } // namespace grimoire 15 | 16 | /* Trie: public trie interface. It is built from a Keyset, can be mapped from or loaded/saved to a file name, memory block or file descriptor, and is searched through an Agent. The only data member is the scoped_ptr trie_ — presumably to the forward-declared grimoire::trie::LoudsTrie; the template argument is not visible in this listing. Copying and assignment are disallowed. */ class Trie { 17 | friend class TrieIO; 18 | 19 | public: 20 | Trie(); 21 | ~Trie(); 22 | 23 | void build(Keyset &keyset, int config_flags = 0); 24 | 25 | void mmap(const char *filename); 26 | void map(const void *ptr, std::size_t size); 27 | 28 | void load(const char *filename); 29 | void read(int fd); 30 | 31 | void save(const char *filename) const; 32 | void write(int fd) const; 33 | 34 | /* Search operations take the Agent by non-const reference; three of the four report a bool result. */ bool lookup(Agent &agent) const; 35 | void reverse_lookup(Agent &agent) const; 36 | bool common_prefix_search(Agent &agent) const; 37 | bool predictive_search(Agent &agent) const; 38 | 39 | std::size_t num_tries() const; 40 | std::size_t num_keys() const; 41 | std::size_t num_nodes() const; 42 | 43 | TailMode tail_mode() const; 44 | NodeOrder node_order() const; 45 | 46 | bool empty() const; 47 | std::size_t size() const; 48 | std::size_t total_size() const; 49 | std::size_t io_size() const; 50 | 51 | void clear(); 52 | void swap(Trie &rhs); 53 | 54 | private: 55 | scoped_ptr trie_; 56 | 57 | // Disallows copy and assignment.
58 | Trie(const Trie &); 59 | Trie &operator=(const Trie &); 60 | }; 61 | 62 | } // namespace marisa 63 | 64 | #endif // MARISA_TRIE_H_ 65 | -------------------------------------------------------------------------------- /src/no_logging.h: -------------------------------------------------------------------------------- 1 | #ifndef RIME_NO_LOGGING_H_ 2 | #define RIME_NO_LOGGING_H_ 3 | 4 | namespace rime { 5 | 6 | class VoidLogger { 7 | public: 8 | VoidLogger() {} 9 | 10 | // hack: an unnamed VoidLogger() cannot be used as an l-value 11 | VoidLogger& stream() { return *this; } 12 | 13 | template <typename T> 14 | VoidLogger& operator<< (const T& x) { return *this; } 15 | }; 16 | 17 | // to avoid compiler warnings 18 | class Voidify { 19 | public: 20 | Voidify() {} 21 | void operator& (VoidLogger&) {} 22 | }; 23 | 24 | } // namespace rime 25 | 26 | #define RIME_NO_LOG true ? (void) 0 : rime::Voidify() & rime::VoidLogger().stream() 27 | 28 | #define LOG(severity) RIME_NO_LOG 29 | #define VLOG(verboselevel) RIME_NO_LOG 30 | #define LOG_IF(severity, condition) RIME_NO_LOG 31 | #define LOG_EVERY_N(severity, n) RIME_NO_LOG 32 | #define LOG_IF_EVERY_N(severity, condition, n) RIME_NO_LOG 33 | #define LOG_ASSERT(condition) RIME_NO_LOG 34 | 35 | #define RIME_NO_CHECK (void) 0 36 | 37 | #define CHECK(condition) RIME_NO_CHECK 38 | #define CHECK_EQ(val1, val2) RIME_NO_CHECK 39 | #define CHECK_NE(val1, val2) RIME_NO_CHECK 40 | #define CHECK_LE(val1, val2) RIME_NO_CHECK 41 | #define CHECK_LT(val1, val2) RIME_NO_CHECK 42 | #define CHECK_GE(val1, val2) RIME_NO_CHECK 43 | #define CHECK_GT(val1, val2) RIME_NO_CHECK 44 | #define CHECK_NOTNULL(val) RIME_NO_CHECK 45 | #define CHECK_STREQ(str1, str2) RIME_NO_CHECK 46 | #define CHECK_STRCASEEQ(str1, str2) RIME_NO_CHECK 47 | #define CHECK_STRNE(str1, str2) RIME_NO_CHECK 48 | #define CHECK_STRCASENE(str1, str2) RIME_NO_CHECK 49 | 50 | #define DLOG(severity) LOG(severity) 51 | #define DVLOG(verboselevel) VLOG(verboselevel) 52 | #define DLOG_IF(severity, 
condition) LOG_IF(severity, condition) 53 | #define DLOG_EVERY_N(severity, n) LOG_EVERY_N(severity, n) 54 | #define DLOG_IF_EVERY_N(severity, condition, n) \ 55 | LOG_IF_EVERY_N(severity, condition, n) 56 | #define DLOG_ASSERT(condition) LOG_ASSERT(condition) 57 | 58 | #define DCHECK(condition) CHECK(condition) 59 | #define DCHECK_EQ(val1, val2) CHECK_EQ(val1, val2) 60 | #define DCHECK_NE(val1, val2) CHECK_NE(val1, val2) 61 | #define DCHECK_LE(val1, val2) CHECK_LE(val1, val2) 62 | #define DCHECK_LT(val1, val2) CHECK_LT(val1, val2) 63 | #define DCHECK_GE(val1, val2) CHECK_GE(val1, val2) 64 | #define DCHECK_GT(val1, val2) CHECK_GT(val1, val2) 65 | #define DCHECK_NOTNULL(val) CHECK_NOTNULL(val) 66 | #define DCHECK_STREQ(str1, str2) CHECK_STREQ(str1, str2) 67 | #define DCHECK_STRCASEEQ(str1, str2) CHECK_STRCASEEQ(str1, str2) 68 | #define DCHECK_STRNE(str1, str2) CHECK_STRNE(str1, str2) 69 | #define DCHECK_STRCASENE(str1, str2) CHECK_STRCASENE(str1, str2) 70 | 71 | #endif // RIME_NO_LOGGING_H_ 72 | -------------------------------------------------------------------------------- /src/spelling.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright RIME Developers 3 | // Distributed under the BSD License 4 | // 5 | // 2012-01-17 GONG Chen 6 | // 7 | 8 | #ifndef RIME_SPELLING_H_ 9 | #define RIME_SPELLING_H_ 10 | 11 | #include "common.h" 12 | 13 | namespace rime { 14 | 15 | enum SpellingType { kNormalSpelling, kFuzzySpelling, 16 | kAbbreviation, kCompletion, kAmbiguousSpelling, 17 | kInvalidSpelling }; 18 | 19 | struct SpellingProperties { 20 | SpellingType type = kNormalSpelling; 21 | size_t end_pos = 0; 22 | double credibility = 1.0; 23 | string tips; 24 | }; 25 | 26 | struct Spelling { 27 | string str; 28 | SpellingProperties properties; 29 | 30 | Spelling() = default; 31 | Spelling(const string& _str) : str(_str) {} 32 | 33 | bool operator== (const Spelling& other) { return str == other.str; } 34 | bool operator< 
(const Spelling& other) { return str < other.str; } 35 | }; 36 | 37 | } // namespace rime 38 | 39 | #endif // RIME_SPELLING_H_ 40 | -------------------------------------------------------------------------------- /src/string_table.cc: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright RIME Developers 3 | // Distributed under the BSD License 4 | // 5 | // 2014-07-04 GONG Chen 6 | // 7 | 8 | #include <boost/iostreams/device/array.hpp> 9 | #include <boost/iostreams/stream.hpp> 10 | #include "common.h" 11 | #include "string_table.h" 12 | 13 | namespace rime { 14 | 15 | StringTable::StringTable(const char* ptr, size_t size) { 16 | trie_.map(ptr, size); 17 | } 18 | 19 | bool StringTable::HasKey(const string& key) { 20 | marisa::Agent agent; 21 | agent.set_query(key.c_str()); 22 | return trie_.lookup(agent); 23 | } 24 | 25 | StringId StringTable::Lookup(const string& key) { 26 | marisa::Agent agent; 27 | agent.set_query(key.c_str()); 28 | if(trie_.lookup(agent)) { 29 | return agent.key().id(); 30 | } 31 | else { 32 | return kInvalidStringId; 33 | } 34 | } 35 | 36 | void StringTable::CommonPrefixMatch(const string& query, 37 | vector<StringId>* result) { 38 | marisa::Agent agent; 39 | agent.set_query(query.c_str()); 40 | result->clear(); 41 | while (trie_.common_prefix_search(agent)) { 42 | result->push_back(agent.key().id()); 43 | } 44 | } 45 | 46 | void StringTable::Predict(const string& query, 47 | vector<StringId>* result) { 48 | marisa::Agent agent; 49 | agent.set_query(query.c_str()); 50 | result->clear(); 51 | while (trie_.predictive_search(agent)) { 52 | result->push_back(agent.key().id()); 53 | } 54 | } 55 | 56 | string StringTable::GetString(StringId string_id) { 57 | marisa::Agent agent; 58 | agent.set_query(string_id); 59 | try { 60 | trie_.reverse_lookup(agent); 61 | } 62 | catch (const marisa::Exception& /*ex*/) { 63 | LOG(ERROR) << "invalid id for string table: " << string_id; 64 | return string(); 65 | } 66 | return string(agent.key().ptr(), agent.key().length()); 67 | } 68 | 69 | 
size_t StringTable::NumKeys() const { 70 | return trie_.size(); 71 | } 72 | 73 | size_t StringTable::BinarySize() const { 74 | return trie_.io_size(); 75 | } 76 | 77 | void StringTableBuilder::Add(const string& key, 78 | double weight, 79 | StringId* reference) { 80 | keys_.push_back(key.c_str(), key.length(), (float)weight); 81 | references_.push_back(reference); 82 | } 83 | 84 | void StringTableBuilder::Clear() { 85 | trie_.clear(); 86 | keys_.clear(); 87 | references_.clear(); 88 | } 89 | 90 | void StringTableBuilder::Build() { 91 | trie_.build(keys_); 92 | UpdateReferences(); 93 | } 94 | 95 | void StringTableBuilder::UpdateReferences() { 96 | if (keys_.size() != references_.size()) { 97 | return; 98 | } 99 | marisa::Agent agent; 100 | for (size_t i = 0; i < keys_.size(); ++i) { 101 | if (references_[i]) { 102 | *references_[i] = keys_[i].id(); 103 | } 104 | } 105 | } 106 | 107 | void StringTableBuilder::Dump(char* ptr, size_t size) { 108 | if (size < BinarySize()) { 109 | LOG(ERROR) << "insufficient memory to dump string table."; 110 | return; 111 | } 112 | namespace io = boost::iostreams; 113 | io::basic_array_sink<char> sink(ptr, size); 114 | io::stream<io::basic_array_sink<char>> stream(sink); 115 | stream << trie_; 116 | } 117 | 118 | } // namespace rime 119 | -------------------------------------------------------------------------------- /src/string_table.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright RIME Developers 3 | // Distributed under the BSD License 4 | // 5 | // 2014-06-25 GONG Chen 6 | // 7 | 8 | #ifndef RIME_STRING_TABLE_H_ 9 | #define RIME_STRING_TABLE_H_ 10 | 11 | #include 12 | #include "marisa.h" 13 | #include "common.h" 14 | 15 | namespace rime { 16 | 17 | using StringId = marisa::UInt32; 18 | 19 | const StringId kInvalidStringId = (StringId)(-1); 20 | 21 | class StringTable { 22 | public: 23 | StringTable() = default; 24 | virtual ~StringTable() = default; 25 | StringTable(const char* ptr, size_t size); 26 | 27 
| bool HasKey(const string& key); 28 | StringId Lookup(const string& key); 29 | void CommonPrefixMatch(const string& query, 30 | vector<StringId>* result); 31 | void Predict(const string& query, 32 | vector<StringId>* result); 33 | string GetString(StringId string_id); 34 | 35 | size_t NumKeys() const; 36 | size_t BinarySize() const; 37 | 38 | protected: 39 | marisa::Trie trie_; 40 | }; 41 | 42 | class StringTableBuilder: public StringTable { 43 | public: 44 | void Add(const string& key, double weight = 1.0, 45 | StringId* reference = nullptr); 46 | void Clear(); 47 | void Build(); 48 | void Dump(char* ptr, size_t size); 49 | 50 | private: 51 | void UpdateReferences(); 52 | 53 | marisa::Keyset keys_; 54 | vector<StringId*> references_; 55 | }; 56 | 57 | } // namespace rime 58 | 59 | #endif // RIME_STRING_TABLE_H_ 60 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/.gitignore: -------------------------------------------------------------------------------- 1 | vs*/*.suo 2 | vs*/*.sdf 3 | vs*/*.opensdf 4 | vs*/**/*.user 5 | vs*/Release/ 6 | vs*/Debug/ 7 | vs*/**/Release/ 8 | vs*/**/Debug/ 9 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/AUTHORS: -------------------------------------------------------------------------------- 1 | Susumu Yata 2 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/COPYING: -------------------------------------------------------------------------------- 1 | libmarisa and its command line tools are dual-licensed under the BSD 2-clause license and the LGPL. 2 | 3 | * The BSD 2-clause license 4 | 5 | Copyright (c) 2010-2013, Susumu Yata 6 | All rights reserved. 
7 | 8 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 9 | 10 | - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 11 | - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 12 | 13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 14 | 15 | * The LGPL 2.1 or any later version 16 | 17 | marisa-trie - A static and space-efficient trie data structure. 18 | Copyright (C) 2010-2013 Susumu Yata 19 | 20 | This library is free software; you can redistribute it and/or 21 | modify it under the terms of the GNU Lesser General Public 22 | License as published by the Free Software Foundation; either 23 | version 2.1 of the License, or (at your option) any later version. 24 | 25 | This library is distributed in the hope that it will be useful, 26 | but WITHOUT ANY WARRANTY; without even the implied warranty of 27 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 28 | Lesser General Public License for more details. 
29 | 30 | You should have received a copy of the GNU Lesser General Public 31 | License along with this library; if not, write to the Free Software 32 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 33 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/ChangeLog: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aqen/rime-table-decompiler/f3131bfc5d3c5834b9721b44a78e398c7d90dbee/src/thirdparty/marisa-trie/ChangeLog -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/Makefile.am: -------------------------------------------------------------------------------- 1 | ACLOCAL_AMFLAGS = -I m4 2 | 3 | SUBDIRS = lib tools tests 4 | 5 | pkgconfigdir = ${libdir}/pkgconfig 6 | pkgconfig_DATA = marisa.pc 7 | 8 | EXTRA_DIST = \ 9 | bindings/Makefile \ 10 | bindings/perl/Makefile.PL \ 11 | bindings/perl/marisa.pm \ 12 | bindings/python/setup.py \ 13 | bindings/python/marisa.py \ 14 | bindings/ruby/extconf.rb \ 15 | bindings/*.cxx \ 16 | bindings/*.h \ 17 | bindings/*.i \ 18 | bindings/*/*.cxx \ 19 | bindings/*/*.h \ 20 | bindings/*/sample.* \ 21 | docs/*.html \ 22 | docs/*.css \ 23 | vs2008/vs2008.* \ 24 | vs2008/*/* 25 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/NEWS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aqen/rime-table-decompiler/f3131bfc5d3c5834b9721b44a78e398c7d90dbee/src/thirdparty/marisa-trie/NEWS -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/README: -------------------------------------------------------------------------------- 1 | - Project name 2 | 3 | marisa-trie 4 | http://code.google.com/p/marisa-trie/ 5 | 6 | - Project summary 7 | 8 | MARISA: Matching 
Algorithm with Recursively Implemented StorAge 9 | 10 | - Version 11 | 12 | 0.2.4 13 | 14 | - Description 15 | 16 | *Matching Algorithm with Recursively Implemented !StorAge (MARISA)* is a static and space-efficient trie data structure. And *libmarisa* is a C++ library to provide an implementation of MARISA. Also, the package of *libmarisa* contains a set of command line tools for building and operating a MARISA-based dictionary. 17 | 18 | A MARISA-based dictionary supports not only lookup but also reverse lookup, common prefix search and predictive search. 19 | 20 | * Lookup is to check whether or not a given string exists in a dictionary. 21 | * Reverse lookup is to restore a key from its ID. 22 | * Common prefix search is to find keys from prefixes of a given string. 23 | * Predictive search is to find keys starting with a given string. 24 | 25 | The biggest advantage of *libmarisa* is that its dictionary size is considerably more compact than others. See below for the dictionary size of other implementations. 26 | 27 | * Input 28 | * Source: enwiki-20121101-all-titles-in-ns0.gz 29 | * Contents: all page titles of English Wikipedia (Nov. 
2012) 30 | * Number of keys: 9,805,576 31 | * Total size: 200,435,403 bytes (plain) / 54,933,690 bytes (gzipped) 32 | 33 | || *Implementation* || *Size (bytes)* || *Remarks* || 34 | || darts-clone || 376,613,888 || Compacted double-array trie || 35 | || tx-trie || 127,727,058 || LOUDS-based trie || 36 | || *marisa-trie* || 50,753,560 || MARISA trie || 37 | 38 | * Documentation 39 | * marisa-0.2.4 40 | * [http://marisa-trie.googlecode.com/svn/trunk/docs/readme.en.html README (English)] 41 | * [http://marisa-trie.googlecode.com/svn/trunk/docs/readme.ja.html README (Japanese)] 42 | * marisa-0.1.5 (Japanese) 43 | * HowTo 44 | * ListOfTools 45 | * LibraryInterface 46 | * BenchmarkResults 47 | 48 | - Version control system 49 | 50 | Subversion 51 | 52 | - Source code license 53 | 54 | The BSD 2-clause License 55 | The LGPL 2.1 or any later version 56 | 57 | - Project labels 58 | 59 | Patricia 60 | Trie 61 | Static 62 | Dictionary 63 | CPlusPlus 64 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/bindings/Makefile: -------------------------------------------------------------------------------- 1 | ALL: swig-perl swig-python swig-ruby 2 | 3 | swig-perl: 4 | swig -Wall -c++ -perl -outdir perl marisa-swig.i 5 | mv marisa-swig_wrap.cxx perl 6 | cp marisa-swig.cxx marisa-swig.h perl 7 | 8 | swig-python: 9 | swig -Wall -c++ -python -outdir python marisa-swig.i 10 | mv marisa-swig_wrap.cxx python 11 | cp marisa-swig.cxx marisa-swig.h python 12 | 13 | swig-ruby: 14 | swig -Wall -c++ -ruby -outdir ruby marisa-swig.i 15 | mv marisa-swig_wrap.cxx ruby 16 | cp marisa-swig.cxx marisa-swig.h ruby 17 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/bindings/marisa-swig.i: -------------------------------------------------------------------------------- 1 | %module marisa 2 | 3 | %include "cstring.i" 4 | %include "exception.i" 5 | 6 | %{ 7 | #include "marisa-swig.h" 
8 | %} 9 | 10 | %apply (char *STRING, int LENGTH) { (const char *ptr, std::size_t length) }; 11 | 12 | %cstring_output_allocate_size(const char **ptr_out, std::size_t *length_out, ); 13 | %cstring_output_allocate_size(const char **ptr_out_to_be_deleted, 14 | std::size_t *length_out, delete [] (*$1)); 15 | 16 | %exception { 17 | try { 18 | $action 19 | } catch (const marisa::Exception &ex) { 20 | SWIG_exception(SWIG_RuntimeError, ex.what()); 21 | } catch (...) { 22 | SWIG_exception(SWIG_UnknownError,"Unknown exception"); 23 | } 24 | } 25 | 26 | %include "marisa-swig.h" 27 | 28 | %constant size_t INVALID_KEY_ID = MARISA_INVALID_KEY_ID; 29 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/bindings/perl/Makefile.PL: -------------------------------------------------------------------------------- 1 | use ExtUtils::MakeMaker; 2 | 3 | WriteMakefile( 4 | 'NAME' => 'marisa', 5 | 'LIBS' => ['-lmarisa'], 6 | 'OBJECT' => 'marisa-swig.o marisa-swig_wrap.o' 7 | ); 8 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/bindings/perl/sample.dic: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aqen/rime-table-decompiler/f3131bfc5d3c5834b9721b44a78e398c7d90dbee/src/thirdparty/marisa-trie/bindings/perl/sample.dic -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/bindings/perl/sample.pl: -------------------------------------------------------------------------------- 1 | use marisa; 2 | 3 | $keyset = new marisa::Keyset; 4 | $keyset->push_back("cake"); 5 | $keyset->push_back("cookie"); 6 | $keyset->push_back("ice"); 7 | $keyset->push_back("ice-cream"); 8 | 9 | $trie = new marisa::Trie; 10 | $trie->build($keyset); 11 | print("no. keys: ", $trie->num_keys(), "\n"); 12 | print("no. tries: ", $trie->num_tries(), "\n"); 13 | print("no. 
nodes: ", $trie->num_nodes(), "\n"); 14 | print("size: ", $trie->io_size(), "\n"); 15 | 16 | $agent = new marisa::Agent; 17 | 18 | $agent->set_query("cake"); 19 | $trie->lookup($agent); 20 | print($agent->query_str(), ": ", $agent->key_id(), "\n"); 21 | 22 | $agent->set_query("cookie"); 23 | $trie->lookup($agent); 24 | print($agent->query_str(), ": ", $agent->key_id(), "\n"); 25 | 26 | $agent->set_query("cockoo"); 27 | if (!$trie->lookup($agent)) { 28 | print($agent->query_str(), ": not found\n"); 29 | } 30 | 31 | print("ice: ", $trie->lookup("ice"), "\n"); 32 | print("ice-cream: ", $trie->lookup("ice-cream"), "\n"); 33 | if ($trie->lookup("ice-age") == $marisa::INVALID_KEY_ID) { 34 | print("ice-age: not found\n"); 35 | } 36 | 37 | $trie->save("sample.dic"); 38 | $trie->load("sample.dic"); 39 | 40 | $agent->set_query(0); 41 | $trie->reverse_lookup($agent); 42 | print($agent->query_id(), ": ", $agent->key_str(), "\n"); 43 | $agent->set_query(1); 44 | $trie->reverse_lookup($agent); 45 | print($agent->query_id(), ": ", $agent->key_str(), "\n"); 46 | 47 | print("2: ", $trie->reverse_lookup(2), "\n"); 48 | print("3: ", $trie->reverse_lookup(3), "\n"); 49 | 50 | $trie->mmap("sample.dic"); 51 | 52 | $agent->set_query("ice-cream soda"); 53 | while ($trie->common_prefix_search($agent)) { 54 | print($agent->query_str(), ": ", $agent->key_str(), " (", 55 | $agent->key_id(), ")\n"); 56 | } 57 | 58 | $agent->set_query("ic"); 59 | while ($trie->predictive_search($agent)) { 60 | print($agent->query_str(), ": ", $agent->key_str(), " (", 61 | $agent->key_id(), ")\n"); 62 | } 63 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/bindings/python/sample.dic: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aqen/rime-table-decompiler/f3131bfc5d3c5834b9721b44a78e398c7d90dbee/src/thirdparty/marisa-trie/bindings/python/sample.dic 
-------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/bindings/python/sample.py: -------------------------------------------------------------------------------- 1 | import marisa 2 | 3 | keyset = marisa.Keyset() 4 | keyset.push_back("cake") 5 | keyset.push_back("cookie") 6 | keyset.push_back("ice") 7 | keyset.push_back("ice-cream") 8 | 9 | trie = marisa.Trie() 10 | trie.build(keyset) 11 | print("no. keys: %d" % trie.num_keys()) 12 | print("no. tries: %d" % trie.num_tries()) 13 | print("no. nodes: %d" % trie.num_nodes()) 14 | print("size: %d" % trie.io_size()) 15 | 16 | agent = marisa.Agent() 17 | 18 | agent.set_query("cake") 19 | trie.lookup(agent) 20 | print("%s: %d" % (agent.query_str(), agent.key_id())) 21 | 22 | agent.set_query("cookie") 23 | trie.lookup(agent) 24 | print("%s: %d" % (agent.query_str(), agent.key_id())) 25 | 26 | agent.set_query("cockoo") 27 | if not trie.lookup(agent): 28 | print("%s: not found" % agent.query_str()) 29 | 30 | print("ice: %d" % trie.lookup("ice")) 31 | print("ice-cream: %d" % trie.lookup("ice-cream")) 32 | if trie.lookup("ice-age") == marisa.INVALID_KEY_ID: 33 | print("ice-age: not found") 34 | 35 | trie.save("sample.dic") 36 | trie.load("sample.dic") 37 | 38 | agent.set_query(0) 39 | trie.reverse_lookup(agent) 40 | print("%d: %s" % (agent.query_id(), agent.key_str())) 41 | 42 | agent.set_query(1) 43 | trie.reverse_lookup(agent) 44 | print("%d: %s" % (agent.query_id(), agent.key_str())) 45 | 46 | print("2: %s" % trie.reverse_lookup(2)) 47 | print("3: %s" % trie.reverse_lookup(3)) 48 | 49 | trie.mmap("sample.dic") 50 | 51 | agent.set_query("ice-cream soda") 52 | while trie.common_prefix_search(agent): 53 | print("%s: %s (%d)" % (agent.query_str(), agent.key_str(), agent.key_id())) 54 | 55 | agent.set_query("ic") 56 | while trie.predictive_search(agent): 57 | print("%s: %s (%d)" % (agent.query_str(), agent.key_str(), agent.key_id())) 58 | 
-------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/bindings/python/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup, Extension 2 | 3 | marisa_module = Extension("_marisa", 4 | sources=["marisa-swig_wrap.cxx", "marisa-swig.cxx"], 5 | libraries=["marisa"]) 6 | 7 | setup(name = "marisa", 8 | ext_modules = [marisa_module], 9 | py_modules = ["marisa"]) 10 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/bindings/ruby/extconf.rb: -------------------------------------------------------------------------------- 1 | require "mkmf" 2 | 3 | have_library("marisa") 4 | 5 | create_makefile("marisa") 6 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/bindings/ruby/sample.dic: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aqen/rime-table-decompiler/f3131bfc5d3c5834b9721b44a78e398c7d90dbee/src/thirdparty/marisa-trie/bindings/ruby/sample.dic -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/bindings/ruby/sample.rb: -------------------------------------------------------------------------------- 1 | require "marisa" 2 | 3 | keyset = Marisa::Keyset.new 4 | keyset.push_back("cake") 5 | keyset.push_back("cookie") 6 | keyset.push_back("ice") 7 | keyset.push_back("ice-cream") 8 | 9 | trie = Marisa::Trie.new 10 | trie.build(keyset) 11 | print("no. keys: ", trie.num_keys(), "\n") 12 | print("no. tries: ", trie.num_tries(), "\n") 13 | print("no. 
nodes: ", trie.num_nodes(), "\n") 14 | print("size: ", trie.io_size(), "\n") 15 | 16 | agent = Marisa::Agent.new 17 | 18 | agent.set_query("cake") 19 | trie.lookup(agent) 20 | print(agent.query_str(), ": ", agent.key_id(), "\n") 21 | 22 | agent.set_query("cookie") 23 | trie.lookup(agent) 24 | print(agent.query_str(), ": ", agent.key_id(), "\n") 25 | 26 | agent.set_query("cockoo") 27 | if not trie.lookup(agent) 28 | print(agent.query_str(), ": not found\n") 29 | end 30 | 31 | print("ice: ", trie.lookup("ice"), "\n") 32 | print("ice-cream: ", trie.lookup("ice-cream"), "\n") 33 | if trie.lookup("ice-age") == Marisa::INVALID_KEY_ID 34 | print("ice-age: not found\n") 35 | end 36 | 37 | trie.save("sample.dic") 38 | trie.load("sample.dic") 39 | 40 | agent.set_query(0) 41 | trie.reverse_lookup(agent) 42 | print(agent.query_id(), ": ", agent.key_str(), "\n") 43 | 44 | agent.set_query(1) 45 | trie.reverse_lookup(agent) 46 | print(agent.query_id(), ": ", agent.key_str(), "\n") 47 | 48 | print("2: ", trie.reverse_lookup(2), "\n") 49 | print("3: ", trie.reverse_lookup(3), "\n") 50 | 51 | trie.mmap("sample.dic") 52 | 53 | agent.set_query("ice-cream soda") 54 | while trie.common_prefix_search(agent) 55 | print(agent.query_str(), ": ", agent.key_str(), " (", agent.key_id(), ")\n") 56 | end 57 | 58 | agent.set_query("ic") 59 | while trie.predictive_search(agent) 60 | print(agent.query_str(), ": ", agent.key_str(), " (", agent.key_id(), ")\n") 61 | end 62 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/.libs/agent.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aqen/rime-table-decompiler/f3131bfc5d3c5834b9721b44a78e398c7d90dbee/src/thirdparty/marisa-trie/lib/.libs/agent.o -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/.libs/bit-vector.o: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/aqen/rime-table-decompiler/f3131bfc5d3c5834b9721b44a78e398c7d90dbee/src/thirdparty/marisa-trie/lib/.libs/bit-vector.o -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/.libs/keyset.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aqen/rime-table-decompiler/f3131bfc5d3c5834b9721b44a78e398c7d90dbee/src/thirdparty/marisa-trie/lib/.libs/keyset.o -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/.libs/libmarisa.0.dylib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aqen/rime-table-decompiler/f3131bfc5d3c5834b9721b44a78e398c7d90dbee/src/thirdparty/marisa-trie/lib/.libs/libmarisa.0.dylib -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/.libs/libmarisa.a: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aqen/rime-table-decompiler/f3131bfc5d3c5834b9721b44a78e398c7d90dbee/src/thirdparty/marisa-trie/lib/.libs/libmarisa.a -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/.libs/libmarisa.dylib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aqen/rime-table-decompiler/f3131bfc5d3c5834b9721b44a78e398c7d90dbee/src/thirdparty/marisa-trie/lib/.libs/libmarisa.dylib -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/.libs/libmarisa.la: -------------------------------------------------------------------------------- 1 | # libmarisa.la - a libtool library file 2 | # Generated by libtool (GNU 
libtool) 2.4.2 Debian-2.4.2-1ubuntu1 3 | # 4 | # Please DO NOT delete this file! 5 | # It is necessary for linking the library. 6 | 7 | # The name that we can dlopen(3). 8 | dlname='libmarisa.0.dylib' 9 | 10 | # Names of this library. 11 | library_names='libmarisa.0.dylib libmarisa.dylib' 12 | 13 | # The name of the static archive. 14 | old_library='libmarisa.a' 15 | 16 | # Linker flags that can not go in dependency_libs. 17 | inherited_linker_flags=' ' 18 | 19 | # Libraries that this one depends upon. 20 | dependency_libs='' 21 | 22 | # Names of additional weak libraries provided by this library 23 | weak_library_names='' 24 | 25 | # Version information for libmarisa. 26 | current=0 27 | age=0 28 | revision=0 29 | 30 | # Is this an already installed library? 31 | installed=no 32 | 33 | # Should we warn about portability when linking against -modules? 34 | shouldnotlink=no 35 | 36 | # Files to dlopen/dlpreopen 37 | dlopen='' 38 | dlpreopen='' 39 | 40 | # Directory that this library needs to be installed in: 41 | libdir='/usr/local/lib' 42 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/.libs/libmarisa.lai: -------------------------------------------------------------------------------- 1 | # libmarisa.la - a libtool library file 2 | # Generated by libtool (GNU libtool) 2.4.2 Debian-2.4.2-1ubuntu1 3 | # 4 | # Please DO NOT delete this file! 5 | # It is necessary for linking the library. 6 | 7 | # The name that we can dlopen(3). 8 | dlname='libmarisa.0.dylib' 9 | 10 | # Names of this library. 11 | library_names='libmarisa.0.dylib libmarisa.dylib' 12 | 13 | # The name of the static archive. 14 | old_library='libmarisa.a' 15 | 16 | # Linker flags that can not go in dependency_libs. 17 | inherited_linker_flags=' ' 18 | 19 | # Libraries that this one depends upon. 
20 | dependency_libs='' 21 | 22 | # Names of additional weak libraries provided by this library 23 | weak_library_names='' 24 | 25 | # Version information for libmarisa. 26 | current=0 27 | age=0 28 | revision=0 29 | 30 | # Is this an already installed library? 31 | installed=yes 32 | 33 | # Should we warn about portability when linking against -modules? 34 | shouldnotlink=no 35 | 36 | # Files to dlopen/dlpreopen 37 | dlopen='' 38 | dlpreopen='' 39 | 40 | # Directory that this library needs to be installed in: 41 | libdir='/usr/local/lib' 42 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/.libs/louds-trie.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aqen/rime-table-decompiler/f3131bfc5d3c5834b9721b44a78e398c7d90dbee/src/thirdparty/marisa-trie/lib/.libs/louds-trie.o -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/.libs/mapper.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aqen/rime-table-decompiler/f3131bfc5d3c5834b9721b44a78e398c7d90dbee/src/thirdparty/marisa-trie/lib/.libs/mapper.o -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/.libs/reader.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aqen/rime-table-decompiler/f3131bfc5d3c5834b9721b44a78e398c7d90dbee/src/thirdparty/marisa-trie/lib/.libs/reader.o -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/.libs/tail.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aqen/rime-table-decompiler/f3131bfc5d3c5834b9721b44a78e398c7d90dbee/src/thirdparty/marisa-trie/lib/.libs/tail.o 
-------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/.libs/trie.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aqen/rime-table-decompiler/f3131bfc5d3c5834b9721b44a78e398c7d90dbee/src/thirdparty/marisa-trie/lib/.libs/trie.o -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/.libs/writer.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aqen/rime-table-decompiler/f3131bfc5d3c5834b9721b44a78e398c7d90dbee/src/thirdparty/marisa-trie/lib/.libs/writer.o -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/Makefile.am: -------------------------------------------------------------------------------- 1 | AM_CXXFLAGS = -Wall -Weffc++ -Wextra 2 | #AM_CXXFLAGS = -Wall -Weffc++ -Wextra -D_DEBUG 3 | 4 | lib_LTLIBRARIES = libmarisa.la 5 | 6 | libmarisa_la_SOURCES = \ 7 | marisa/keyset.cc \ 8 | marisa/agent.cc \ 9 | marisa/trie.cc \ 10 | marisa/grimoire/io/mapper.cc \ 11 | marisa/grimoire/io/reader.cc \ 12 | marisa/grimoire/io/writer.cc \ 13 | marisa/grimoire/vector/bit-vector.cc \ 14 | marisa/grimoire/trie/tail.cc \ 15 | marisa/grimoire/trie/louds-trie.cc 16 | 17 | include_HEADERS = marisa.h 18 | 19 | libmarisa_includedir = ${includedir}/marisa 20 | libmarisa_include_HEADERS = \ 21 | marisa/base.h \ 22 | marisa/exception.h \ 23 | marisa/scoped-ptr.h \ 24 | marisa/scoped-array.h \ 25 | marisa/key.h \ 26 | marisa/keyset.h \ 27 | marisa/query.h \ 28 | marisa/agent.h \ 29 | marisa/stdio.h \ 30 | marisa/iostream.h \ 31 | marisa/trie.h 32 | 33 | noinst_HEADERS = \ 34 | marisa/grimoire/intrin.h \ 35 | marisa/grimoire/io.h \ 36 | marisa/grimoire/io/mapper.h \ 37 | marisa/grimoire/io/reader.h \ 38 | marisa/grimoire/io/writer.h \ 39 | marisa/grimoire/vector.h \ 40 | 
marisa/grimoire/vector/pop-count.h \ 41 | marisa/grimoire/vector/rank-index.h \ 42 | marisa/grimoire/vector/vector.h \ 43 | marisa/grimoire/vector/flat-vector.h \ 44 | marisa/grimoire/vector/bit-vector.h \ 45 | marisa/grimoire/algorithm.h \ 46 | marisa/grimoire/algorithm/sort.h \ 47 | marisa/grimoire/trie.h \ 48 | marisa/grimoire/trie/config.h \ 49 | marisa/grimoire/trie/header.h \ 50 | marisa/grimoire/trie/key.h \ 51 | marisa/grimoire/trie/range.h \ 52 | marisa/grimoire/trie/entry.h \ 53 | marisa/grimoire/trie/tail.h \ 54 | marisa/grimoire/trie/cache.h \ 55 | marisa/grimoire/trie/history.h \ 56 | marisa/grimoire/trie/state.h \ 57 | marisa/grimoire/trie/louds-trie.h 58 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/agent.lo: -------------------------------------------------------------------------------- 1 | # agent.lo - a libtool object file 2 | # Generated by libtool (GNU libtool) 2.4.2 Debian-2.4.2-1ubuntu1 3 | # 4 | # Please DO NOT delete this file! 5 | # It is necessary for linking the library. 6 | 7 | # Name of the PIC object. 8 | pic_object='.libs/agent.o' 9 | 10 | # Name of the non-PIC object 11 | non_pic_object='agent.o' 12 | 13 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/agent.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aqen/rime-table-decompiler/f3131bfc5d3c5834b9721b44a78e398c7d90dbee/src/thirdparty/marisa-trie/lib/agent.o -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/bit-vector.lo: -------------------------------------------------------------------------------- 1 | # bit-vector.lo - a libtool object file 2 | # Generated by libtool (GNU libtool) 2.4.2 Debian-2.4.2-1ubuntu1 3 | # 4 | # Please DO NOT delete this file! 5 | # It is necessary for linking the library. 
6 | 7 | # Name of the PIC object. 8 | pic_object='.libs/bit-vector.o' 9 | 10 | # Name of the non-PIC object 11 | non_pic_object='bit-vector.o' 12 | 13 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/bit-vector.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aqen/rime-table-decompiler/f3131bfc5d3c5834b9721b44a78e398c7d90dbee/src/thirdparty/marisa-trie/lib/bit-vector.o -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/keyset.lo: -------------------------------------------------------------------------------- 1 | # keyset.lo - a libtool object file 2 | # Generated by libtool (GNU libtool) 2.4.2 Debian-2.4.2-1ubuntu1 3 | # 4 | # Please DO NOT delete this file! 5 | # It is necessary for linking the library. 6 | 7 | # Name of the PIC object. 8 | pic_object='.libs/keyset.o' 9 | 10 | # Name of the non-PIC object 11 | non_pic_object='keyset.o' 12 | 13 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/keyset.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aqen/rime-table-decompiler/f3131bfc5d3c5834b9721b44a78e398c7d90dbee/src/thirdparty/marisa-trie/lib/keyset.o -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/libmarisa.la: -------------------------------------------------------------------------------- 1 | # libmarisa.la - a libtool library file 2 | # Generated by libtool (GNU libtool) 2.4.2 Debian-2.4.2-1ubuntu1 3 | # 4 | # Please DO NOT delete this file! 5 | # It is necessary for linking the library. 6 | 7 | # The name that we can dlopen(3). 8 | dlname='libmarisa.0.dylib' 9 | 10 | # Names of this library. 
11 | library_names='libmarisa.0.dylib libmarisa.dylib' 12 | 13 | # The name of the static archive. 14 | old_library='libmarisa.a' 15 | 16 | # Linker flags that can not go in dependency_libs. 17 | inherited_linker_flags=' ' 18 | 19 | # Libraries that this one depends upon. 20 | dependency_libs='' 21 | 22 | # Names of additional weak libraries provided by this library 23 | weak_library_names='' 24 | 25 | # Version information for libmarisa. 26 | current=0 27 | age=0 28 | revision=0 29 | 30 | # Is this an already installed library? 31 | installed=no 32 | 33 | # Should we warn about portability when linking against -modules? 34 | shouldnotlink=no 35 | 36 | # Files to dlopen/dlpreopen 37 | dlopen='' 38 | dlpreopen='' 39 | 40 | # Directory that this library needs to be installed in: 41 | libdir='/usr/local/lib' 42 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/louds-trie.lo: -------------------------------------------------------------------------------- 1 | # louds-trie.lo - a libtool object file 2 | # Generated by libtool (GNU libtool) 2.4.2 Debian-2.4.2-1ubuntu1 3 | # 4 | # Please DO NOT delete this file! 5 | # It is necessary for linking the library. 6 | 7 | # Name of the PIC object. 
8 | pic_object='.libs/louds-trie.o' 9 | 10 | # Name of the non-PIC object 11 | non_pic_object='louds-trie.o' 12 | 13 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/louds-trie.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aqen/rime-table-decompiler/f3131bfc5d3c5834b9721b44a78e398c7d90dbee/src/thirdparty/marisa-trie/lib/louds-trie.o -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/mapper.lo: -------------------------------------------------------------------------------- 1 | # mapper.lo - a libtool object file 2 | # Generated by libtool (GNU libtool) 2.4.2 Debian-2.4.2-1ubuntu1 3 | # 4 | # Please DO NOT delete this file! 5 | # It is necessary for linking the library. 6 | 7 | # Name of the PIC object. 8 | pic_object='.libs/mapper.o' 9 | 10 | # Name of the non-PIC object 11 | non_pic_object='mapper.o' 12 | 13 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/mapper.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aqen/rime-table-decompiler/f3131bfc5d3c5834b9721b44a78e398c7d90dbee/src/thirdparty/marisa-trie/lib/mapper.o -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/marisa.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_H_ 2 | #define MARISA_H_ 3 | 4 | // "marisa/mstdio.h" includes <cstdio> for I/O using std::FILE. 5 | #include "marisa/mstdio.h" 6 | 7 | // "marisa/iostream.h" includes <iosfwd> for I/O using std::iostream. 8 | #include "marisa/iostream.h" 9 | 10 | // You can use <marisa/trie.h> instead of <marisa.h> if you don't need the 11 | // above I/O interfaces and don't want to include the above I/O headers.
12 | #include "marisa/trie.h" 13 | 14 | #endif // MARISA_H_ 15 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/marisa/agent.cc: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "agent.h" 4 | #include "grimoire/trie.h" 5 | 6 | namespace marisa { 7 | 8 | Agent::Agent() : query_(), key_(), state_() {} 9 | 10 | Agent::~Agent() {} 11 | 12 | void Agent::set_query(const char *str) { 13 | MARISA_THROW_IF(str == NULL, MARISA_NULL_ERROR); 14 | if (state_.get() != NULL) { 15 | state_->reset(); 16 | } 17 | query_.set_str(str); 18 | } 19 | 20 | void Agent::set_query(const char *ptr, std::size_t length) { 21 | MARISA_THROW_IF((ptr == NULL) && (length != 0), MARISA_NULL_ERROR); 22 | if (state_.get() != NULL) { 23 | state_->reset(); 24 | } 25 | query_.set_str(ptr, length); 26 | } 27 | 28 | void Agent::set_query(std::size_t key_id) { 29 | if (state_.get() != NULL) { 30 | state_->reset(); 31 | } 32 | query_.set_id(key_id); 33 | } 34 | 35 | void Agent::init_state() { 36 | MARISA_THROW_IF(state_.get() != NULL, MARISA_STATE_ERROR); 37 | state_.reset(new (std::nothrow) grimoire::State); 38 | MARISA_THROW_IF(state_.get() == NULL, MARISA_MEMORY_ERROR); 39 | } 40 | 41 | void Agent::clear() { 42 | Agent().swap(*this); 43 | } 44 | 45 | void Agent::swap(Agent &rhs) { 46 | query_.swap(rhs.query_); 47 | key_.swap(rhs.key_); 48 | state_.swap(rhs.state_); 49 | } 50 | 51 | } // namespace marisa 52 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/marisa/agent.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_AGENT_H_ 2 | #define MARISA_AGENT_H_ 3 | 4 | #include "key.h" 5 | #include "query.h" 6 | 7 | namespace marisa { 8 | namespace grimoire { 9 | namespace trie { 10 | 11 | class State; 12 | 13 | } // namespace trie 14 | } // namespace grimoire 15 | 16 | 
class Agent { 17 | public: 18 | Agent(); 19 | ~Agent(); 20 | 21 | const Query &query() const { 22 | return query_; 23 | } 24 | const Key &key() const { 25 | return key_; 26 | } 27 | 28 | void set_query(const char *str); 29 | void set_query(const char *ptr, std::size_t length); 30 | void set_query(std::size_t key_id); 31 | 32 | const grimoire::trie::State &state() const { 33 | return *state_; 34 | } 35 | grimoire::trie::State &state() { 36 | return *state_; 37 | } 38 | 39 | void set_key(const char *str) { 40 | MARISA_DEBUG_IF(str == NULL, MARISA_NULL_ERROR); 41 | key_.set_str(str); 42 | } 43 | void set_key(const char *ptr, std::size_t length) { 44 | MARISA_DEBUG_IF((ptr == NULL) && (length != 0), MARISA_NULL_ERROR); 45 | MARISA_DEBUG_IF(length > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 46 | key_.set_str(ptr, length); 47 | } 48 | void set_key(std::size_t id) { 49 | MARISA_DEBUG_IF(id > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 50 | key_.set_id(id); 51 | } 52 | 53 | bool has_state() const { 54 | return state_.get() != NULL; 55 | } 56 | void init_state(); 57 | 58 | void clear(); 59 | void swap(Agent &rhs); 60 | 61 | private: 62 | Query query_; 63 | Key key_; 64 | scoped_ptr state_; 65 | 66 | // Disallows copy and assignment. 67 | Agent(const Agent &); 68 | Agent &operator=(const Agent &); 69 | }; 70 | 71 | } // namespace marisa 72 | 73 | #endif // MARISA_AGENT_H_ 74 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/marisa/exception.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_EXCEPTION_H_ 2 | #define MARISA_EXCEPTION_H_ 3 | 4 | #include 5 | 6 | #include "base.h" 7 | 8 | namespace marisa { 9 | 10 | // An exception object keeps a filename, a line number, an error code and an 11 | // error message. 
The message format is as follows: 12 | // "__FILE__:__LINE__: error_code: error_message" 13 | class Exception : public std::exception { 14 | public: 15 | Exception(const char *filename, int line, 16 | ErrorCode error_code, const char *error_message) 17 | : std::exception(), filename_(filename), line_(line), 18 | error_code_(error_code), error_message_(error_message) {} 19 | Exception(const Exception &ex) 20 | : std::exception(), filename_(ex.filename_), line_(ex.line_), 21 | error_code_(ex.error_code_), error_message_(ex.error_message_) {} 22 | virtual ~Exception() throw() {} 23 | 24 | Exception &operator=(const Exception &rhs) { 25 | filename_ = rhs.filename_; 26 | line_ = rhs.line_; 27 | error_code_ = rhs.error_code_; 28 | error_message_ = rhs.error_message_; 29 | return *this; 30 | } 31 | 32 | const char *filename() const { 33 | return filename_; 34 | } 35 | int line() const { 36 | return line_; 37 | } 38 | ErrorCode error_code() const { 39 | return error_code_; 40 | } 41 | const char *error_message() const { 42 | return error_message_; 43 | } 44 | 45 | virtual const char *what() const throw() { 46 | return error_message_; 47 | } 48 | 49 | private: 50 | const char *filename_; 51 | int line_; 52 | ErrorCode error_code_; 53 | const char *error_message_; 54 | }; 55 | 56 | // These macros are used to convert a line number to a string constant. 57 | #define MARISA_INT_TO_STR(value) #value 58 | #define MARISA_LINE_TO_STR(line) MARISA_INT_TO_STR(line) 59 | #define MARISA_LINE_STR MARISA_LINE_TO_STR(__LINE__) 60 | 61 | // MARISA_THROW throws an exception with a filename, a line number, an error 62 | // code and an error message. 
The message format is as follows: 63 | // "__FILE__:__LINE__: error_code: error_message" 64 | #define MARISA_THROW(error_code, error_message) \ 65 | (throw marisa::Exception(__FILE__, __LINE__, error_code, \ 66 | __FILE__ ":" MARISA_LINE_STR ": " #error_code ": " error_message)) 67 | 68 | // MARISA_THROW_IF throws an exception if `condition' is true. 69 | #define MARISA_THROW_IF(condition, error_code) \ 70 | (void)((!(condition)) || (MARISA_THROW(error_code, #condition), 0)) 71 | 72 | // MARISA_DEBUG_IF is ignored if _DEBUG is undefined. So, it is useful for 73 | // debugging time-critical codes. 74 | #ifdef _DEBUG 75 | #define MARISA_DEBUG_IF(cond, error_code) MARISA_THROW_IF(cond, error_code) 76 | #else 77 | #define MARISA_DEBUG_IF(cond, error_code) 78 | #endif 79 | 80 | } // namespace marisa 81 | 82 | #endif // MARISA_EXCEPTION_H_ 83 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/marisa/grimoire/algorithm.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_GRIMOIRE_ALGORITHM_H_ 2 | #define MARISA_GRIMOIRE_ALGORITHM_H_ 3 | 4 | #include "algorithm/sort.h" 5 | 6 | namespace marisa { 7 | namespace grimoire { 8 | 9 | class Algorithm { 10 | public: 11 | Algorithm() {} 12 | 13 | template 14 | std::size_t sort(Iterator begin, Iterator end) const { 15 | return algorithm::sort(begin, end); 16 | } 17 | 18 | private: 19 | Algorithm(const Algorithm &); 20 | Algorithm &operator=(const Algorithm &); 21 | }; 22 | 23 | } // namespace grimoire 24 | } // namespace marisa 25 | 26 | #endif // MARISA_GRIMOIRE_ALGORITHM_H_ 27 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/marisa/grimoire/intrin.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_GRIMOIRE_INTRIN_H_ 2 | #define MARISA_GRIMOIRE_INTRIN_H_ 3 | 4 | #include "../base.h" 5 | 6 | #if 
defined(__x86_64__) || defined(_M_X64) 7 | #define MARISA_X64 8 | #elif defined(__i386__) || defined(_M_IX86) 9 | #define MARISA_X86 10 | #else // defined(__i386__) || defined(_M_IX86) 11 | #ifdef MARISA_USE_POPCNT 12 | #undef MARISA_USE_POPCNT 13 | #endif // MARISA_USE_POPCNT 14 | #ifdef MARISA_USE_SSE4A 15 | #undef MARISA_USE_SSE4A 16 | #endif // MARISA_USE_SSE4A 17 | #ifdef MARISA_USE_SSE4 18 | #undef MARISA_USE_SSE4 19 | #endif // MARISA_USE_SSE4 20 | #ifdef MARISA_USE_SSE4_2 21 | #undef MARISA_USE_SSE4_2 22 | #endif // MARISA_USE_SSE4_2 23 | #ifdef MARISA_USE_SSE4_1 24 | #undef MARISA_USE_SSE4_1 25 | #endif // MARISA_USE_SSE4_1 26 | #ifdef MARISA_USE_SSSE3 27 | #undef MARISA_USE_SSSE3 28 | #endif // MARISA_USE_SSSE3 29 | #ifdef MARISA_USE_SSE3 30 | #undef MARISA_USE_SSE3 31 | #endif // MARISA_USE_SSE3 32 | #ifdef MARISA_USE_SSE2 33 | #undef MARISA_USE_SSE2 34 | #endif // MARISA_USE_SSE2 35 | #endif // defined(__i386__) || defined(_M_IX86) 36 | 37 | #ifdef MARISA_USE_POPCNT 38 | #ifndef MARISA_USE_SSE3 39 | #define MARISA_USE_SSE3 40 | #endif // MARISA_USE_SSE3 41 | #ifdef _MSC_VER 42 | #include 43 | #else // _MSC_VER 44 | #include 45 | #endif // _MSC_VER 46 | #endif // MARISA_USE_POPCNT 47 | 48 | #ifdef MARISA_USE_SSE4A 49 | #ifndef MARISA_USE_SSE3 50 | #define MARISA_USE_SSE3 51 | #endif // MARISA_USE_SSE3 52 | #ifndef MARISA_USE_POPCNT 53 | #define MARISA_USE_POPCNT 54 | #endif // MARISA_USE_POPCNT 55 | #endif // MARISA_USE_SSE4A 56 | 57 | #ifdef MARISA_USE_SSE4 58 | #ifndef MARISA_USE_SSE4_2 59 | #define MARISA_USE_SSE4_2 60 | #endif // MARISA_USE_SSE4_2 61 | #endif // MARISA_USE_SSE4 62 | 63 | #ifdef MARISA_USE_SSE4_2 64 | #ifndef MARISA_USE_SSE4_1 65 | #define MARISA_USE_SSE4_1 66 | #endif // MARISA_USE_SSE4_1 67 | #ifndef MARISA_USE_POPCNT 68 | #define MARISA_USE_POPCNT 69 | #endif // MARISA_USE_POPCNT 70 | #endif // MARISA_USE_SSE4_2 71 | 72 | #ifdef MARISA_USE_SSE4_1 73 | #ifndef MARISA_USE_SSSE3 74 | #define MARISA_USE_SSSE3 75 | #endif // 
MARISA_USE_SSSE3 76 | #endif // MARISA_USE_SSE4_1 77 | 78 | #ifdef MARISA_USE_SSSE3 79 | #ifndef MARISA_USE_SSE3 80 | #define MARISA_USE_SSE3 81 | #endif // MARISA_USE_SSE3 82 | #ifdef MARISA_X64 83 | #define MARISA_X64_SSSE3 84 | #else // MARISA_X64 85 | #define MARISA_X86_SSSE3 86 | #endif // MARISA_X64 87 | #include 88 | #endif // MARISA_USE_SSSE3 89 | 90 | #ifdef MARISA_USE_SSE3 91 | #ifndef MARISA_USE_SSE2 92 | #define MARISA_USE_SSE2 93 | #endif // MARISA_USE_SSE2 94 | #endif // MARISA_USE_SSE3 95 | 96 | #ifdef MARISA_USE_SSE2 97 | #ifdef MARISA_X64 98 | #define MARISA_X64_SSE2 99 | #else // MARISA_X64 100 | #define MARISA_X86_SSE2 101 | #endif // MARISA_X64 102 | #include 103 | #endif // MARISA_USE_SSE2 104 | 105 | #ifdef _MSC_VER 106 | #if MARISA_WORD_SIZE == 64 107 | #include 108 | #pragma intrinsic(_BitScanForward64) 109 | #else // MARISA_WORD_SIZE == 64 110 | #include 111 | #pragma intrinsic(_BitScanForward) 112 | #endif // MARISA_WORD_SIZE == 64 113 | #endif // _MSC_VER 114 | 115 | #endif // MARISA_GRIMOIRE_INTRIN_H_ 116 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/marisa/grimoire/io.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_GRIMOIRE_IO_H_ 2 | #define MARISA_GRIMOIRE_IO_H_ 3 | 4 | #include "io/mapper.h" 5 | #include "io/reader.h" 6 | #include "io/writer.h" 7 | 8 | namespace marisa { 9 | namespace grimoire { 10 | 11 | using io::Mapper; 12 | using io::Reader; 13 | using io::Writer; 14 | 15 | } // namespace grimoire 16 | } // namespace marisa 17 | 18 | #endif // MARISA_GRIMOIRE_IO_H_ 19 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/marisa/grimoire/io/mapper.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_GRIMOIRE_IO_MAPPER_H_ 2 | #define MARISA_GRIMOIRE_IO_MAPPER_H_ 3 | 4 | #include 5 | 6 | #include
"../../base.h" 7 | 8 | namespace marisa { 9 | namespace grimoire { 10 | namespace io { 11 | 12 | class Mapper { 13 | public: 14 | Mapper(); 15 | ~Mapper(); 16 | 17 | void open(const char *filename); 18 | void open(const void *ptr, std::size_t size); 19 | 20 | template 21 | void map(T *obj) { 22 | MARISA_THROW_IF(obj == NULL, MARISA_NULL_ERROR); 23 | *obj = *static_cast(map_data(sizeof(T))); 24 | } 25 | 26 | template 27 | void map(const T **objs, std::size_t num_objs) { 28 | MARISA_THROW_IF((objs == NULL) && (num_objs != 0), MARISA_NULL_ERROR); 29 | MARISA_THROW_IF(num_objs > (MARISA_SIZE_MAX / sizeof(T)), 30 | MARISA_SIZE_ERROR); 31 | *objs = static_cast(map_data(sizeof(T) * num_objs)); 32 | } 33 | 34 | void seek(std::size_t size); 35 | 36 | bool is_open() const; 37 | 38 | void clear(); 39 | void swap(Mapper &rhs); 40 | 41 | private: 42 | const void *ptr_; 43 | void *origin_; 44 | std::size_t avail_; 45 | std::size_t size_; 46 | #if (defined _WIN32) || (defined _WIN64) 47 | void *file_; 48 | void *map_; 49 | #else // (defined _WIN32) || (defined _WIN64) 50 | int fd_; 51 | #endif // (defined _WIN32) || (defined _WIN64) 52 | 53 | void open_(const char *filename); 54 | void open_(const void *ptr, std::size_t size); 55 | 56 | const void *map_data(std::size_t size); 57 | 58 | // Disallows copy and assignment. 
59 | Mapper(const Mapper &); 60 | Mapper &operator=(const Mapper &); 61 | }; 62 | 63 | } // namespace io 64 | } // namespace grimoire 65 | } // namespace marisa 66 | 67 | #endif // MARISA_GRIMOIRE_IO_MAPPER_H_ 68 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/marisa/grimoire/io/reader.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_GRIMOIRE_IO_READER_H_ 2 | #define MARISA_GRIMOIRE_IO_READER_H_ 3 | 4 | #include 5 | #include 6 | 7 | #include "../../base.h" 8 | 9 | namespace marisa { 10 | namespace grimoire { 11 | namespace io { 12 | 13 | class Reader { 14 | public: 15 | Reader(); 16 | ~Reader(); 17 | 18 | void open(const char *filename); 19 | void open(std::FILE *file); 20 | void open(int fd); 21 | void open(std::istream &stream); 22 | 23 | template 24 | void read(T *obj) { 25 | MARISA_THROW_IF(obj == NULL, MARISA_NULL_ERROR); 26 | read_data(obj, sizeof(T)); 27 | } 28 | 29 | template 30 | void read(T *objs, std::size_t num_objs) { 31 | MARISA_THROW_IF((objs == NULL) && (num_objs != 0), MARISA_NULL_ERROR); 32 | MARISA_THROW_IF(num_objs > (MARISA_SIZE_MAX / sizeof(T)), 33 | MARISA_SIZE_ERROR); 34 | read_data(objs, sizeof(T) * num_objs); 35 | } 36 | 37 | void seek(std::size_t size); 38 | 39 | bool is_open() const; 40 | 41 | void clear(); 42 | void swap(Reader &rhs); 43 | 44 | private: 45 | std::FILE *file_; 46 | int fd_; 47 | std::istream *stream_; 48 | bool needs_fclose_; 49 | 50 | void open_(const char *filename); 51 | void open_(std::FILE *file); 52 | void open_(int fd); 53 | void open_(std::istream &stream); 54 | 55 | void read_data(void *buf, std::size_t size); 56 | 57 | // Disallows copy and assignment. 
58 | Reader(const Reader &); 59 | Reader &operator=(const Reader &); 60 | }; 61 | 62 | } // namespace io 63 | } // namespace grimoire 64 | } // namespace marisa 65 | 66 | #endif // MARISA_GRIMOIRE_IO_READER_H_ 67 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/marisa/grimoire/io/writer.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_GRIMOIRE_IO_WRITER_H_ 2 | #define MARISA_GRIMOIRE_IO_WRITER_H_ 3 | 4 | #include 5 | #include 6 | 7 | #include "../../base.h" 8 | 9 | namespace marisa { 10 | namespace grimoire { 11 | namespace io { 12 | 13 | class Writer { 14 | public: 15 | Writer(); 16 | ~Writer(); 17 | 18 | void open(const char *filename); 19 | void open(std::FILE *file); 20 | void open(int fd); 21 | void open(std::ostream &stream); 22 | 23 | template 24 | void write(const T &obj) { 25 | write_data(&obj, sizeof(T)); 26 | } 27 | 28 | template 29 | void write(const T *objs, std::size_t num_objs) { 30 | MARISA_THROW_IF((objs == NULL) && (num_objs != 0), MARISA_NULL_ERROR); 31 | MARISA_THROW_IF(num_objs > (MARISA_SIZE_MAX / sizeof(T)), 32 | MARISA_SIZE_ERROR); 33 | write_data(objs, sizeof(T) * num_objs); 34 | } 35 | 36 | void seek(std::size_t size); 37 | 38 | bool is_open() const; 39 | 40 | void clear(); 41 | void swap(Writer &rhs); 42 | 43 | private: 44 | std::FILE *file_; 45 | int fd_; 46 | std::ostream *stream_; 47 | bool needs_fclose_; 48 | 49 | void open_(const char *filename); 50 | void open_(std::FILE *file); 51 | void open_(int fd); 52 | void open_(std::ostream &stream); 53 | 54 | void write_data(const void *data, std::size_t size); 55 | 56 | // Disallows copy and assignment. 
57 | Writer(const Writer &); 58 | Writer &operator=(const Writer &); 59 | }; 60 | 61 | } // namespace io 62 | } // namespace grimoire 63 | } // namespace marisa 64 | 65 | #endif // MARISA_GRIMOIRE_IO_WRITER_H_ 66 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/marisa/grimoire/trie.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_GRIMOIRE_TRIE_H_ 2 | #define MARISA_GRIMOIRE_TRIE_H_ 3 | 4 | #include "trie/state.h" 5 | #include "trie/louds-trie.h" 6 | 7 | namespace marisa { 8 | namespace grimoire { 9 | 10 | using trie::State; 11 | using trie::LoudsTrie; 12 | 13 | } // namespace grimoire 14 | } // namespace marisa 15 | 16 | #endif // MARISA_GRIMOIRE_TRIE_H_ 17 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/marisa/grimoire/trie/cache.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_GRIMOIRE_TRIE_CACHE_H_ 2 | #define MARISA_GRIMOIRE_TRIE_CACHE_H_ 3 | 4 | #include 5 | 6 | #include "../../base.h" 7 | 8 | namespace marisa { 9 | namespace grimoire { 10 | namespace trie { 11 | 12 | class Cache { 13 | public: 14 | Cache() : parent_(0), child_(0), union_() { 15 | union_.weight = FLT_MIN; 16 | } 17 | Cache(const Cache &cache) 18 | : parent_(cache.parent_), child_(cache.child_), union_(cache.union_) {} 19 | 20 | Cache &operator=(const Cache &cache) { 21 | parent_ = cache.parent_; 22 | child_ = cache.child_; 23 | union_ = cache.union_; 24 | return *this; 25 | } 26 | 27 | void set_parent(std::size_t parent) { 28 | MARISA_DEBUG_IF(parent > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 29 | parent_ = (UInt32)parent; 30 | } 31 | void set_child(std::size_t child) { 32 | MARISA_DEBUG_IF(child > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 33 | child_ = (UInt32)child; 34 | } 35 | void set_base(UInt8 base) { 36 | union_.link = (union_.link & ~0xFFU) | base; 37 | 
} 38 | void set_extra(std::size_t extra) { 39 | MARISA_DEBUG_IF(extra > (MARISA_UINT32_MAX >> 8), MARISA_SIZE_ERROR); 40 | union_.link = (UInt32)((union_.link & 0xFFU) | (extra << 8)); 41 | } 42 | void set_weight(float weight) { 43 | union_.weight = weight; 44 | } 45 | 46 | std::size_t parent() const { 47 | return parent_; 48 | } 49 | std::size_t child() const { 50 | return child_; 51 | } 52 | UInt8 base() const { 53 | return (UInt8)(union_.link & 0xFFU); 54 | } 55 | std::size_t extra() const { 56 | return union_.link >> 8; 57 | } 58 | char label() const { 59 | return (char)base(); 60 | } 61 | std::size_t link() const { 62 | return union_.link; 63 | } 64 | float weight() const { 65 | return union_.weight; 66 | } 67 | 68 | private: 69 | UInt32 parent_; 70 | UInt32 child_; 71 | union Union { 72 | UInt32 link; 73 | float weight; 74 | } union_; 75 | }; 76 | 77 | } // namespace trie 78 | } // namespace grimoire 79 | } // namespace marisa 80 | 81 | #endif // MARISA_GRIMOIRE_TRIE_CACHE_H_ 82 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/marisa/grimoire/trie/entry.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_GRIMOIRE_TRIE_ENTRY_H_ 2 | #define MARISA_GRIMOIRE_TRIE_ENTRY_H_ 3 | 4 | #include "../../base.h" 5 | 6 | namespace marisa { 7 | namespace grimoire { 8 | namespace trie { 9 | 10 | class Entry { 11 | public: 12 | Entry() 13 | : ptr_(static_cast(NULL) - 1), length_(0), id_(0) {} 14 | Entry(const Entry &entry) 15 | : ptr_(entry.ptr_), length_(entry.length_), id_(entry.id_) {} 16 | 17 | Entry &operator=(const Entry &entry) { 18 | ptr_ = entry.ptr_; 19 | length_ = entry.length_; 20 | id_ = entry.id_; 21 | return *this; 22 | } 23 | 24 | char operator[](std::size_t i) const { 25 | MARISA_DEBUG_IF(i >= length_, MARISA_BOUND_ERROR); 26 | return *(ptr_ - i); 27 | } 28 | 29 | void set_str(const char *ptr, std::size_t length) { 30 | MARISA_DEBUG_IF((ptr == 
NULL) && (length != 0), MARISA_NULL_ERROR); 31 | MARISA_DEBUG_IF(length > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 32 | ptr_ = ptr + length - 1; 33 | length_ = (UInt32)length; 34 | } 35 | void set_id(std::size_t id) { 36 | MARISA_DEBUG_IF(id > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 37 | id_ = (UInt32)id; 38 | } 39 | 40 | const char *ptr() const { 41 | return ptr_ - length_ + 1; 42 | } 43 | std::size_t length() const { 44 | return length_; 45 | } 46 | std::size_t id() const { 47 | return id_; 48 | } 49 | 50 | class StringComparer { 51 | public: 52 | bool operator()(const Entry &lhs, const Entry &rhs) const { 53 | for (std::size_t i = 0; i < lhs.length(); ++i) { 54 | if (i == rhs.length()) { 55 | return true; 56 | } 57 | if (lhs[i] != rhs[i]) { 58 | return (UInt8)lhs[i] > (UInt8)rhs[i]; 59 | } 60 | } 61 | return lhs.length() > rhs.length(); 62 | } 63 | }; 64 | 65 | class IDComparer { 66 | public: 67 | bool operator()(const Entry &lhs, const Entry &rhs) const { 68 | return lhs.id_ < rhs.id_; 69 | } 70 | }; 71 | 72 | private: 73 | const char *ptr_; 74 | UInt32 length_; 75 | UInt32 id_; 76 | }; 77 | 78 | } // namespace trie 79 | } // namespace grimoire 80 | } // namespace marisa 81 | 82 | #endif // MARISA_GRIMOIRE_TRIE_ENTRY_H_ 83 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/marisa/grimoire/trie/header.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_GRIMOIRE_TRIE_HEADER_H_ 2 | #define MARISA_GRIMOIRE_TRIE_HEADER_H_ 3 | 4 | #include "../io.h" 5 | 6 | namespace marisa { 7 | namespace grimoire { 8 | namespace trie { 9 | 10 | class Header { 11 | public: 12 | enum { 13 | HEADER_SIZE = 16 14 | }; 15 | 16 | Header() {} 17 | 18 | void map(Mapper &mapper) { 19 | const char *ptr; 20 | mapper.map(&ptr, HEADER_SIZE); 21 | MARISA_THROW_IF(!test_header(ptr), MARISA_FORMAT_ERROR); 22 | } 23 | void read(Reader &reader) { 24 | char buf[HEADER_SIZE]; 25 | 
reader.read(buf, HEADER_SIZE); 26 | MARISA_THROW_IF(!test_header(buf), MARISA_FORMAT_ERROR); 27 | } 28 | void write(Writer &writer) const { 29 | writer.write(get_header(), HEADER_SIZE); 30 | } 31 | 32 | std::size_t io_size() const { 33 | return HEADER_SIZE; 34 | } 35 | 36 | private: 37 | 38 | static const char *get_header() { 39 | static const char buf[HEADER_SIZE] = "We love Marisa."; 40 | return buf; 41 | } 42 | 43 | static bool test_header(const char *ptr) { 44 | for (std::size_t i = 0; i < HEADER_SIZE; ++i) { 45 | if (ptr[i] != get_header()[i]) { 46 | return false; 47 | } 48 | } 49 | return true; 50 | } 51 | 52 | // Disallows copy and assignment. 53 | Header(const Header &); 54 | Header &operator=(const Header &); 55 | }; 56 | 57 | } // namespace trie 58 | } // namespace grimoire 59 | } // namespace marisa 60 | 61 | #endif // MARISA_GRIMOIRE_TRIE_HEADER_H_ 62 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/marisa/grimoire/trie/history.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_GRIMOIRE_TRIE_STATE_HISTORY_H_ 2 | #define MARISA_GRIMOIRE_TRIE_STATE_HISTORY_H_ 3 | 4 | #include "../../base.h" 5 | 6 | namespace marisa { 7 | namespace grimoire { 8 | namespace trie { 9 | 10 | class History { 11 | public: 12 | History() 13 | : node_id_(0), louds_pos_(0), key_pos_(0), 14 | link_id_(MARISA_INVALID_LINK_ID), key_id_(MARISA_INVALID_KEY_ID) {} 15 | 16 | void set_node_id(std::size_t node_id) { 17 | MARISA_DEBUG_IF(node_id > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 18 | node_id_ = (UInt32)node_id; 19 | } 20 | void set_louds_pos(std::size_t louds_pos) { 21 | MARISA_DEBUG_IF(louds_pos > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 22 | louds_pos_ = (UInt32)louds_pos; 23 | } 24 | void set_key_pos(std::size_t key_pos) { 25 | MARISA_DEBUG_IF(key_pos > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 26 | key_pos_ = (UInt32)key_pos; 27 | } 28 | void set_link_id(std::size_t
link_id) { 29 | MARISA_DEBUG_IF(link_id > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 30 | link_id_ = (UInt32)link_id; 31 | } 32 | void set_key_id(std::size_t key_id) { 33 | MARISA_DEBUG_IF(key_id > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 34 | key_id_ = (UInt32)key_id; 35 | } 36 | 37 | std::size_t node_id() const { 38 | return node_id_; 39 | } 40 | std::size_t louds_pos() const { 41 | return louds_pos_; 42 | } 43 | std::size_t key_pos() const { 44 | return key_pos_; 45 | } 46 | std::size_t link_id() const { 47 | return link_id_; 48 | } 49 | std::size_t key_id() const { 50 | return key_id_; 51 | } 52 | 53 | private: 54 | UInt32 node_id_; 55 | UInt32 louds_pos_; 56 | UInt32 key_pos_; 57 | UInt32 link_id_; 58 | UInt32 key_id_; 59 | }; 60 | 61 | } // namespace trie 62 | } // namespace grimoire 63 | } // namespace marisa 64 | 65 | #endif // MARISA_GRIMOIRE_TRIE_STATE_HISTORY_H_ 66 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/marisa/grimoire/trie/range.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_GRIMOIRE_TRIE_RANGE_H_ 2 | #define MARISA_GRIMOIRE_TRIE_RANGE_H_ 3 | 4 | #include "../../base.h" 5 | 6 | namespace marisa { 7 | namespace grimoire { 8 | namespace trie { 9 | 10 | class Range { 11 | public: 12 | Range() : begin_(0), end_(0), key_pos_(0) {} 13 | 14 | void set_begin(std::size_t begin) { 15 | MARISA_DEBUG_IF(begin > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 16 | begin_ = begin; 17 | } 18 | void set_end(std::size_t end) { 19 | MARISA_DEBUG_IF(end > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 20 | end_ = end; 21 | } 22 | void set_key_pos(std::size_t key_pos) { 23 | MARISA_DEBUG_IF(key_pos > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 24 | key_pos_ = key_pos; 25 | } 26 | 27 | std::size_t begin() const { 28 | return begin_; 29 | } 30 | std::size_t end() const { 31 | return end_; 32 | } 33 | std::size_t key_pos() const { 34 | return key_pos_; 35 | } 36 | 37 | 
private: 38 | UInt32 begin_; 39 | UInt32 end_; 40 | UInt32 key_pos_; 41 | }; 42 | 43 | inline Range make_range(std::size_t begin, std::size_t end, 44 | std::size_t key_pos) { 45 | Range range; 46 | range.set_begin(begin); 47 | range.set_end(end); 48 | range.set_key_pos(key_pos); 49 | return range; 50 | } 51 | 52 | class WeightedRange { 53 | public: 54 | WeightedRange() : range_(), weight_(0.0F) {} 55 | 56 | void set_range(const Range &range) { 57 | range_ = range; 58 | } 59 | void set_begin(std::size_t begin) { 60 | range_.set_begin(begin); 61 | } 62 | void set_end(std::size_t end) { 63 | range_.set_end(end); 64 | } 65 | void set_key_pos(std::size_t key_pos) { 66 | range_.set_key_pos(key_pos); 67 | } 68 | void set_weight(float weight) { 69 | weight_ = weight; 70 | } 71 | 72 | const Range &range() const { 73 | return range_; 74 | } 75 | std::size_t begin() const { 76 | return range_.begin(); 77 | } 78 | std::size_t end() const { 79 | return range_.end(); 80 | } 81 | std::size_t key_pos() const { 82 | return range_.key_pos(); 83 | } 84 | float weight() const { 85 | return weight_; 86 | } 87 | 88 | private: 89 | Range range_; 90 | float weight_; 91 | }; 92 | 93 | inline bool operator<(const WeightedRange &lhs, const WeightedRange &rhs) { 94 | return lhs.weight() < rhs.weight(); 95 | } 96 | 97 | inline bool operator>(const WeightedRange &lhs, const WeightedRange &rhs) { 98 | return lhs.weight() > rhs.weight(); 99 | } 100 | 101 | inline WeightedRange make_weighted_range(std::size_t begin, std::size_t end, 102 | std::size_t key_pos, float weight) { 103 | WeightedRange range; 104 | range.set_begin(begin); 105 | range.set_end(end); 106 | range.set_key_pos(key_pos); 107 | range.set_weight(weight); 108 | return range; 109 | } 110 | 111 | } // namespace trie 112 | } // namespace grimoire 113 | } // namespace marisa 114 | 115 | #endif // MARISA_GRIMOIRE_TRIE_RANGE_H_ 116 | -------------------------------------------------------------------------------- 
/src/thirdparty/marisa-trie/lib/marisa/grimoire/trie/state.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_GRIMOIRE_TRIE_STATE_H_ 2 | #define MARISA_GRIMOIRE_TRIE_STATE_H_ 3 | 4 | #include "../vector.h" 5 | #include "history.h" 6 | 7 | namespace marisa { 8 | namespace grimoire { 9 | namespace trie { 10 | 11 | // A search agent has its internal state and the status codes are defined 12 | // below. 13 | typedef enum StatusCode { 14 | MARISA_READY_TO_ALL, 15 | MARISA_READY_TO_COMMON_PREFIX_SEARCH, 16 | MARISA_READY_TO_PREDICTIVE_SEARCH, 17 | MARISA_END_OF_COMMON_PREFIX_SEARCH, 18 | MARISA_END_OF_PREDICTIVE_SEARCH, 19 | } StatusCode; 20 | 21 | class State { 22 | public: 23 | State() 24 | : key_buf_(), history_(), node_id_(0), query_pos_(0), 25 | history_pos_(0), status_code_(MARISA_READY_TO_ALL) {} 26 | 27 | void set_node_id(std::size_t node_id) { 28 | MARISA_DEBUG_IF(node_id > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 29 | node_id_ = (UInt32)node_id; 30 | } 31 | void set_query_pos(std::size_t query_pos) { 32 | MARISA_DEBUG_IF(query_pos > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 33 | query_pos_ = (UInt32)query_pos; 34 | } 35 | void set_history_pos(std::size_t history_pos) { 36 | MARISA_DEBUG_IF(history_pos > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 37 | history_pos_ = (UInt32)history_pos; 38 | } 39 | void set_status_code(StatusCode status_code) { 40 | status_code_ = status_code; 41 | } 42 | 43 | std::size_t node_id() const { 44 | return node_id_; 45 | } 46 | std::size_t query_pos() const { 47 | return query_pos_; 48 | } 49 | std::size_t history_pos() const { 50 | return history_pos_; 51 | } 52 | StatusCode status_code() const { 53 | return status_code_; 54 | } 55 | 56 | const Vector &key_buf() const { 57 | return key_buf_; 58 | } 59 | const Vector &history() const { 60 | return history_; 61 | } 62 | 63 | Vector &key_buf() { 64 | return key_buf_; 65 | } 66 | Vector &history() { 67 | return history_; 68 | } 69 | 70 | void 
reset() { 71 | status_code_ = MARISA_READY_TO_ALL; 72 | } 73 | 74 | void lookup_init() { 75 | node_id_ = 0; 76 | query_pos_ = 0; 77 | status_code_ = MARISA_READY_TO_ALL; 78 | } 79 | void reverse_lookup_init() { 80 | key_buf_.resize(0); 81 | key_buf_.reserve(32); 82 | status_code_ = MARISA_READY_TO_ALL; 83 | } 84 | void common_prefix_search_init() { 85 | node_id_ = 0; 86 | query_pos_ = 0; 87 | status_code_ = MARISA_READY_TO_COMMON_PREFIX_SEARCH; 88 | } 89 | void predictive_search_init() { 90 | key_buf_.resize(0); 91 | key_buf_.reserve(64); 92 | history_.resize(0); 93 | history_.reserve(4); 94 | node_id_ = 0; 95 | query_pos_ = 0; 96 | history_pos_ = 0; 97 | status_code_ = MARISA_READY_TO_PREDICTIVE_SEARCH; 98 | } 99 | 100 | private: 101 | Vector key_buf_; 102 | Vector history_; 103 | UInt32 node_id_; 104 | UInt32 query_pos_; 105 | UInt32 history_pos_; 106 | StatusCode status_code_; 107 | 108 | // Disallows copy and assignment. 109 | State(const State &); 110 | State &operator=(const State &); 111 | }; 112 | 113 | } // namespace trie 114 | } // namespace grimoire 115 | } // namespace marisa 116 | 117 | #endif // MARISA_GRIMOIRE_TRIE_STATE_H_ 118 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/marisa/grimoire/trie/tail.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_GRIMOIRE_TRIE_TAIL_H_ 2 | #define MARISA_GRIMOIRE_TRIE_TAIL_H_ 3 | 4 | #include "../../agent.h" 5 | #include "../vector.h" 6 | #include "entry.h" 7 | 8 | namespace marisa { 9 | namespace grimoire { 10 | namespace trie { 11 | 12 | class Tail { 13 | public: 14 | Tail(); 15 | 16 | void build(Vector &entries, Vector *offsets, 17 | TailMode mode); 18 | 19 | void map(Mapper &mapper); 20 | void read(Reader &reader); 21 | void write(Writer &writer) const; 22 | 23 | void restore(Agent &agent, std::size_t offset) const; 24 | bool match(Agent &agent, std::size_t offset) const; 25 | bool 
prefix_match(Agent &agent, std::size_t offset) const; 26 | 27 | const char &operator[](std::size_t offset) const { 28 | MARISA_DEBUG_IF(offset >= buf_.size(), MARISA_BOUND_ERROR); 29 | return buf_[offset]; 30 | } 31 | 32 | TailMode mode() const { 33 | return end_flags_.empty() ? MARISA_TEXT_TAIL : MARISA_BINARY_TAIL; 34 | } 35 | 36 | bool empty() const { 37 | return buf_.empty(); 38 | } 39 | std::size_t size() const { 40 | return buf_.size(); 41 | } 42 | std::size_t total_size() const { 43 | return buf_.total_size() + end_flags_.total_size(); 44 | } 45 | std::size_t io_size() const { 46 | return buf_.io_size() + end_flags_.io_size(); 47 | } 48 | 49 | void clear(); 50 | void swap(Tail &rhs); 51 | 52 | private: 53 | Vector buf_; 54 | BitVector end_flags_; 55 | 56 | void build_(Vector &entries, Vector *offsets, 57 | TailMode mode); 58 | 59 | void map_(Mapper &mapper); 60 | void read_(Reader &reader); 61 | void write_(Writer &writer) const; 62 | 63 | // Disallows copy and assignment. 64 | Tail(const Tail &); 65 | Tail &operator=(const Tail &); 66 | }; 67 | 68 | } // namespace trie 69 | } // namespace grimoire 70 | } // namespace marisa 71 | 72 | #endif // MARISA_GRIMOIRE_TRIE_TAIL_H_ 73 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/marisa/grimoire/vector.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_GRIMOIRE_VECTOR_H_ 2 | #define MARISA_GRIMOIRE_VECTOR_H_ 3 | 4 | #include "vector/vector.h" 5 | #include "vector/flat-vector.h" 6 | #include "vector/bit-vector.h" 7 | 8 | namespace marisa { 9 | namespace grimoire { 10 | 11 | using vector::Vector; 12 | typedef vector::FlatVector FlatVector; 13 | typedef vector::BitVector BitVector; 14 | 15 | } // namespace grimoire 16 | } // namespace marisa 17 | 18 | #endif // MARISA_GRIMOIRE_VECTOR_H_ 19 | -------------------------------------------------------------------------------- 
/src/thirdparty/marisa-trie/lib/marisa/grimoire/vector/pop-count.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_GRIMOIRE_VECTOR_POP_COUNT_H_ 2 | #define MARISA_GRIMOIRE_VECTOR_POP_COUNT_H_ 3 | 4 | #include "../intrin.h" 5 | 6 | namespace marisa { 7 | namespace grimoire { 8 | namespace vector { 9 | 10 | #if MARISA_WORD_SIZE == 64 11 | 12 | class PopCount { 13 | public: 14 | explicit PopCount(UInt64 x) : value_() { 15 | x = (x & 0x5555555555555555ULL) + ((x & 0xAAAAAAAAAAAAAAAAULL) >> 1); 16 | x = (x & 0x3333333333333333ULL) + ((x & 0xCCCCCCCCCCCCCCCCULL) >> 2); 17 | x = (x & 0x0F0F0F0F0F0F0F0FULL) + ((x & 0xF0F0F0F0F0F0F0F0ULL) >> 4); 18 | x *= 0x0101010101010101ULL; 19 | value_ = x; 20 | } 21 | 22 | std::size_t lo8() const { 23 | return (std::size_t)(value_ & 0xFFU); 24 | } 25 | std::size_t lo16() const { 26 | return (std::size_t)((value_ >> 8) & 0xFFU); 27 | } 28 | std::size_t lo24() const { 29 | return (std::size_t)((value_ >> 16) & 0xFFU); 30 | } 31 | std::size_t lo32() const { 32 | return (std::size_t)((value_ >> 24) & 0xFFU); 33 | } 34 | std::size_t lo40() const { 35 | return (std::size_t)((value_ >> 32) & 0xFFU); 36 | } 37 | std::size_t lo48() const { 38 | return (std::size_t)((value_ >> 40) & 0xFFU); 39 | } 40 | std::size_t lo56() const { 41 | return (std::size_t)((value_ >> 48) & 0xFFU); 42 | } 43 | std::size_t lo64() const { 44 | return (std::size_t)((value_ >> 56) & 0xFFU); 45 | } 46 | 47 | static std::size_t count(UInt64 x) { 48 | #if defined(MARISA_X64) && defined(MARISA_USE_POPCNT) 49 | #ifdef _MSC_VER 50 | return __popcnt64(x); 51 | #else // _MSC_VER 52 | return _mm_popcnt_u64(x); 53 | #endif // _MSC_VER 54 | #else // defined(MARISA_X64) && defined(MARISA_USE_POPCNT) 55 | return PopCount(x).lo64(); 56 | #endif // defined(MARISA_X64) && defined(MARISA_USE_POPCNT) 57 | } 58 | 59 | private: 60 | UInt64 value_; 61 | }; 62 | 63 | #else // MARISA_WORD_SIZE == 64 64 | 65 | class PopCount { 66 | public: 
67 | explicit PopCount(UInt32 x) : value_() { 68 | x = (x & 0x55555555U) + ((x & 0xAAAAAAAAU) >> 1); 69 | x = (x & 0x33333333U) + ((x & 0xCCCCCCCCU) >> 2); 70 | x = (x & 0x0F0F0F0FU) + ((x & 0xF0F0F0F0U) >> 4); 71 | x *= 0x01010101U; 72 | value_ = x; 73 | } 74 | 75 | std::size_t lo8() const { 76 | return value_ & 0xFFU; 77 | } 78 | std::size_t lo16() const { 79 | return (value_ >> 8) & 0xFFU; 80 | } 81 | std::size_t lo24() const { 82 | return (value_ >> 16) & 0xFFU; 83 | } 84 | std::size_t lo32() const { 85 | return (value_ >> 24) & 0xFFU; 86 | } 87 | 88 | static std::size_t count(UInt32 x) { 89 | #ifdef MARISA_USE_POPCNT 90 | #ifdef _MSC_VER 91 | return __popcnt(x); 92 | #else // _MSC_VER 93 | return _mm_popcnt_u32(x); 94 | #endif // _MSC_VER 95 | #else // MARISA_USE_POPCNT 96 | return PopCount(x).lo32(); 97 | #endif // MARISA_USE_POPCNT 98 | } 99 | 100 | private: 101 | UInt32 value_; 102 | }; 103 | 104 | #endif // MARISA_WORD_SIZE == 64 105 | 106 | } // namespace vector 107 | } // namespace grimoire 108 | } // namespace marisa 109 | 110 | #endif // MARISA_GRIMOIRE_VECTOR_POP_COUNT_H_ 111 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/marisa/grimoire/vector/rank-index.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_GRIMOIRE_VECTOR_RANK_INDEX_H_ 2 | #define MARISA_GRIMOIRE_VECTOR_RANK_INDEX_H_ 3 | 4 | #include "../../base.h" 5 | 6 | namespace marisa { 7 | namespace grimoire { 8 | namespace vector { 9 | 10 | class RankIndex { 11 | public: 12 | RankIndex() : abs_(0), rel_lo_(0), rel_hi_(0) {} 13 | 14 | void set_abs(std::size_t value) { 15 | MARISA_DEBUG_IF(value > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 16 | abs_ = (UInt32)value; 17 | } 18 | void set_rel1(std::size_t value) { 19 | MARISA_DEBUG_IF(value > 64, MARISA_RANGE_ERROR); 20 | rel_lo_ = (UInt32)((rel_lo_ & ~0x7FU) | (value & 0x7FU)); 21 | } 22 | void set_rel2(std::size_t value) { 23 | 
MARISA_DEBUG_IF(value > 128, MARISA_RANGE_ERROR); 24 | rel_lo_ = (UInt32)((rel_lo_ & ~(0xFFU << 7)) | ((value & 0xFFU) << 7)); 25 | } 26 | void set_rel3(std::size_t value) { 27 | MARISA_DEBUG_IF(value > 192, MARISA_RANGE_ERROR); 28 | rel_lo_ = (UInt32)((rel_lo_ & ~(0xFFU << 15)) | ((value & 0xFFU) << 15)); 29 | } 30 | void set_rel4(std::size_t value) { 31 | MARISA_DEBUG_IF(value > 256, MARISA_RANGE_ERROR); 32 | rel_lo_ = (UInt32)((rel_lo_ & ~(0x1FFU << 23)) | ((value & 0x1FFU) << 23)); 33 | } 34 | void set_rel5(std::size_t value) { 35 | MARISA_DEBUG_IF(value > 320, MARISA_RANGE_ERROR); 36 | rel_hi_ = (UInt32)((rel_hi_ & ~0x1FFU) | (value & 0x1FFU)); 37 | } 38 | void set_rel6(std::size_t value) { 39 | MARISA_DEBUG_IF(value > 384, MARISA_RANGE_ERROR); 40 | rel_hi_ = (UInt32)((rel_hi_ & ~(0x1FFU << 9)) | ((value & 0x1FFU) << 9)); 41 | } 42 | void set_rel7(std::size_t value) { 43 | MARISA_DEBUG_IF(value > 448, MARISA_RANGE_ERROR); 44 | rel_hi_ = (UInt32)((rel_hi_ & ~(0x1FFU << 18)) | ((value & 0x1FFU) << 18)); 45 | } 46 | 47 | std::size_t abs() const { 48 | return abs_; 49 | } 50 | std::size_t rel1() const { 51 | return rel_lo_ & 0x7FU; 52 | } 53 | std::size_t rel2() const { 54 | return (rel_lo_ >> 7) & 0xFFU; 55 | } 56 | std::size_t rel3() const { 57 | return (rel_lo_ >> 15) & 0xFFU; 58 | } 59 | std::size_t rel4() const { 60 | return (rel_lo_ >> 23) & 0x1FFU; 61 | } 62 | std::size_t rel5() const { 63 | return rel_hi_ & 0x1FFU; 64 | } 65 | std::size_t rel6() const { 66 | return (rel_hi_ >> 9) & 0x1FFU; 67 | } 68 | std::size_t rel7() const { 69 | return (rel_hi_ >> 18) & 0x1FFU; 70 | } 71 | 72 | private: 73 | UInt32 abs_; 74 | UInt32 rel_lo_; 75 | UInt32 rel_hi_; 76 | }; 77 | 78 | } // namespace vector 79 | } // namespace grimoire 80 | } // namespace marisa 81 | 82 | #endif // MARISA_GRIMOIRE_VECTOR_RANK_INDEX_H_ 83 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/marisa/iostream.h: 
-------------------------------------------------------------------------------- 1 | #ifndef MARISA_IOSTREAM_H_ 2 | #define MARISA_IOSTREAM_H_ 3 | 4 | #include 5 | 6 | namespace marisa { 7 | 8 | class Trie; 9 | 10 | std::istream &read(std::istream &stream, Trie *trie); 11 | std::ostream &write(std::ostream &stream, const Trie &trie); 12 | 13 | std::istream &operator>>(std::istream &stream, Trie &trie); 14 | std::ostream &operator<<(std::ostream &stream, const Trie &trie); 15 | 16 | } // namespace marisa 17 | 18 | #endif // MARISA_IOSTREAM_H_ 19 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/marisa/key.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_KEY_H_ 2 | #define MARISA_KEY_H_ 3 | 4 | #include "base.h" 5 | 6 | namespace marisa { 7 | 8 | class Key { 9 | public: 10 | Key() : ptr_(NULL), length_(0), union_() { 11 | union_.id = 0; 12 | } 13 | Key(const Key &key) 14 | : ptr_(key.ptr_), length_(key.length_), union_(key.union_) {} 15 | 16 | Key &operator=(const Key &key) { 17 | ptr_ = key.ptr_; 18 | length_ = key.length_; 19 | union_ = key.union_; 20 | return *this; 21 | } 22 | 23 | char operator[](std::size_t i) const { 24 | MARISA_DEBUG_IF(i >= length_, MARISA_BOUND_ERROR); 25 | return ptr_[i]; 26 | } 27 | 28 | void set_str(const char *str) { 29 | MARISA_DEBUG_IF(str == NULL, MARISA_NULL_ERROR); 30 | std::size_t length = 0; 31 | while (str[length] != '\0') { 32 | ++length; 33 | } 34 | MARISA_DEBUG_IF(length > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 35 | ptr_ = str; 36 | length_ = (UInt32)length; 37 | } 38 | void set_str(const char *ptr, std::size_t length) { 39 | MARISA_DEBUG_IF((ptr == NULL) && (length != 0), MARISA_NULL_ERROR); 40 | MARISA_DEBUG_IF(length > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); 41 | ptr_ = ptr; 42 | length_ = (UInt32)length; 43 | } 44 | void set_id(std::size_t id) { 45 | MARISA_DEBUG_IF(id > MARISA_UINT32_MAX, 
MARISA_SIZE_ERROR); 46 | union_.id = (UInt32)id; 47 | } 48 | void set_weight(float weight) { 49 | union_.weight = weight; 50 | } 51 | 52 | const char *ptr() const { 53 | return ptr_; 54 | } 55 | std::size_t length() const { 56 | return length_; 57 | } 58 | std::size_t id() const { 59 | return union_.id; 60 | } 61 | float weight() const { 62 | return union_.weight; 63 | } 64 | 65 | void clear() { 66 | Key().swap(*this); 67 | } 68 | void swap(Key &rhs) { 69 | marisa::swap(ptr_, rhs.ptr_); 70 | marisa::swap(length_, rhs.length_); 71 | marisa::swap(union_.id, rhs.union_.id); 72 | } 73 | 74 | private: 75 | const char *ptr_; 76 | UInt32 length_; 77 | union Union { 78 | UInt32 id; 79 | float weight; 80 | } union_; 81 | }; 82 | 83 | } // namespace marisa 84 | 85 | #endif // MARISA_KEY_H_ 86 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/marisa/keyset.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_KEYSET_H_ 2 | #define MARISA_KEYSET_H_ 3 | 4 | #include "key.h" 5 | 6 | namespace marisa { 7 | 8 | class Keyset { 9 | public: 10 | enum { 11 | BASE_BLOCK_SIZE = 4096, 12 | EXTRA_BLOCK_SIZE = 1024, 13 | KEY_BLOCK_SIZE = 256 14 | }; 15 | 16 | Keyset(); 17 | 18 | void push_back(const Key &key); 19 | void push_back(const Key &key, char end_marker); 20 | 21 | void push_back(const char *str); 22 | void push_back(const char *ptr, std::size_t length, float weight = 1.0); 23 | 24 | const Key &operator[](std::size_t i) const { 25 | MARISA_DEBUG_IF(i >= size_, MARISA_BOUND_ERROR); 26 | return key_blocks_[i / KEY_BLOCK_SIZE][i % KEY_BLOCK_SIZE]; 27 | } 28 | Key &operator[](std::size_t i) { 29 | MARISA_DEBUG_IF(i >= size_, MARISA_BOUND_ERROR); 30 | return key_blocks_[i / KEY_BLOCK_SIZE][i % KEY_BLOCK_SIZE]; 31 | } 32 | 33 | std::size_t num_keys() const { 34 | return size_; 35 | } 36 | 37 | bool empty() const { 38 | return size_ == 0; 39 | } 40 | std::size_t size() const { 
41 | return size_; 42 | } 43 | std::size_t total_length() const { 44 | return total_length_; 45 | } 46 | 47 | void reset(); 48 | 49 | void clear(); 50 | void swap(Keyset &rhs); 51 | 52 | private: 53 | scoped_array > base_blocks_; 54 | std::size_t base_blocks_size_; 55 | std::size_t base_blocks_capacity_; 56 | scoped_array > extra_blocks_; 57 | std::size_t extra_blocks_size_; 58 | std::size_t extra_blocks_capacity_; 59 | scoped_array > key_blocks_; 60 | std::size_t key_blocks_size_; 61 | std::size_t key_blocks_capacity_; 62 | char *ptr_; 63 | std::size_t avail_; 64 | std::size_t size_; 65 | std::size_t total_length_; 66 | 67 | char *reserve(std::size_t size); 68 | 69 | void append_base_block(); 70 | void append_extra_block(std::size_t size); 71 | void append_key_block(); 72 | 73 | // Disallows copy and assignment. 74 | Keyset(const Keyset &); 75 | Keyset &operator=(const Keyset &); 76 | }; 77 | 78 | } // namespace marisa 79 | 80 | #endif // MARISA_KEYSET_H_ 81 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/marisa/mstdio.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_STDIO_H_ 2 | #define MARISA_STDIO_H_ 3 | 4 | #include 5 | 6 | namespace marisa { 7 | 8 | class Trie; 9 | 10 | void fread(std::FILE *file, Trie *trie); 11 | void fwrite(std::FILE *file, const Trie &trie); 12 | 13 | } // namespace marisa 14 | 15 | #endif // MARISA_STDIO_H_ 16 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/marisa/query.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_QUERY_H_ 2 | #define MARISA_QUERY_H_ 3 | 4 | #include "base.h" 5 | 6 | namespace marisa { 7 | 8 | class Query { 9 | public: 10 | Query() : ptr_(NULL), length_(0), id_(0) {} 11 | Query(const Query &query) 12 | : ptr_(query.ptr_), length_(query.length_), id_(query.id_) {} 13 | 14 | Query 
&operator=(const Query &query) { 15 | ptr_ = query.ptr_; 16 | length_ = query.length_; 17 | id_ = query.id_; 18 | return *this; 19 | } 20 | 21 | char operator[](std::size_t i) const { 22 | MARISA_DEBUG_IF(i >= length_, MARISA_BOUND_ERROR); 23 | return ptr_[i]; 24 | } 25 | 26 | void set_str(const char *str) { 27 | MARISA_DEBUG_IF(str == NULL, MARISA_NULL_ERROR); 28 | std::size_t length = 0; 29 | while (str[length] != '\0') { 30 | ++length; 31 | } 32 | ptr_ = str; 33 | length_ = length; 34 | } 35 | void set_str(const char *ptr, std::size_t length) { 36 | MARISA_DEBUG_IF((ptr == NULL) && (length != 0), MARISA_NULL_ERROR); 37 | ptr_ = ptr; 38 | length_ = length; 39 | } 40 | void set_id(std::size_t id) { 41 | id_ = id; 42 | } 43 | 44 | const char *ptr() const { 45 | return ptr_; 46 | } 47 | std::size_t length() const { 48 | return length_; 49 | } 50 | std::size_t id() const { 51 | return id_; 52 | } 53 | 54 | void clear() { 55 | Query().swap(*this); 56 | } 57 | void swap(Query &rhs) { 58 | marisa::swap(ptr_, rhs.ptr_); 59 | marisa::swap(length_, rhs.length_); 60 | marisa::swap(id_, rhs.id_); 61 | } 62 | 63 | private: 64 | const char *ptr_; 65 | std::size_t length_; 66 | std::size_t id_; 67 | }; 68 | 69 | } // namespace marisa 70 | 71 | #endif // MARISA_QUERY_H_ 72 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/marisa/scoped-array.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_SCOPED_ARRAY_H_ 2 | #define MARISA_SCOPED_ARRAY_H_ 3 | 4 | #include "base.h" 5 | 6 | namespace marisa { 7 | 8 | template 9 | class scoped_array { 10 | public: 11 | scoped_array() : array_(NULL) {} 12 | explicit scoped_array(T *array) : array_(array) {} 13 | 14 | ~scoped_array() { 15 | delete [] array_; 16 | } 17 | 18 | void reset(T *array = NULL) { 19 | MARISA_THROW_IF((array != NULL) && (array == array_), MARISA_RESET_ERROR); 20 | scoped_array(array).swap(*this); 21 | } 22 | 
23 | T &operator[](std::size_t i) const { 24 | MARISA_DEBUG_IF(array_ == NULL, MARISA_STATE_ERROR); 25 | return array_[i]; 26 | } 27 | T *get() const { 28 | return array_; 29 | } 30 | 31 | void clear() { 32 | scoped_array().swap(*this); 33 | } 34 | void swap(scoped_array &rhs) { 35 | marisa::swap(array_, rhs.array_); 36 | } 37 | 38 | private: 39 | T *array_; 40 | 41 | // Disallows copy and assignment. 42 | scoped_array(const scoped_array &); 43 | scoped_array &operator=(const scoped_array &); 44 | }; 45 | 46 | } // namespace marisa 47 | 48 | #endif // MARISA_SCOPED_ARRAY_H_ 49 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/marisa/scoped-ptr.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_SCOPED_PTR_H_ 2 | #define MARISA_SCOPED_PTR_H_ 3 | 4 | #include "base.h" 5 | 6 | namespace marisa { 7 | 8 | template 9 | class scoped_ptr { 10 | public: 11 | scoped_ptr() : ptr_(NULL) {} 12 | explicit scoped_ptr(T *ptr) : ptr_(ptr) {} 13 | 14 | ~scoped_ptr() { 15 | delete ptr_; 16 | } 17 | 18 | void reset(T *ptr = NULL) { 19 | MARISA_THROW_IF((ptr != NULL) && (ptr == ptr_), MARISA_RESET_ERROR); 20 | scoped_ptr(ptr).swap(*this); 21 | } 22 | 23 | T &operator*() const { 24 | MARISA_DEBUG_IF(ptr_ == NULL, MARISA_STATE_ERROR); 25 | return *ptr_; 26 | } 27 | T *operator->() const { 28 | MARISA_DEBUG_IF(ptr_ == NULL, MARISA_STATE_ERROR); 29 | return ptr_; 30 | } 31 | T *get() const { 32 | return ptr_; 33 | } 34 | 35 | void clear() { 36 | scoped_ptr().swap(*this); 37 | } 38 | void swap(scoped_ptr &rhs) { 39 | marisa::swap(ptr_, rhs.ptr_); 40 | } 41 | 42 | private: 43 | T *ptr_; 44 | 45 | // Disallows copy and assignment. 
46 | scoped_ptr(const scoped_ptr &); 47 | scoped_ptr &operator=(const scoped_ptr &); 48 | }; 49 | 50 | } // namespace marisa 51 | 52 | #endif // MARISA_SCOPED_PTR_H_ 53 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/marisa/trie.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_TRIE_H_ 2 | #define MARISA_TRIE_H_ 3 | 4 | #include "keyset.h" 5 | #include "agent.h" 6 | 7 | namespace marisa { 8 | namespace grimoire { 9 | namespace trie { 10 | 11 | class LoudsTrie; 12 | 13 | } // namespace trie 14 | } // namespace grimoire 15 | 16 | class Trie { 17 | friend class TrieIO; 18 | 19 | public: 20 | Trie(); 21 | ~Trie(); 22 | 23 | void build(Keyset &keyset, int config_flags = 0); 24 | 25 | void mmap(const char *filename); 26 | void map(const void *ptr, std::size_t size); 27 | 28 | void load(const char *filename); 29 | void read(int fd); 30 | 31 | void save(const char *filename) const; 32 | void write(int fd) const; 33 | 34 | bool lookup(Agent &agent) const; 35 | void reverse_lookup(Agent &agent) const; 36 | bool common_prefix_search(Agent &agent) const; 37 | bool predictive_search(Agent &agent) const; 38 | 39 | std::size_t num_tries() const; 40 | std::size_t num_keys() const; 41 | std::size_t num_nodes() const; 42 | 43 | TailMode tail_mode() const; 44 | NodeOrder node_order() const; 45 | 46 | bool empty() const; 47 | std::size_t size() const; 48 | std::size_t total_size() const; 49 | std::size_t io_size() const; 50 | 51 | void clear(); 52 | void swap(Trie &rhs); 53 | 54 | private: 55 | scoped_ptr trie_; 56 | 57 | // Disallows copy and assignment. 
58 | Trie(const Trie &); 59 | Trie &operator=(const Trie &); 60 | }; 61 | 62 | } // namespace marisa 63 | 64 | #endif // MARISA_TRIE_H_ 65 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/reader.lo: -------------------------------------------------------------------------------- 1 | # reader.lo - a libtool object file 2 | # Generated by libtool (GNU libtool) 2.4.2 Debian-2.4.2-1ubuntu1 3 | # 4 | # Please DO NOT delete this file! 5 | # It is necessary for linking the library. 6 | 7 | # Name of the PIC object. 8 | pic_object='.libs/reader.o' 9 | 10 | # Name of the non-PIC object 11 | non_pic_object='reader.o' 12 | 13 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/reader.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aqen/rime-table-decompiler/f3131bfc5d3c5834b9721b44a78e398c7d90dbee/src/thirdparty/marisa-trie/lib/reader.o -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/tail.lo: -------------------------------------------------------------------------------- 1 | # tail.lo - a libtool object file 2 | # Generated by libtool (GNU libtool) 2.4.2 Debian-2.4.2-1ubuntu1 3 | # 4 | # Please DO NOT delete this file! 5 | # It is necessary for linking the library. 6 | 7 | # Name of the PIC object. 
8 | pic_object='.libs/tail.o' 9 | 10 | # Name of the non-PIC object 11 | non_pic_object='tail.o' 12 | 13 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/tail.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aqen/rime-table-decompiler/f3131bfc5d3c5834b9721b44a78e398c7d90dbee/src/thirdparty/marisa-trie/lib/tail.o -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/trie.lo: -------------------------------------------------------------------------------- 1 | # trie.lo - a libtool object file 2 | # Generated by libtool (GNU libtool) 2.4.2 Debian-2.4.2-1ubuntu1 3 | # 4 | # Please DO NOT delete this file! 5 | # It is necessary for linking the library. 6 | 7 | # Name of the PIC object. 8 | pic_object='.libs/trie.o' 9 | 10 | # Name of the non-PIC object 11 | non_pic_object='trie.o' 12 | 13 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/trie.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aqen/rime-table-decompiler/f3131bfc5d3c5834b9721b44a78e398c7d90dbee/src/thirdparty/marisa-trie/lib/trie.o -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/writer.lo: -------------------------------------------------------------------------------- 1 | # writer.lo - a libtool object file 2 | # Generated by libtool (GNU libtool) 2.4.2 Debian-2.4.2-1ubuntu1 3 | # 4 | # Please DO NOT delete this file! 5 | # It is necessary for linking the library. 6 | 7 | # Name of the PIC object. 
8 | pic_object='.libs/writer.o' 9 | 10 | # Name of the non-PIC object 11 | non_pic_object='writer.o' 12 | 13 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/lib/writer.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aqen/rime-table-decompiler/f3131bfc5d3c5834b9721b44a78e398c7d90dbee/src/thirdparty/marisa-trie/lib/writer.o -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/m4/ltversion.m4: -------------------------------------------------------------------------------- 1 | # ltversion.m4 -- version numbers -*- Autoconf -*- 2 | # 3 | # Copyright (C) 2004 Free Software Foundation, Inc. 4 | # Written by Scott James Remnant, 2004 5 | # 6 | # This file is free software; the Free Software Foundation gives 7 | # unlimited permission to copy and/or distribute it, with or without 8 | # modifications, as long as this notice is preserved. 
9 | 10 | # @configure_input@ 11 | 12 | # serial 3337 ltversion.m4 13 | # This file is part of GNU Libtool 14 | 15 | m4_define([LT_PACKAGE_VERSION], [2.4.2]) 16 | m4_define([LT_PACKAGE_REVISION], [1.3337]) 17 | 18 | AC_DEFUN([LTVERSION_VERSION], 19 | [macro_version='2.4.2' 20 | macro_revision='1.3337' 21 | _LT_DECL(, macro_version, 0, [Which release of libtool.m4 was used?]) 22 | _LT_DECL(, macro_revision, 0) 23 | ]) 24 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/marisa.pc: -------------------------------------------------------------------------------- 1 | prefix=/usr/local 2 | exec_prefix=${prefix} 3 | bindir=${exec_prefix}/bin 4 | libdir=${exec_prefix}/lib 5 | includedir=${prefix}/include 6 | 7 | Name: Marisa 8 | Description: Matching Algorithm with Recursively Implemented StorAge 9 | Version: 0.2.4 10 | Cflags: -I${includedir} 11 | Libs: -L${libdir} -lmarisa 12 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/marisa.pc.in: -------------------------------------------------------------------------------- 1 | prefix=@prefix@ 2 | exec_prefix=@exec_prefix@ 3 | bindir=@bindir@ 4 | libdir=@libdir@ 5 | includedir=@includedir@ 6 | 7 | Name: Marisa 8 | Description: Matching Algorithm with Recursively Implemented StorAge 9 | Version: @VERSION@ 10 | Cflags: -I${includedir} 11 | Libs: -L${libdir} -lmarisa 12 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/tests/Makefile.am: -------------------------------------------------------------------------------- 1 | AM_CXXFLAGS = -Wall -Weffc++ -Wextra -D_DEBUG -I${top_srcdir}/lib 2 | 3 | TESTS = \ 4 | base-test \ 5 | io-test \ 6 | vector-test \ 7 | trie-test \ 8 | marisa-test 9 | 10 | check_PROGRAMS = $(TESTS) 11 | 12 | noinst_HEADERS = marisa-assert.h 13 | 14 | base_test_SOURCES = base-test.cc 15 | base_test_LDADD = ../lib/libmarisa.la 16 | 17 
| io_test_SOURCES = io-test.cc 18 | io_test_LDADD = ../lib/libmarisa.la 19 | 20 | vector_test_SOURCES = vector-test.cc 21 | vector_test_LDADD = ../lib/libmarisa.la 22 | 23 | trie_test_SOURCES = trie-test.cc 24 | trie_test_LDADD = ../lib/libmarisa.la 25 | 26 | marisa_test_SOURCES = marisa-test.cc 27 | marisa_test_LDADD = ../lib/libmarisa.la 28 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/tests/marisa-assert.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_ASSERT_H_ 2 | #define MARISA_ASSERT_H_ 3 | 4 | #include <cstdlib> 5 | #include <iostream> 6 | 7 | #define ASSERT(cond) (void)((!!(cond)) || \ 8 | ((std::cout << __LINE__ << ": Assertion `" << #cond << "' failed." \ 9 | << std::endl), std::exit(-1), 0)) 10 | 11 | #define EXCEPT(code, expected_error_code) try { \ 12 | code; \ 13 | std::cout << __LINE__ << ": Exception `" << #code << "' failed." \ 14 | << std::endl; \ 15 | std::exit(-1); \ 16 | } catch (const marisa::Exception &ex) { \ 17 | ASSERT(ex.error_code() == expected_error_code); \ 18 | } 19 | 20 | #define TEST_START() \ 21 | (std::cout << __FILE__ << ":" << __LINE__ << ": " << __FUNCTION__ << "(): ") 22 | 23 | #define TEST_END() \ 24 | (std::cout << "ok" << std::endl) 25 | 26 | #endif // MARISA_ASSERT_H_ 27 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/tools/.libs/cmdopt.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aqen/rime-table-decompiler/f3131bfc5d3c5834b9721b44a78e398c7d90dbee/src/thirdparty/marisa-trie/tools/.libs/cmdopt.o -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/tools/.libs/libcmdopt.a: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aqen/rime-table-decompiler/f3131bfc5d3c5834b9721b44a78e398c7d90dbee/src/thirdparty/marisa-trie/tools/.libs/libcmdopt.a -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/tools/.libs/libcmdopt.la: -------------------------------------------------------------------------------- 1 | # libcmdopt.la - a libtool library file 2 | # Generated by libtool (GNU libtool) 2.4.2 Debian-2.4.2-1ubuntu1 3 | # 4 | # Please DO NOT delete this file! 5 | # It is necessary for linking the library. 6 | 7 | # The name that we can dlopen(3). 8 | dlname='' 9 | 10 | # Names of this library. 11 | library_names='' 12 | 13 | # The name of the static archive. 14 | old_library='libcmdopt.a' 15 | 16 | # Linker flags that can not go in dependency_libs. 17 | inherited_linker_flags=' ' 18 | 19 | # Libraries that this one depends upon. 20 | dependency_libs='' 21 | 22 | # Names of additional weak libraries provided by this library 23 | weak_library_names='' 24 | 25 | # Version information for libcmdopt. 26 | current= 27 | age= 28 | revision= 29 | 30 | # Is this an already installed library? 31 | installed=no 32 | 33 | # Should we warn about portability when linking against -modules? 
34 | shouldnotlink=no 35 | 36 | # Files to dlopen/dlpreopen 37 | dlopen='' 38 | dlpreopen='' 39 | 40 | # Directory that this library needs to be installed in: 41 | libdir='' 42 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/tools/.libs/marisa-benchmark: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aqen/rime-table-decompiler/f3131bfc5d3c5834b9721b44a78e398c7d90dbee/src/thirdparty/marisa-trie/tools/.libs/marisa-benchmark -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/tools/.libs/marisa-build: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aqen/rime-table-decompiler/f3131bfc5d3c5834b9721b44a78e398c7d90dbee/src/thirdparty/marisa-trie/tools/.libs/marisa-build -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/tools/.libs/marisa-common-prefix-search: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aqen/rime-table-decompiler/f3131bfc5d3c5834b9721b44a78e398c7d90dbee/src/thirdparty/marisa-trie/tools/.libs/marisa-common-prefix-search -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/tools/.libs/marisa-dump: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aqen/rime-table-decompiler/f3131bfc5d3c5834b9721b44a78e398c7d90dbee/src/thirdparty/marisa-trie/tools/.libs/marisa-dump -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/tools/.libs/marisa-lookup: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aqen/rime-table-decompiler/f3131bfc5d3c5834b9721b44a78e398c7d90dbee/src/thirdparty/marisa-trie/tools/.libs/marisa-lookup -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/tools/.libs/marisa-predictive-search: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aqen/rime-table-decompiler/f3131bfc5d3c5834b9721b44a78e398c7d90dbee/src/thirdparty/marisa-trie/tools/.libs/marisa-predictive-search -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/tools/.libs/marisa-reverse-lookup: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aqen/rime-table-decompiler/f3131bfc5d3c5834b9721b44a78e398c7d90dbee/src/thirdparty/marisa-trie/tools/.libs/marisa-reverse-lookup -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/tools/Makefile.am: -------------------------------------------------------------------------------- 1 | AM_CFLAGS = -Wall -Wextra 2 | AM_CXXFLAGS = -Wall -Weffc++ -Wextra -I${top_srcdir}/lib 3 | #AM_CXXFLAGS = -Wall -Weffc++ -Wextra -D_DEBUG -I${top_srcdir}/lib 4 | 5 | noinst_LTLIBRARIES = libcmdopt.la 6 | 7 | libcmdopt_la_SOURCES = cmdopt.cc 8 | 9 | noinst_HEADERS = cmdopt.h 10 | 11 | bin_PROGRAMS = \ 12 | marisa-build \ 13 | marisa-lookup \ 14 | marisa-reverse-lookup \ 15 | marisa-common-prefix-search \ 16 | marisa-predictive-search \ 17 | marisa-dump \ 18 | marisa-benchmark 19 | 20 | marisa_build_SOURCES = marisa-build.cc 21 | marisa_build_LDADD = ../lib/libmarisa.la libcmdopt.la 22 | 23 | marisa_lookup_SOURCES = marisa-lookup.cc 24 | marisa_lookup_LDADD = ../lib/libmarisa.la libcmdopt.la 25 | 26 | marisa_reverse_lookup_SOURCES = marisa-reverse-lookup.cc 27 | marisa_reverse_lookup_LDADD = ../lib/libmarisa.la libcmdopt.la 28 | 29 | 
marisa_common_prefix_search_SOURCES = marisa-common-prefix-search.cc 30 | marisa_common_prefix_search_LDADD = ../lib/libmarisa.la libcmdopt.la 31 | 32 | marisa_predictive_search_SOURCES = marisa-predictive-search.cc 33 | marisa_predictive_search_LDADD = ../lib/libmarisa.la libcmdopt.la 34 | 35 | marisa_dump_SOURCES = marisa-dump.cc 36 | marisa_dump_LDADD = ../lib/libmarisa.la libcmdopt.la 37 | 38 | marisa_benchmark_SOURCES = marisa-benchmark.cc 39 | marisa_benchmark_LDADD = ../lib/libmarisa.la libcmdopt.la 40 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/tools/cmdopt.h: -------------------------------------------------------------------------------- 1 | #ifndef MARISA_CMDOPT_H_ 2 | #define MARISA_CMDOPT_H_ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | typedef struct cmdopt_option_ { 9 | // `name' specifies the name of this option. 10 | // An array of options must be terminated with an option whose name == NULL. 11 | const char *name; 12 | 13 | // `has_arg' specifies whether an option takes an argument or not. 14 | // 0 specifies that this option does not have any argument. 15 | // 1 specifies that this option has an argument. 16 | // 2 specifies that this option may have an argument. 17 | int has_arg; 18 | 19 | // `flag' specifies an integer variable which is overwritten by cmdopt_get() 20 | // with its return value. 21 | int *flag; 22 | 23 | // `val' specifies a return value of cmdopt_get(). This value is returned 24 | // when cmdopt_get() finds this option. 25 | int val; 26 | } cmdopt_option; 27 | 28 | typedef struct cmdopt_t_ { 29 | // Command line arguments. 30 | int argc; 31 | char **argv; 32 | 33 | // Option settings. 34 | const cmdopt_option *longopts; 35 | const char *optstring; 36 | 37 | int optind; // Index of the next argument. 38 | char *nextchar; // Next character. 39 | char *optarg; // Argument of the last option. 40 | int optopt; // Label of the last option. 
41 | char *optlong; // Long option. 42 | int opterr; // Warning level (0: nothing, 1: warning, 2: all). 43 | int longindex; // Index of the last long option. 44 | int optnum; // Number of options. 45 | } cmdopt_t; 46 | 47 | // cmdopt_init() initializes a cmdopt_t for successive cmdopt_get() calls. 48 | void cmdopt_init(cmdopt_t *h, int argc, char **argv, 49 | const char *optstring, const cmdopt_option *longopts); 50 | 51 | // cmdopt_get() analyzes command line arguments and gets the next option. 52 | int cmdopt_get(cmdopt_t *h); 53 | 54 | #ifdef __cplusplus 55 | } // extern "C" 56 | #endif 57 | 58 | #endif // MARISA_CMDOPT_H_ 59 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/tools/cmdopt.lo: -------------------------------------------------------------------------------- 1 | # cmdopt.lo - a libtool object file 2 | # Generated by libtool (GNU libtool) 2.4.2 Debian-2.4.2-1ubuntu1 3 | # 4 | # Please DO NOT delete this file! 5 | # It is necessary for linking the library. 6 | 7 | # Name of the PIC object. 8 | pic_object='.libs/cmdopt.o' 9 | 10 | # Name of the non-PIC object 11 | non_pic_object='cmdopt.o' 12 | 13 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/tools/cmdopt.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aqen/rime-table-decompiler/f3131bfc5d3c5834b9721b44a78e398c7d90dbee/src/thirdparty/marisa-trie/tools/cmdopt.o -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/tools/libcmdopt.la: -------------------------------------------------------------------------------- 1 | # libcmdopt.la - a libtool library file 2 | # Generated by libtool (GNU libtool) 2.4.2 Debian-2.4.2-1ubuntu1 3 | # 4 | # Please DO NOT delete this file! 5 | # It is necessary for linking the library. 6 | 7 | # The name that we can dlopen(3). 
8 | dlname='' 9 | 10 | # Names of this library. 11 | library_names='' 12 | 13 | # The name of the static archive. 14 | old_library='libcmdopt.a' 15 | 16 | # Linker flags that can not go in dependency_libs. 17 | inherited_linker_flags=' ' 18 | 19 | # Libraries that this one depends upon. 20 | dependency_libs='' 21 | 22 | # Names of additional weak libraries provided by this library 23 | weak_library_names='' 24 | 25 | # Version information for libcmdopt. 26 | current= 27 | age= 28 | revision= 29 | 30 | # Is this an already installed library? 31 | installed=no 32 | 33 | # Should we warn about portability when linking against -modules? 34 | shouldnotlink=no 35 | 36 | # Files to dlopen/dlpreopen 37 | dlopen='' 38 | dlpreopen='' 39 | 40 | # Directory that this library needs to be installed in: 41 | libdir='' 42 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/tools/marisa-benchmark.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aqen/rime-table-decompiler/f3131bfc5d3c5834b9721b44a78e398c7d90dbee/src/thirdparty/marisa-trie/tools/marisa-benchmark.o -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/tools/marisa-build.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aqen/rime-table-decompiler/f3131bfc5d3c5834b9721b44a78e398c7d90dbee/src/thirdparty/marisa-trie/tools/marisa-build.o -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/tools/marisa-common-prefix-search.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aqen/rime-table-decompiler/f3131bfc5d3c5834b9721b44a78e398c7d90dbee/src/thirdparty/marisa-trie/tools/marisa-common-prefix-search.o 
-------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/tools/marisa-dump.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aqen/rime-table-decompiler/f3131bfc5d3c5834b9721b44a78e398c7d90dbee/src/thirdparty/marisa-trie/tools/marisa-dump.o -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/tools/marisa-lookup.cc: -------------------------------------------------------------------------------- 1 | #include <iostream> 2 | #include <string> 3 | 4 | #include <marisa.h> 5 | 6 | #include "cmdopt.h" 7 | 8 | namespace { 9 | 10 | bool mmap_flag = true; 11 | 12 | void print_help(const char *cmd) { 13 | std::cerr << "Usage: " << cmd << " [OPTION]... DIC\n\n" 14 | "Options:\n" 15 | " -m, --mmap-dictionary use memory-mapped I/O to load a dictionary" 16 | " (default)\n" 17 | " -r, --read-dictionary read an entire dictionary into memory\n" 18 | " -h, --help print this help\n" 19 | << std::endl; 20 | } 21 | 22 | int lookup(const char * const *args, std::size_t num_args) { 23 | if (num_args == 0) { 24 | std::cerr << "error: dictionary is not specified" << std::endl; 25 | return 10; 26 | } else if (num_args > 1) { 27 | std::cerr << "error: more than one dictionaries are specified" 28 | << std::endl; 29 | return 11; 30 | } 31 | 32 | marisa::Trie trie; 33 | if (mmap_flag) { 34 | try { 35 | trie.mmap(args[0]); 36 | } catch (const marisa::Exception &ex) { 37 | std::cerr << ex.what() << ": failed to mmap a dictionary file: " 38 | << args[0] << std::endl; 39 | return 20; 40 | } 41 | } else { 42 | try { 43 | trie.load(args[0]); 44 | } catch (const marisa::Exception &ex) { 45 | std::cerr << ex.what() << ": failed to load a dictionary file: " 46 | << args[0] << std::endl; 47 | return 21; 48 | } 49 | } 50 | 51 | marisa::Agent agent; 52 | std::string str; 53 | while (std::getline(std::cin, str)) { 54 | try { 55 | agent.set_query(str.c_str(), 
str.length()); 56 | if (trie.lookup(agent)) { 57 | std::cout << agent.key().id() << '\t' << str << '\n'; 58 | } else { 59 | std::cout << "-1\t" << str << '\n'; 60 | } 61 | } catch (const marisa::Exception &ex) { 62 | std::cerr << ex.what() << ": lookup() failed: " << str << std::endl; 63 | return 30; 64 | } 65 | 66 | if (!std::cout) { 67 | std::cerr << "error: failed to write results to standard output" 68 | << std::endl; 69 | return 30; 70 | } 71 | } 72 | 73 | return 0; 74 | } 75 | 76 | } // namespace 77 | 78 | int main(int argc, char *argv[]) { 79 | std::ios::sync_with_stdio(false); 80 | 81 | ::cmdopt_option long_options[] = { 82 | { "mmap-dictionary", 0, NULL, 'm' }, 83 | { "read-dictionary", 0, NULL, 'r' }, 84 | { "help", 0, NULL, 'h' }, 85 | { NULL, 0, NULL, 0 } 86 | }; 87 | ::cmdopt_t cmdopt; 88 | ::cmdopt_init(&cmdopt, argc, argv, "mrh", long_options); 89 | int label; 90 | while ((label = ::cmdopt_get(&cmdopt)) != -1) { 91 | switch (label) { 92 | case 'm': { 93 | mmap_flag = true; 94 | break; 95 | } 96 | case 'r': { 97 | mmap_flag = false; 98 | break; 99 | } 100 | case 'h': { 101 | print_help(argv[0]); 102 | return 0; 103 | } 104 | default: { 105 | return 1; 106 | } 107 | } 108 | } 109 | return lookup(cmdopt.argv + cmdopt.optind, cmdopt.argc - cmdopt.optind); 110 | } 111 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/tools/marisa-lookup.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aqen/rime-table-decompiler/f3131bfc5d3c5834b9721b44a78e398c7d90dbee/src/thirdparty/marisa-trie/tools/marisa-lookup.o -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/tools/marisa-predictive-search.o: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aqen/rime-table-decompiler/f3131bfc5d3c5834b9721b44a78e398c7d90dbee/src/thirdparty/marisa-trie/tools/marisa-predictive-search.o -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/tools/marisa-reverse-lookup.cc: -------------------------------------------------------------------------------- 1 | #include <cstdlib> 2 | #include <iostream> 3 | 4 | #include <marisa.h> 5 | 6 | #include "cmdopt.h" 7 | 8 | namespace { 9 | 10 | bool mmap_flag = true; 11 | 12 | void print_help(const char *cmd) { 13 | std::cerr << "Usage: " << cmd << " [OPTION]... DIC\n\n" 14 | "Options:\n" 15 | " -m, --mmap-dictionary use memory-mapped I/O to load a dictionary" 16 | " (default)\n" 17 | " -r, --read-dictionary read an entire dictionary into memory\n" 18 | " -h, --help print this help\n" 19 | << std::endl; 20 | } 21 | 22 | int reverse_lookup(const char * const *args, std::size_t num_args) { 23 | if (num_args == 0) { 24 | std::cerr << "error: dictionary is not specified" << std::endl; 25 | return 10; 26 | } else if (num_args > 1) { 27 | std::cerr << "error: more than one dictionaries are specified" 28 | << std::endl; 29 | return 11; 30 | } 31 | 32 | marisa::Trie trie; 33 | if (mmap_flag) { 34 | try { 35 | trie.mmap(args[0]); 36 | } catch (const marisa::Exception &ex) { 37 | std::cerr << ex.what() << ": failed to mmap a dictionary file: " 38 | << args[0] << std::endl; 39 | return 20; 40 | } 41 | } else { 42 | try { 43 | trie.load(args[0]); 44 | } catch (const marisa::Exception &ex) { 45 | std::cerr << ex.what() << ": failed to load a dictionary file: " 46 | << args[0] << std::endl; 47 | return 21; 48 | } 49 | } 50 | 51 | marisa::Agent agent; 52 | std::size_t key_id; 53 | while (std::cin >> key_id) { 54 | try { 55 | agent.set_query(key_id); 56 | trie.reverse_lookup(agent); 57 | std::cout << agent.key().id() << '\t'; 58 | std::cout.write(agent.key().ptr(), agent.key().length()) << '\n'; 59 | } catch (const marisa::Exception &ex) { 60 | 
std::cerr << ex.what() << ": reverse_lookup() failed: " 61 | << key_id << std::endl; 62 | return 30; 63 | } 64 | 65 | if (!std::cout) { 66 | std::cerr << "error: failed to write results to standard output" 67 | << std::endl; 68 | return 30; 69 | } 70 | } 71 | 72 | return 0; 73 | } 74 | 75 | } // namespace 76 | 77 | int main(int argc, char *argv[]) { 78 | std::ios::sync_with_stdio(false); 79 | 80 | ::cmdopt_option long_options[] = { 81 | { "mmap-dictionary", 0, NULL, 'm' }, 82 | { "read-dictionary", 0, NULL, 'r' }, 83 | { "help", 0, NULL, 'h' }, 84 | { NULL, 0, NULL, 0 } 85 | }; 86 | ::cmdopt_t cmdopt; 87 | ::cmdopt_init(&cmdopt, argc, argv, "mrh", long_options); 88 | int label; 89 | while ((label = ::cmdopt_get(&cmdopt)) != -1) { 90 | switch (label) { 91 | case 'm': { 92 | mmap_flag = true; 93 | break; 94 | } 95 | case 'r': { 96 | mmap_flag = false; 97 | break; 98 | } 99 | case 'h': { 100 | print_help(argv[0]); 101 | return 0; 102 | } 103 | default: { 104 | return 1; 105 | } 106 | } 107 | } 108 | return reverse_lookup(cmdopt.argv + cmdopt.optind, 109 | cmdopt.argc - cmdopt.optind); 110 | } 111 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/tools/marisa-reverse-lookup.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aqen/rime-table-decompiler/f3131bfc5d3c5834b9721b44a78e398c7d90dbee/src/thirdparty/marisa-trie/tools/marisa-reverse-lookup.o -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/vs2010/base-test/base-test.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | 
rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | 23 | 24 | Header Files 25 | 26 | 27 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/vs2010/io-test/io-test.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | 23 | 24 | Header Files 25 | 26 | 27 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/vs2010/marisa-benchmark/marisa-benchmark.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | Source Files 23 | 24 | 25 | 26 | 27 | Header Files 28 | 29 | 30 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/vs2010/marisa-build/marisa-build.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | 
rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | Source Files 23 | 24 | 25 | 26 | 27 | Header Files 28 | 29 | 30 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/vs2010/marisa-common-prefix-search/marisa-common-prefix-search.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | Source Files 23 | 24 | 25 | 26 | 27 | Header Files 28 | 29 | 30 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/vs2010/marisa-dump/marisa-dump.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | Source Files 23 | 24 | 25 | 26 | 27 | Header Files 28 | 29 | 30 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/vs2010/marisa-lookup/marisa-lookup.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | 
h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | Source Files 23 | 24 | 25 | 26 | 27 | Header Files 28 | 29 | 30 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/vs2010/marisa-predictive-search/marisa-predictive-search.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | Source Files 23 | 24 | 25 | 26 | 27 | Header Files 28 | 29 | 30 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/vs2010/marisa-reverse-lookup/marisa-reverse-lookup.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | Source Files 23 | 24 | 25 | 26 | 27 | Header Files 28 | 29 | 30 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/vs2010/marisa-test/marisa-test.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | 
cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | 23 | 24 | Header Files 25 | 26 | 27 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/vs2010/trie-test/trie-test.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | 23 | 24 | Header Files 25 | 26 | 27 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/vs2010/vector-test/vector-test.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | 23 | 24 | Header Files 25 | 26 | 27 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/vs2013/base-test/base-test.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | 
{93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | 23 | 24 | Header Files 25 | 26 | 27 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/vs2013/io-test/io-test.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | 23 | 24 | Header Files 25 | 26 | 27 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/vs2013/marisa-benchmark/marisa-benchmark.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | Source Files 23 | 24 | 25 | 26 | 27 | Header Files 28 | 29 | 30 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/vs2013/marisa-build/marisa-build.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | 
{93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | Source Files 23 | 24 | 25 | 26 | 27 | Header Files 28 | 29 | 30 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/vs2013/marisa-common-prefix-search/marisa-common-prefix-search.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | Source Files 23 | 24 | 25 | 26 | 27 | Header Files 28 | 29 | 30 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/vs2013/marisa-dump/marisa-dump.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | Source Files 23 | 24 | 25 | 26 | 27 | Header Files 28 | 29 | 30 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/vs2013/marisa-lookup/marisa-lookup.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 
{4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | Source Files 23 | 24 | 25 | 26 | 27 | Header Files 28 | 29 | 30 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/vs2013/marisa-predictive-search/marisa-predictive-search.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | Source Files 23 | 24 | 25 | 26 | 27 | Header Files 28 | 29 | 30 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/vs2013/marisa-reverse-lookup/marisa-reverse-lookup.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | Source Files 23 | 24 | 25 | 26 | 27 | Header Files 28 | 29 | 30 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/vs2013/marisa-test/marisa-test.vcxproj.filters: 
-------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | 23 | 24 | Header Files 25 | 26 | 27 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/vs2013/trie-test/trie-test.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | 23 | 24 | Header Files 25 | 26 | 27 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/vs2013/vector-test/vector-test.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | 23 | 24 | Header Files 25 | 26 | 27 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/vs2015/base-test/base-test.vcxproj.filters: 
-------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | 23 | 24 | Header Files 25 | 26 | 27 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/vs2015/io-test/io-test.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | 23 | 24 | Header Files 25 | 26 | 27 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/vs2015/marisa-benchmark/marisa-benchmark.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | Source Files 23 | 24 | 25 | 26 | 27 | Header Files 28 | 29 | 30 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/vs2015/marisa-build/marisa-build.vcxproj.filters: 
-------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | Source Files 23 | 24 | 25 | 26 | 27 | Header Files 28 | 29 | 30 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/vs2015/marisa-common-prefix-search/marisa-common-prefix-search.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | Source Files 23 | 24 | 25 | 26 | 27 | Header Files 28 | 29 | 30 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/vs2015/marisa-dump/marisa-dump.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | Source Files 23 | 24 | 25 | 26 | 27 | Header Files 28 | 29 | 30 | -------------------------------------------------------------------------------- 
/src/thirdparty/marisa-trie/vs2015/marisa-lookup/marisa-lookup.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | Source Files 23 | 24 | 25 | 26 | 27 | Header Files 28 | 29 | 30 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/vs2015/marisa-predictive-search/marisa-predictive-search.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | Source Files 23 | 24 | 25 | 26 | 27 | Header Files 28 | 29 | 30 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/vs2015/marisa-reverse-lookup/marisa-reverse-lookup.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | Source Files 23 | 24 | 25 | 26 | 27 | Header Files 28 | 29 | 30 | 
-------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/vs2015/marisa-test/marisa-test.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | 23 | 24 | Header Files 25 | 26 | 27 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/vs2015/trie-test/trie-test.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | 23 | 24 | Header Files 25 | 26 | 27 | -------------------------------------------------------------------------------- /src/thirdparty/marisa-trie/vs2015/vector-test/vector-test.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | 23 | 24 | Header Files 25 | 26 | 27 | 
-------------------------------------------------------------------------------- /src/utf8.h: -------------------------------------------------------------------------------- 1 | // Copyright 2006 Nemanja Trifunovic 2 | 3 | /* 4 | Permission is hereby granted, free of charge, to any person or organization 5 | obtaining a copy of the software and accompanying documentation covered by 6 | this license (the "Software") to use, reproduce, display, distribute, 7 | execute, and transmit the Software, and to prepare derivative works of the 8 | Software, and to permit third-parties to whom the Software is furnished to 9 | do so, all subject to the following: 10 | 11 | The copyright notices in the Software and this entire statement, including 12 | the above license grant, this restriction and the following disclaimer, 13 | must be included in all copies of the Software, in whole or in part, and 14 | all derivative works of the Software, unless such copies or derivative 15 | works are solely in the form of machine-executable object code generated by 16 | a source language processor. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT 21 | SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE 22 | FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, 23 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 24 | DEALINGS IN THE SOFTWARE. 
25 | */ 26 | 27 | 28 | #ifndef UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731 29 | #define UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731 30 | 31 | #include "utf8/checked.h" 32 | #include "utf8/unchecked.h" 33 | 34 | #endif // header guard 35 | -------------------------------------------------------------------------------- /src/vocabulary.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright RIME Developers 3 | // Distributed under the BSD License 4 | // 5 | // 2011-07-10 GONG Chen 6 | // 7 | 8 | #ifndef RIME_VOCABULARY_H_ 9 | #define RIME_VOCABULARY_H_ 10 | 11 | #include 12 | #include "common.h" 13 | 14 | namespace rime { 15 | 16 | using Syllabary = set; 17 | 18 | using SyllableId = int32_t; 19 | 20 | class Code : public vector { 21 | public: 22 | static const size_t kIndexCodeMaxLength = 3; 23 | 24 | bool operator< (const Code& other) const; 25 | bool operator== (const Code& other) const; 26 | 27 | void CreateIndex(Code* index_code); 28 | 29 | string ToString() const; 30 | }; 31 | 32 | struct DictEntry { 33 | string text; 34 | string comment; 35 | string preedit; 36 | double weight = 0.0; 37 | int commit_count = 0; 38 | Code code; // multi-syllable code from prism 39 | string custom_code; // user defined code 40 | int remaining_code_length = 0; 41 | 42 | DictEntry() = default; 43 | bool operator< (const DictEntry& other) const; 44 | }; 45 | 46 | class DictEntryList : public vector> { 47 | public: 48 | void Sort(); 49 | void SortRange(size_t start, size_t count); 50 | }; 51 | 52 | using DictEntryFilter = function entry)>; 53 | 54 | class DictEntryFilterBinder { 55 | public: 56 | void AddFilter(DictEntryFilter filter); 57 | 58 | protected: 59 | DictEntryFilter filter_; 60 | }; 61 | 62 | class Vocabulary; 63 | 64 | struct VocabularyPage { 65 | DictEntryList entries; 66 | an next_level; 67 | }; 68 | 69 | class Vocabulary : public map { 70 | public: 71 | DictEntryList* LocateEntries(const Code& code); 72 | 
void SortHomophones(); 73 | }; 74 | 75 | // word -> { code, ... } 76 | using ReverseLookupTable = map>; 77 | 78 | } // namespace rime 79 | 80 | #endif // RIME_VOCABULARY_H_ 81 | --------------------------------------------------------------------------------