├── src ├── CMakeLists.txt └── libime │ ├── CMakeLists.txt │ ├── table │ ├── log.cpp │ ├── constants.h │ ├── LibIMETableConfig.cmake.in │ ├── log.h │ ├── tabledecoder_p.h │ ├── autophrasedict.h │ ├── tableoptions.h │ ├── CMakeLists.txt │ ├── tabledecoder.h │ ├── tablerule.h │ ├── tableoptions.cpp │ ├── tablebaseddictionary_p.h │ ├── tablecontext.h │ ├── autophrasedict.cpp │ ├── tablebaseddictionary.h │ └── tabledecoder.cpp │ ├── core │ ├── utils.cpp │ ├── inputbuffer.cpp │ ├── dictionary.cpp │ ├── utils.h │ ├── constants.h │ ├── lattice_p.h │ ├── LibIMECoreConfig.cmake.in │ ├── dictionary.h │ ├── prediction.h │ ├── userlanguagemodel.h │ ├── lattice.cpp │ ├── inputbuffer.h │ ├── endian_p.h │ ├── historybigram.h │ ├── triedictionary.h │ ├── decoder.h │ ├── triedictionary.cpp │ ├── lrucache.h │ ├── CMakeLists.txt │ ├── prediction.cpp │ ├── languagemodel.h │ ├── segmentgraph.cpp │ └── userlanguagemodel.cpp │ └── pinyin │ ├── LibIMEPinyinConfig.cmake.in │ ├── constants.h │ ├── pinyindecoder_p.h │ ├── pinyindata_p.h │ ├── pinyinmatchstate.h │ ├── shuangpinprofile.h │ ├── pinyinprediction.h │ ├── pinyindecoder.h │ ├── pinyincorrectionprofile.h │ ├── pinyindata.h │ ├── pinyindecoder.cpp │ ├── pinyinmatchstate.cpp │ ├── CMakeLists.txt │ ├── pinyinime.h │ ├── pinyincorrectionprofile.cpp │ ├── pinyindictionary.h │ ├── shuangpindata.h │ ├── pinyinmatchstate_p.h │ └── pinyincontext.h ├── .gitignore ├── .gitmodules ├── Doxyfile.in ├── test ├── testdir.h.in ├── testprediction.cpp ├── testutils.h ├── testmodel.cpp ├── testusermodel.cpp ├── testautophrasedict.cpp ├── CMakeLists.txt ├── testtrie.cpp ├── testpinyinprediction.cpp ├── testinputbuffer.cpp ├── testtablerule.cpp ├── triebench.cpp ├── testpinyinime_unit.cpp ├── testtableime_unit.cpp ├── testtableime.cpp ├── testpinyindictionary.cpp ├── testdecoder.cpp └── testpinyinime.cpp ├── config.h.in ├── README.md ├── .codedocs ├── tools ├── libime_history.cpp ├── CMakeLists.txt ├── libime_pinyindict.cpp └── libime_tabledict.cpp ├── 
CMakeLists.txt ├── data └── CMakeLists.txt ├── .clang-format └── .github └── workflows └── check.yml /src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_subdirectory(libime) 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.kdev4 2 | *~ 3 | build* 4 | test/bench.cpp 5 | test/cedarpp.h 6 | *.tar.* 7 | .* 8 | -------------------------------------------------------------------------------- /src/libime/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_subdirectory(core) 2 | add_subdirectory(pinyin) 3 | add_subdirectory(table) 4 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "src/libime/kenlm"] 2 | path = src/libime/core/kenlm 3 | url = https://github.com/kpu/kenlm.git 4 | -------------------------------------------------------------------------------- /Doxyfile.in: -------------------------------------------------------------------------------- 1 | OUTPUT_DIRECTORY = @CMAKE_CURRENT_BINARY_DIR@/doc/ 2 | PROJECT_NUMBER = @FCITX_VERSION@ 3 | 4 | @FCITX_DOXYGEN_CONFIGURATION@ 5 | -------------------------------------------------------------------------------- /src/libime/table/log.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | #include "log.h" 7 | 8 | namespace libime { 9 | FCITX_DEFINE_LOG_CATEGORY(libime_table_logcategory, "libime-table"); 10 | } 11 | -------------------------------------------------------------------------------- /src/libime/core/utils.cpp: 
-------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | 7 | #include "utils.h" 8 | #include 9 | 10 | namespace libime { 11 | 12 | FCITX_DEFINE_LOG_CATEGORY(libime_logcategory, "libime") 13 | } 14 | -------------------------------------------------------------------------------- /test/testdir.h.in: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017~2017 CSSlayer 3 | * wengxt@gmail.com 4 | * 5 | * SPDX-License-Identifier: LGPL-2.1-or-later 6 | */ 7 | #ifndef _TEST_TESTDIR_H_ 8 | #define _TEST_TESTDIR_H_ 9 | 10 | #define LIBIME_SOURCE_DIR "@CMAKE_SOURCE_DIR@" 11 | #define LIBIME_BINARY_DIR "@CMAKE_BINARY_DIR@" 12 | 13 | #endif // _TEST_TESTDIR_H_ 14 | -------------------------------------------------------------------------------- /config.h.in: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | #ifndef _LIBIME_CONFIG_H_IN_ 7 | #define _LIBIME_CONFIG_H_IN_ 8 | 9 | #define LIBIME_INSTALL_PKGDATADIR "@LIBIME_INSTALL_PKGDATADIR@" 10 | #define LIBIME_INSTALL_LIBDATADIR "@LIBIME_INSTALL_LIBDATADIR@" 11 | 12 | #endif // _LIBIME_CONFIG_H_IN_ 13 | -------------------------------------------------------------------------------- /src/libime/core/inputbuffer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | #include "inputbuffer.h" 7 | #include 8 | #include 9 | #include 10 | 11 | namespace libime { 12 | 13 | std::string_view InputBuffer::at(size_t i) const { return viewAt(i); } 14 | } // namespace libime 15 | 
-------------------------------------------------------------------------------- /src/libime/table/constants.h: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | #ifndef _FCITX_LIBIME_TABLE_CONSTANTS_H_ 7 | #define _FCITX_LIBIME_TABLE_CONSTANTS_H_ 8 | 9 | namespace libime { 10 | constexpr int TABLE_AUTOPHRASE_SIZE = 256; 11 | constexpr float TABLE_DEFAULT_MIN_DISTANCE = 1.0f; 12 | } // namespace libime 13 | 14 | #endif // _FCITX_LIBIME_TABLE_CONSTANTS_H_ 15 | -------------------------------------------------------------------------------- /src/libime/pinyin/LibIMEPinyinConfig.cmake.in: -------------------------------------------------------------------------------- 1 | @PACKAGE_INIT@ 2 | 3 | include(CMakeFindDependencyMacro) 4 | 5 | find_dependency(Boost 1.61) 6 | find_dependency(LibIMECore) 7 | 8 | include("${CMAKE_CURRENT_LIST_DIR}/LibIMEPinyinTargets.cmake") 9 | 10 | if(NOT TARGET LibIME::pinyindict) 11 | add_executable(LibIME::pinyindict IMPORTED) 12 | set_target_properties(LibIME::pinyindict PROPERTIES 13 | IMPORTED_LOCATION "@CMAKE_INSTALL_FULL_BINDIR@/libime_pinyindict") 14 | endif() 15 | 16 | -------------------------------------------------------------------------------- /src/libime/table/LibIMETableConfig.cmake.in: -------------------------------------------------------------------------------- 1 | @PACKAGE_INIT@ 2 | 3 | include(CMakeFindDependencyMacro) 4 | 5 | find_dependency(Boost 1.61) 6 | find_dependency(LibIMECore) 7 | 8 | include("${CMAKE_CURRENT_LIST_DIR}/LibIMETableTargets.cmake") 9 | 10 | if(NOT TARGET LibIME::tabledict) 11 | add_executable(LibIME::tabledict IMPORTED) 12 | set_target_properties(LibIME::tabledict PROPERTIES 13 | IMPORTED_LOCATION "@CMAKE_INSTALL_FULL_BINDIR@/libime_tabledict") 14 | endif() 15 | 16 | 17 | 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | libime 2 | ============================================== 3 | 4 | [![Jenkins Build Status](https://img.shields.io/jenkins/s/https/jenkins.fcitx-im.org/job/libime.svg)](https://jenkins.fcitx-im.org/job/libime/) 5 | [![Coverity Scan Status](https://img.shields.io/coverity/scan/12101.svg)](https://scan.coverity.com/projects/fcitx-libime) 6 | [![Documentation](https://codedocs.xyz/fcitx/libime.svg)](https://codedocs.xyz/fcitx/libime/) 7 | 8 | This is a library to support generic input method implementation. 9 | -------------------------------------------------------------------------------- /.codedocs: -------------------------------------------------------------------------------- 1 | INPUT = src/libime 2 | DOXYFILE_ENCODING = UTF-8 3 | PROJECT_NAME = libime 4 | OUTPUT_LANGUAGE = English 5 | SOURCE_BROWSER = YES 6 | FILE_PATTERNS = 7 | QT_AUTOBRIEF = YES 8 | RECURSIVE = YES 9 | STRIP_CODE_COMMENTS = NO 10 | EXCLUDE_PATTERNS = "*/*_p.h" "*/src/libime/kenlm/*" 11 | ENABLE_PREPROCESSING = YES 12 | MACRO_EXPANSION = YES 13 | 14 | STRIP_FROM_PATH = src 15 | STRIP_FROM_INC_PATH = src 16 | -------------------------------------------------------------------------------- /src/libime/pinyin/constants.h: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | #ifndef _FCITX_LIBIME_PINYIN_CONSTANTS_H_ 7 | #define _FCITX_LIBIME_PINYIN_CONSTANTS_H_ 8 | 9 | namespace libime { 10 | constexpr float PINYIN_DISTANCE_PENALTY_FACTOR = 1.8; 11 | constexpr int PINYIN_ADVACNED_TYPO_FUZZY_FACTOR = 5; 12 | constexpr int PINYIN_CORRECTION_FUZZY_FACTOR = 10; 13 | } // namespace libime 14 | 15 | #endif // _FCITX_LIBIME_PINYIN_CONSTANTS_H_ 16 | 
-------------------------------------------------------------------------------- /src/libime/core/dictionary.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | 7 | #include "dictionary.h" 8 | #include 9 | #include "segmentgraph.h" 10 | 11 | void libime::Dictionary::matchPrefix( 12 | const SegmentGraph &graph, const GraphMatchCallback &callback, 13 | const std::unordered_set &ignore, 14 | void *helper) const { 15 | matchPrefixImpl(graph, callback, ignore, helper); 16 | } 17 | -------------------------------------------------------------------------------- /src/libime/table/log.h: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | #ifndef _FCITX_LIBIME_TABLE_LOG_H_ 7 | #define _FCITX_LIBIME_TABLE_LOG_H_ 8 | 9 | #include 10 | 11 | namespace libime { 12 | FCITX_DECLARE_LOG_CATEGORY(libime_table_logcategory); 13 | 14 | #define LIBIME_TABLE_DEBUG() \ 15 | FCITX_LOGC(::libime::libime_table_logcategory, Debug) 16 | } // namespace libime 17 | 18 | #endif // _FCITX_LIBIME_TABLE_LOG_H_ 19 | -------------------------------------------------------------------------------- /src/libime/core/utils.h: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2015-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | 7 | #ifndef LIBIME_UTILS_H 8 | #define LIBIME_UTILS_H 9 | 10 | #include 11 | #include 12 | 13 | namespace libime { 14 | 15 | LIBIMECORE_EXPORT FCITX_DECLARE_LOG_CATEGORY(libime_logcategory); 16 | #define LIBIME_DEBUG() FCITX_LOGC(::libime::libime_logcategory, Debug) 17 | #define LIBIME_ERROR() FCITX_LOGC(::libime::libime_logcategory, Error) 18 | } // namespace libime 19 | 20 | 
#endif // LIBIME_UTILS_H 21 | -------------------------------------------------------------------------------- /src/libime/pinyin/pinyindecoder_p.h: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | #ifndef _FCITX_LIBIME_PINYIN_PINYINDECODER_P_H_ 7 | #define _FCITX_LIBIME_PINYIN_PINYINDECODER_P_H_ 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | namespace libime { 14 | 15 | class PinyinLatticeNodePrivate : public LatticeNodeData { 16 | public: 17 | PinyinLatticeNodePrivate(std::string_view encodedPinyin, bool isCorrection) 18 | : encodedPinyin_(encodedPinyin), isCorrection_(isCorrection) {} 19 | 20 | std::string encodedPinyin_; 21 | bool isCorrection_ = false; 22 | }; 23 | } // namespace libime 24 | 25 | #endif // _FCITX_LIBIME_PINYIN_PINYINDECODER_P_H_ 26 | -------------------------------------------------------------------------------- /src/libime/core/constants.h: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | #ifndef _FCITX_LIBIME_CORE_CONSTANTS_H_ 7 | #define _FCITX_LIBIME_CORE_CONSTANTS_H_ 8 | 9 | namespace libime { 10 | 11 | constexpr float DEFAULT_USER_LANGUAGE_MODEL_UNIGRAM_WEIGHT = 3; 12 | constexpr float DEFAULT_USER_LANGUAGE_MODEL_BIGRAM_WEIGHT = 15; 13 | constexpr float DEFAULT_LANGUAGE_MODEL_UNKNOWN_PROBABILITY_PENALTY = 14 | 1 / 60000000.0F; 15 | // -38... 
is log10(2^-127) 16 | constexpr float HISTORY_BIGRAM_ALPHA_VALUE = 1.0F; 17 | constexpr float MIN_FLOAT_LOG10 = -38.23080944932561; 18 | constexpr float DEFAULT_USER_LANGUAGE_MODEL_USER_WEIGHT = 0.2F; 19 | } // namespace libime 20 | 21 | #endif // _FCITX_LIBIME_CORE_CONSTANTS_H_ 22 | -------------------------------------------------------------------------------- /src/libime/core/lattice_p.h: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | #ifndef _FCITX_LIBIME_CORE_LATTICE_P_H_ 7 | #define _FCITX_LIBIME_CORE_LATTICE_P_H_ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | namespace libime { 16 | 17 | using LatticeMap = std::unordered_map>; 19 | 20 | class LatticePrivate { 21 | public: 22 | LatticeMap lattice_; 23 | 24 | std::vector nbests_; 25 | }; 26 | } // namespace libime 27 | 28 | #endif // _FCITX_LIBIME_CORE_LATTICE_P_H_ 29 | -------------------------------------------------------------------------------- /test/testprediction.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | 7 | #include 8 | #include 9 | #include 10 | #include "libime/core/historybigram.h" 11 | #include "libime/core/prediction.h" 12 | #include "libime/core/userlanguagemodel.h" 13 | #include "testdir.h" 14 | 15 | using namespace libime; 16 | 17 | int main() { 18 | UserLanguageModel model(LIBIME_BINARY_DIR "/data/sc.lm"); 19 | Prediction pred; 20 | pred.setUserLanguageModel(&model); 21 | model.history().add({"你", "希望"}); 22 | for (const auto &result : pred.predict(std::vector{"你"})) { 23 | FCITX_LOG(Info) << result; 24 | } 25 | for (const auto &result : pred.predict(std::vector{"你"})) { 26 | FCITX_LOG(Info) << result; 27 | } 28 | 29 | return 0; 30 | } 31 | 
-------------------------------------------------------------------------------- /test/testutils.h: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | #ifndef _TEST_TESTUTILS_H_ 7 | #define _TEST_TESTUTILS_H_ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | struct ScopedNanoTimer { 15 | std::chrono::high_resolution_clock::time_point t0; 16 | std::function cb; 17 | 18 | ScopedNanoTimer(std::function callback) 19 | : t0(std::chrono::high_resolution_clock::now()), 20 | cb(std::move(callback)) {} 21 | ~ScopedNanoTimer(void) { 22 | auto t1 = std::chrono::high_resolution_clock::now(); 23 | auto nanos = 24 | std::chrono::duration_cast(t1 - t0) 25 | .count(); 26 | 27 | cb(nanos); 28 | } 29 | }; 30 | 31 | #endif // _TEST_TESTUTILS_H_ 32 | -------------------------------------------------------------------------------- /src/libime/pinyin/pinyindata_p.h: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | #ifndef _FCITX_LIBIME_PINYIN_PINYINDATA_P_H_ 7 | #define _FCITX_LIBIME_PINYIN_PINYINDATA_P_H_ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include "libime/core/utils_p.h" 15 | 16 | namespace libime { 17 | 18 | using InnerSegmentMap = 19 | std::unordered_map>, 21 | StringHash, std::equal_to<>>; 22 | 23 | const std::unordered_map>, 25 | StringHash, std::equal_to<>> & 26 | getInnerSegmentV2(); 27 | } // namespace libime 28 | 29 | #endif // _FCITX_LIBIME_PINYIN_PINYINDATA_P_H_ 30 | -------------------------------------------------------------------------------- /test/testmodel.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * 
SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | 7 | #include 8 | #include 9 | #include 10 | #include "libime/core/languagemodel.h" 11 | #include "libime/core/lattice.h" 12 | #include "testdir.h" 13 | 14 | int main() { 15 | using namespace libime; 16 | LanguageModel model(LIBIME_BINARY_DIR "/data/sc.lm"); 17 | State state(model.nullState()); 18 | State out_state = model.nullState(); 19 | std::string word; 20 | float sum = 0.0F; 21 | while (std::cin >> word) { 22 | float s; 23 | WordNode w(word, model.index(word)); 24 | std::cout << w.idx() << " " << (s = model.score(state, w, out_state)) 25 | << '\n'; 26 | std::cout << "Prob" << std::pow(10, s) << '\n'; 27 | state = out_state; 28 | sum += s; 29 | } 30 | std::cout << sum << std::endl; 31 | 32 | return 0; 33 | } 34 | -------------------------------------------------------------------------------- /src/libime/table/tabledecoder_p.h: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | #ifndef _FCITX_LIBIME_TABLE_TABLEDECODER_P_H_ 7 | #define _FCITX_LIBIME_TABLE_TABLEDECODER_P_H_ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include "libime/core/lattice.h" 15 | #include "libime/table/tablebaseddictionary.h" 16 | 17 | namespace libime { 18 | 19 | class TableLatticeNodePrivate : public LatticeNodeData { 20 | public: 21 | TableLatticeNodePrivate(std::string_view code, uint32_t index, 22 | PhraseFlag flag) 23 | : code_(code), codeLength_(fcitx::utf8::length(code)), index_(index), 24 | flag_(flag) {} 25 | 26 | std::string code_; 27 | size_t codeLength_; 28 | uint32_t index_; 29 | PhraseFlag flag_; 30 | }; 31 | } // namespace libime 32 | 33 | #endif // _FCITX_LIBIME_TABLE_TABLEDECODER_P_H_ 34 | -------------------------------------------------------------------------------- /src/libime/core/LibIMECoreConfig.cmake.in: 
-------------------------------------------------------------------------------- 1 | @PACKAGE_INIT@ 2 | 3 | include(CMakeFindDependencyMacro) 4 | 5 | find_dependency(Boost 1.61) 6 | find_dependency(Fcitx5Utils) 7 | 8 | if(NOT TARGET LibIME::slm_build_binary) 9 | add_executable(LibIME::slm_build_binary IMPORTED) 10 | set_target_properties(LibIME::slm_build_binary PROPERTIES 11 | IMPORTED_LOCATION "@CMAKE_INSTALL_FULL_BINDIR@/libime_slm_build_binary") 12 | endif() 13 | 14 | if(NOT TARGET LibIME::prediction) 15 | add_executable(LibIME::prediction IMPORTED) 16 | set_target_properties(LibIME::prediction PROPERTIES 17 | IMPORTED_LOCATION "@CMAKE_INSTALL_FULL_BINDIR@/libime_prediction") 18 | endif() 19 | 20 | if(NOT TARGET LibIME::history) 21 | add_executable(LibIME::history IMPORTED) 22 | set_target_properties(LibIME::history PROPERTIES 23 | IMPORTED_LOCATION "@CMAKE_INSTALL_FULL_BINDIR@/libime_history") 24 | endif() 25 | 26 | include("${CMAKE_CURRENT_LIST_DIR}/LibIMECoreTargets.cmake") 27 | set(LIBIME_INSTALL_PKGDATADIR "@LIBIME_INSTALL_PKGDATADIR@") 28 | -------------------------------------------------------------------------------- /src/libime/core/dictionary.h: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | #ifndef _FCITX_LIBIME_CORE_DICTIONARY_H_ 7 | #define _FCITX_LIBIME_CORE_DICTIONARY_H_ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | namespace libime { 17 | 18 | class WordNode; 19 | 20 | // The callback accepts the passed path that matches the word. 
21 | using GraphMatchCallback = 22 | std::function)>; 24 | 25 | class LIBIMECORE_EXPORT Dictionary { 26 | public: 27 | void 28 | matchPrefix(const SegmentGraph &graph, const GraphMatchCallback &callback, 29 | const std::unordered_set &ignore = {}, 30 | void *helper = nullptr) const; 31 | 32 | protected: 33 | virtual void 34 | matchPrefixImpl(const SegmentGraph &graph, 35 | const GraphMatchCallback &callback, 36 | const std::unordered_set &ignore, 37 | void *helper) const = 0; 38 | }; 39 | } // namespace libime 40 | 41 | #endif // _FCITX_LIBIME_CORE_DICTIONARY_H_ 42 | -------------------------------------------------------------------------------- /test/testusermodel.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include "libime/core/historybigram.h" 15 | #include "libime/core/languagemodel.h" 16 | #include "libime/core/lattice.h" 17 | #include "libime/core/userlanguagemodel.h" 18 | #include "testdir.h" 19 | 20 | int main(int argc, char *argv[]) { 21 | using namespace libime; 22 | UserLanguageModel model(LIBIME_BINARY_DIR "/data/sc.lm"); 23 | if (argc >= 2) { 24 | std::fstream fin(argv[1], std::ios::in | std::ios::binary); 25 | model.history().load(fin); 26 | } 27 | State state(model.nullState()); 28 | State out_state = model.nullState(); 29 | std::string word; 30 | float sum = 0.0F; 31 | std::list nodes; 32 | while (std::cin >> word) { 33 | float s; 34 | nodes.emplace_back(word, model.index(word)); 35 | std::cout << nodes.back().idx() << " " 36 | << (s = model.score(state, nodes.back(), out_state)) << '\n'; 37 | std::cout << "Prob" << std::pow(10, s) << '\n'; 38 | state = out_state; 39 | sum += s; 40 | } 41 | std::cout << sum << std::endl; 42 | 43 | return 0; 44 | } 45 | 
-------------------------------------------------------------------------------- /test/testautophrasedict.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include "libime/table/autophrasedict.h" 12 | 13 | using namespace libime; 14 | 15 | void testSearch(const AutoPhraseDict &dict, std::string_view key, 16 | std::unordered_set expect) { 17 | dict.search(key, [&expect](std::string_view entry, int) { 18 | expect.erase(std::string{entry}); 19 | return true; 20 | }); 21 | FCITX_ASSERT(expect.empty()); 22 | } 23 | 24 | int main() { 25 | AutoPhraseDict dict(4); 26 | dict.insert("abc"); 27 | dict.insert("ab"); 28 | dict.insert("abcd"); 29 | dict.insert("bcd"); 30 | 31 | testSearch(dict, "a", {"abc", "ab", "abcd"}); 32 | testSearch(dict, "ab", {"ab", "abc", "abcd"}); 33 | testSearch(dict, "abc", {"abc", "abcd"}); 34 | testSearch(dict, "abcd", {"abcd"}); 35 | 36 | std::stringstream ss; 37 | dict.save(ss); 38 | 39 | AutoPhraseDict dict2(4); 40 | dict2.load(ss); 41 | testSearch(dict2, "a", {"abc", "ab", "abcd"}); 42 | testSearch(dict2, "ab", {"ab", "abc", "abcd"}); 43 | testSearch(dict2, "abc", {"abc", "abcd"}); 44 | testSearch(dict2, "abcd", {"abcd"}); 45 | testSearch(dict2, "", {"bcd", "ab", "abc", "abcd"}); 46 | 47 | return 0; 48 | } 49 | -------------------------------------------------------------------------------- /src/libime/pinyin/pinyinmatchstate.h: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | #ifndef _FCITX_LIBIME_PINYIN_PINYINMATCHSTATE_H_ 7 | #define _FCITX_LIBIME_PINYIN_PINYINMATCHSTATE_H_ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | 
namespace libime { 17 | 18 | class PinyinMatchStatePrivate; 19 | class SegmentGraphNode; 20 | class ShuangpinProfile; 21 | class PinyinContext; 22 | 23 | // Provides caching mechanism used by PinyinContext. 24 | class LIBIMEPINYIN_EXPORT PinyinMatchState { 25 | friend class PinyinMatchContext; 26 | 27 | public: 28 | PinyinMatchState(PinyinContext *context); 29 | ~PinyinMatchState(); 30 | 31 | // Invalidate everything in the state. 32 | void clear(); 33 | 34 | // Invalidate a set of nodes, usually caused by the change of user input. 35 | void discardNode(const std::unordered_set &node); 36 | 37 | // Invalidate a whole dictionary, usually caused by the change to the 38 | // dictionary. 39 | void discardDictionary(size_t idx); 40 | 41 | PinyinFuzzyFlags fuzzyFlags() const; 42 | std::shared_ptr shuangpinProfile() const; 43 | std::shared_ptr correctionProfile() const; 44 | size_t partialLongWordLimit() const; 45 | 46 | private: 47 | std::unique_ptr d_ptr; 48 | FCITX_DECLARE_PRIVATE(PinyinMatchState); 49 | }; 50 | } // namespace libime 51 | 52 | #endif // _FCITX_LIBIME_PINYIN_PINYINMATCHSTATE_H_ 53 | -------------------------------------------------------------------------------- /src/libime/core/prediction.h: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | #ifndef _FCITX_LIBIME_CORE_PREDICTION_H_ 7 | #define _FCITX_LIBIME_CORE_PREDICTION_H_ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | namespace libime { 20 | 21 | class PredictionPrivate; 22 | class HistoryBigram; 23 | 24 | class LIBIMECORE_EXPORT Prediction { 25 | public: 26 | Prediction(); 27 | virtual ~Prediction(); 28 | 29 | void setUserLanguageModel(const UserLanguageModel *lm) { 30 | setLanguageModel(lm); 31 | setHistoryBigram(&lm->history()); 32 | } 33 | 34 | void 
setLanguageModel(const LanguageModel *model); 35 | 36 | const LanguageModel *model() const; 37 | 38 | void setHistoryBigram(const HistoryBigram *bigram); 39 | 40 | const HistoryBigram *historyBigram() const; 41 | 42 | std::vector 43 | predict(const State &state, const std::vector &sentence = {}, 44 | size_t maxSize = 0); 45 | std::vector 46 | predict(const std::vector &sentence = {}, size_t maxSize = 0); 47 | 48 | std::vector> 49 | predictWithScore(const State &state, 50 | const std::vector &sentence, size_t maxSize); 51 | 52 | private: 53 | std::unique_ptr d_ptr; 54 | FCITX_DECLARE_PRIVATE(Prediction); 55 | }; 56 | 57 | } // namespace libime 58 | 59 | #endif // _FCITX_LIBIME_CORE_PREDICTION_H_ 60 | -------------------------------------------------------------------------------- /src/libime/core/userlanguagemodel.h: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | #ifndef _FCITX_LIBIME_CORE_USERLANGUAGEMODEL_H_ 7 | #define _FCITX_LIBIME_CORE_USERLANGUAGEMODEL_H_ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | namespace libime { 20 | 21 | class UserLanguageModelPrivate; 22 | class HistoryBigram; 23 | 24 | class LIBIMECORE_EXPORT UserLanguageModel : public LanguageModel { 25 | public: 26 | explicit UserLanguageModel(const char *sysfile); 27 | 28 | UserLanguageModel( 29 | std::shared_ptr file = nullptr); 30 | virtual ~UserLanguageModel(); 31 | 32 | HistoryBigram &history(); 33 | const HistoryBigram &history() const; 34 | void load(std::istream &in); 35 | void save(std::ostream &out); 36 | 37 | void setHistoryWeight(float w); 38 | float historyWeight() const; 39 | 40 | void setUseOnlyUnigram(bool useOnlyUnigram); 41 | bool useOnlyUnigram() const; 42 | 43 | const State &beginState() const override; 44 | const State &nullState() 
const override; 45 | float score(const State &state, const WordNode &word, 46 | State &out) const override; 47 | bool isUnknown(WordIndex idx, std::string_view view) const override; 48 | 49 | bool containsNonUnigram(const std::vector &words) const; 50 | 51 | private: 52 | std::unique_ptr d_ptr; 53 | FCITX_DECLARE_PRIVATE(UserLanguageModel); 54 | }; 55 | } // namespace libime 56 | 57 | #endif // _FCITX_LIBIME_CORE_USERLANGUAGEMODEL_H_ 58 | -------------------------------------------------------------------------------- /src/libime/core/lattice.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | 7 | #include "lattice.h" 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include "lattice_p.h" 15 | #include "segmentgraph.h" 16 | 17 | namespace libime { 18 | 19 | WordNode::WordNode(WordNode &&other) noexcept( 20 | std::is_nothrow_move_constructible::value) = default; 21 | WordNode &WordNode::operator=(WordNode &&other) noexcept( 22 | std::is_nothrow_move_assignable::value) = default; 23 | 24 | Lattice::Lattice() : d_ptr(std::make_unique()) {} 25 | 26 | FCITX_DEFINE_DEFAULT_DTOR_AND_MOVE(Lattice) 27 | 28 | size_t Lattice::sentenceSize() const { 29 | FCITX_D(); 30 | return d->nbests_.size(); 31 | } 32 | 33 | const SentenceResult &Lattice::sentence(size_t idx) const { 34 | FCITX_D(); 35 | return d->nbests_[idx]; 36 | } 37 | 38 | Lattice::NodeRange Lattice::nodes(const SegmentGraphNode *node) const { 39 | FCITX_D(); 40 | auto iter = d->lattice_.find(node); 41 | if (iter == d->lattice_.end()) { 42 | return {}; 43 | } 44 | return {iter->second.begin(), iter->second.end()}; 45 | } 46 | 47 | void Lattice::clear() { 48 | FCITX_D(); 49 | d->lattice_.clear(); 50 | d->nbests_.clear(); 51 | } 52 | 53 | void Lattice::discardNode( 54 | const std::unordered_set &nodes) { 55 | FCITX_D(); 56 | for 
(const auto *node : nodes) { 57 | d->lattice_.erase(node); 58 | } 59 | for (auto &p : d->lattice_) { 60 | p.second.erase_if([&nodes](const LatticeNode &node) { 61 | return nodes.count(node.from()); 62 | }); 63 | } 64 | } 65 | } // namespace libime 66 | -------------------------------------------------------------------------------- /src/libime/table/autophrasedict.h: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | #ifndef _FCITX_LIBIME_TABLE_AUTOPHRASEDICT_H_ 7 | #define _FCITX_LIBIME_TABLE_AUTOPHRASEDICT_H_ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | namespace libime { 20 | 21 | class AutoPhraseDictPrivate; 22 | 23 | /// \brief A simple MRU based dictionary. 24 | class LIBIMETABLE_EXPORT AutoPhraseDict { 25 | public: 26 | AutoPhraseDict(size_t maxItems); 27 | AutoPhraseDict(size_t maxItems, std::istream &in); 28 | FCITX_DECLARE_VIRTUAL_DTOR_COPY_AND_MOVE(AutoPhraseDict) 29 | 30 | /// \brief Insert a word into dictionary and refresh the MRU. 31 | /// 32 | /// Set the value of entry to value if value is positive. Otherwise if the 33 | /// value is 0, the actual value will be increased by 1. 34 | void insert(const std::string &entry, uint32_t value = 0); 35 | 36 | /// \brief Check if any word starting with s exists in the dictionary. 37 | bool search( 38 | std::string_view s, 39 | const std::function &callback) const; 40 | 41 | /// \brief Returns 0 if there is no such word.
42 | uint32_t exactSearch(std::string_view s) const; 43 | void erase(std::string_view s); 44 | void clear(); 45 | 46 | void load(std::istream &in); 47 | void save(std::ostream &out); 48 | 49 | bool empty() const; 50 | 51 | private: 52 | std::unique_ptr d_ptr; 53 | FCITX_DECLARE_PRIVATE(AutoPhraseDict); 54 | }; 55 | } // namespace libime 56 | 57 | #endif // _FCITX_LIBIME_TABLE_AUTOPHRASEDICT_H_ 58 | -------------------------------------------------------------------------------- /test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | configure_file(testdir.h.in ${CMAKE_CURRENT_BINARY_DIR}/testdir.h @ONLY) 2 | include_directories(${CMAKE_CURRENT_BINARY_DIR}) 3 | 4 | set(LIBIME_SINGLE_FILE_TEST 5 | testtable 6 | testpinyindata 7 | testpinyinencoder 8 | testinputbuffer 9 | testhistorybigram 10 | testshuangpinprofile 11 | testtrie 12 | testautophrasedict 13 | testtablerule 14 | ) 15 | 16 | if (ENABLE_DATA) 17 | list(APPEND LIBIME_SINGLE_FILE_TEST 18 | testpinyinime_unit 19 | testdecoder 20 | testpinyincontext 21 | testpinyindictionary 22 | testprediction 23 | testpinyinprediction 24 | testtableime_unit 25 | ) 26 | endif() 27 | 28 | foreach(TESTCASE ${LIBIME_SINGLE_FILE_TEST}) 29 | add_executable(${TESTCASE} ${TESTCASE}.cpp) 30 | target_link_libraries(${TESTCASE} LibIME::Core LibIME::Pinyin LibIME::Table ) 31 | add_test(NAME ${TESTCASE} 32 | COMMAND ${TESTCASE}) 33 | endforeach() 34 | 35 | if (ENABLE_DATA) 36 | add_dependencies(testpinyinime_unit lm) 37 | add_dependencies(testdecoder dict lm) 38 | add_dependencies(testpinyincontext lm) 39 | add_dependencies(testpinyindictionary dict) 40 | add_dependencies(testprediction lm) 41 | add_dependencies(testpinyinprediction lm) 42 | add_dependencies(testtableime_unit lm) 43 | endif() 44 | 45 | add_executable(triebench triebench.cpp) 46 | target_link_libraries(triebench LibIME::Core) 47 | 48 | add_executable(testmodel testmodel.cpp) 49 | target_link_libraries(testmodel 
LibIME::Core) 50 | 51 | add_executable(testusermodel testusermodel.cpp) 52 | target_link_libraries(testusermodel LibIME::Core) 53 | 54 | add_executable(testpinyinime testpinyinime.cpp) 55 | target_link_libraries(testpinyinime LibIME::Pinyin) 56 | 57 | add_executable(testtableime testtableime.cpp) 58 | target_link_libraries(testtableime LibIME::Table) 59 | -------------------------------------------------------------------------------- /src/libime/pinyin/shuangpinprofile.h: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | #ifndef _FCITX_LIBIME_PINYIN_SHUANGPINPROFILE_H_ 7 | #define _FCITX_LIBIME_PINYIN_SHUANGPINPROFILE_H_ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | namespace libime { 20 | 21 | enum class ShuangpinBuiltinProfile { 22 | Ziranma, 23 | MS, 24 | Ziguang, 25 | ABC, 26 | Zhongwenzhixing, 27 | PinyinJiajia, 28 | Xiaohe, 29 | }; 30 | 31 | class ShuangpinProfilePrivate; 32 | 33 | class LIBIMEPINYIN_EXPORT ShuangpinProfile { 34 | public: 35 | using TableType = 36 | std::map>; 37 | using ValidInputSetType = std::set; 38 | explicit ShuangpinProfile(ShuangpinBuiltinProfile profile); 39 | explicit ShuangpinProfile(std::istream &in); 40 | 41 | explicit ShuangpinProfile(ShuangpinBuiltinProfile profile, 42 | const PinyinCorrectionProfile *correctionProfile); 43 | explicit ShuangpinProfile(std::istream &in, 44 | const PinyinCorrectionProfile *correctionProfile); 45 | 46 | FCITX_DECLARE_VIRTUAL_DTOR_COPY_AND_MOVE(ShuangpinProfile) 47 | 48 | const TableType &table() const; 49 | const ValidInputSetType &validInput() const; 50 | const ValidInputSetType &validInitial() const; 51 | 52 | private: 53 | void buildShuangpinTable(); 54 | std::unique_ptr d_ptr; 55 | FCITX_DECLARE_PRIVATE(ShuangpinProfile); 56 | }; 57 | } // namespace 
libime 58 | 59 | #endif // _FCITX_LIBIME_PINYIN_SHUANGPINPROFILE_H_ 60 | -------------------------------------------------------------------------------- /src/libime/pinyin/pinyinprediction.h: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2023-2023 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | #ifndef _FCITX_LIBIME_PINYIN_PREDICTION_H_ 7 | #define _FCITX_LIBIME_PINYIN_PREDICTION_H_ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | namespace libime { 22 | 23 | class PinyinPredictionPrivate; 24 | 25 | enum class PinyinPredictionSource { Model, Dictionary }; 26 | 27 | /** 28 | * This is a prediction class that allows to predict against both model and 29 | * pinyin dictionary. 30 | */ 31 | class LIBIMEPINYIN_EXPORT PinyinPrediction : public Prediction { 32 | public: 33 | PinyinPrediction(); 34 | virtual ~PinyinPrediction(); 35 | 36 | /** 37 | * Set the pinyin dictionary used for prediction. 38 | */ 39 | void setPinyinDictionary(const PinyinDictionary *dict); 40 | 41 | /** 42 | * Predict from model and pinyin dictionary for the last sentence being typed. 43 | */ 44 | std::vector> 45 | predict(const State &state, const std::vector &sentence, 46 | std::string_view lastEncodedPinyin, size_t maxSize = 0); 47 | 48 | /** 49 | * Same as the Prediction::predict with the same signature.
50 | */ 51 | std::vector 52 | predict(const std::vector &sentence = {}, size_t maxSize = 0); 53 | 54 | private: 55 | std::unique_ptr d_ptr; 56 | FCITX_DECLARE_PRIVATE(PinyinPrediction); 57 | }; 58 | 59 | } // namespace libime 60 | 61 | #endif // _FCITX_LIBIME_PINYIN_PREDICTION_H_ 62 | -------------------------------------------------------------------------------- /tools/libime_history.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include "libime/core/historybigram.h" 12 | 13 | void usage(const char *argv0) { 14 | std::cout << "Usage: " << argv0 << " [-c] " << std::endl 15 | << "-c: Compile text to binary" << std::endl 16 | << "-h: Show this help" << std::endl; 17 | } 18 | 19 | int main(int argc, char *argv[]) { 20 | bool compile = false; 21 | int c; 22 | while ((c = getopt(argc, argv, "ch")) != -1) { 23 | switch (c) { 24 | case 'c': 25 | compile = true; 26 | break; 27 | case 'h': 28 | usage(argv[0]); 29 | return 0; 30 | default: 31 | usage(argv[0]); 32 | return 1; 33 | } 34 | } 35 | 36 | if (optind + 2 != argc) { 37 | usage(argv[0]); 38 | return 1; 39 | } 40 | using namespace libime; 41 | HistoryBigram history; 42 | 43 | try { 44 | std::ifstream in(argv[optind], std::ios::in | std::ios::binary); 45 | if (compile) { 46 | history.loadText(in); 47 | } else { 48 | history.load(in); 49 | } 50 | 51 | std::ofstream fout; 52 | std::ostream *out; 53 | if (strcmp(argv[optind + 1], "-") == 0) { 54 | out = &std::cout; 55 | } else { 56 | fout.open(argv[optind + 1], std::ios::out | std::ios::binary); 57 | out = &fout; 58 | } 59 | if (compile) { 60 | history.save(*out); 61 | } else { 62 | history.dump(*out); 63 | } 64 | } catch (const std::exception &e) { 65 | std::cerr << e.what() << std::endl; 66 | return 1; 67 | } 68 | return 0; 69 | } 70 |
-------------------------------------------------------------------------------- /test/testtrie.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | #include 7 | #include 8 | #include 9 | #include "libime/core/datrie.h" 10 | 11 | using namespace libime; 12 | 13 | int main() { 14 | { 15 | DATrie trie; 16 | trie.set("aaaa", 1); 17 | trie.set("aaab", 1); 18 | trie.set("aaac", 1); 19 | trie.set("aaad", 1); 20 | trie.set("aab", 1); 21 | FCITX_ASSERT(trie.size() == 5); 22 | trie.erase("aaaa"); 23 | FCITX_ASSERT(trie.size() == 4); 24 | DATrie::position_type pos = 0; 25 | auto result = trie.traverse("aaa", pos); 26 | FCITX_ASSERT(trie.isNoValue(result)); 27 | trie.erase(pos); 28 | FCITX_ASSERT(trie.size() == 4); 29 | } 30 | 31 | { 32 | DATrie trie; 33 | trie.set("aaaa", 1); 34 | trie.set("aaab", 1); 35 | trie.set("aaac", 1); 36 | trie.set("aaad", 1); 37 | trie.set("aab", 1); 38 | FCITX_ASSERT(trie.size() == 5); 39 | trie.erase("aaaa"); 40 | FCITX_ASSERT(trie.size() == 4); 41 | DATrie::position_type pos = 0; 42 | auto result = trie.traverse("aaa", pos); 43 | auto nan1 = DATrie::noValue(); 44 | auto nan2 = DATrie::noPath(); 45 | // NaN != NaN, we must use memcmp to do this. 
46 | // NOLINTBEGIN(bugprone-suspicious-memory-comparison) 47 | FCITX_ASSERT(memcmp(&nan1, &result, sizeof(float)) == 0); 48 | FCITX_ASSERT(trie.isNoValue(result)); 49 | result = trie.traverse("aaae", pos); 50 | FCITX_ASSERT(memcmp(&nan2, &result, sizeof(float)) == 0); 51 | // NOLINTEND(bugprone-suspicious-memory-comparison) 52 | FCITX_ASSERT(trie.isNoPath(result)); 53 | trie.erase(pos); 54 | FCITX_ASSERT(trie.size() == 4); 55 | } 56 | return 0; 57 | } 58 | -------------------------------------------------------------------------------- /src/libime/core/inputbuffer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | #ifndef _FCITX_LIBIME_CORE_INPUTBUFFER_H_ 7 | #define _FCITX_LIBIME_CORE_INPUTBUFFER_H_ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | namespace libime { 17 | class InputBufferPrivate; 18 | 19 | class LIBIMECORE_EXPORT InputBuffer : public fcitx::InputBuffer { 20 | public: 21 | class iterator 22 | : public boost::iterator_facade { 25 | public: 26 | iterator() = default; 27 | iterator(const InputBuffer *buffer, size_t idx) 28 | : buffer_(buffer), idx_(idx) {} 29 | 30 | bool equal(iterator const &other) const { 31 | return buffer_ == other.buffer_ && idx_ == other.idx_; 32 | } 33 | 34 | void increment() { idx_++; } 35 | 36 | void decrement() { idx_--; } 37 | 38 | std::string_view dereference() const { return buffer_->at(idx_); } 39 | 40 | private: 41 | const InputBuffer *buffer_ = nullptr; 42 | size_t idx_ = 0; 43 | }; 44 | 45 | using fcitx::InputBuffer::InputBuffer; 46 | 47 | using fcitx::InputBuffer::type; 48 | // add one overload for string_view 49 | bool type(std::string_view s) { return type(s.data(), s.length()); } 50 | std::string_view at(size_t i) const; 51 | 52 | std::string_view operator[](size_t i) const { return at(i); } 53 | 54 | iterator 
begin() { return {this, 0}; } 55 | 56 | iterator end() { return {this, size()}; } 57 | }; 58 | } // namespace libime 59 | 60 | #endif // _FCITX_LIBIME_CORE_INPUTBUFFER_H_ 61 | -------------------------------------------------------------------------------- /src/libime/pinyin/pinyindecoder.h: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | #ifndef _FCITX_LIBIME_PINYIN_PINYINDECODER_H_ 7 | #define _FCITX_LIBIME_PINYIN_PINYINDECODER_H_ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | namespace libime { 20 | 21 | class PinyinLatticeNodePrivate; 22 | 23 | class LIBIMEPINYIN_EXPORT PinyinLatticeNode : public LatticeNode { 24 | public: 25 | PinyinLatticeNode(std::string_view word, WordIndex idx, 26 | SegmentGraphPath path, const State &state, float cost, 27 | std::unique_ptr data); 28 | virtual ~PinyinLatticeNode(); 29 | 30 | const std::string &encodedPinyin() const; 31 | bool isCorrection() const; 32 | bool anyCorrectionOnPath() const; 33 | 34 | private: 35 | std::unique_ptr d_ptr; 36 | }; 37 | 38 | class LIBIMEPINYIN_EXPORT PinyinDecoder : public Decoder { 39 | public: 40 | PinyinDecoder(const PinyinDictionary *dict, const LanguageModelBase *model) 41 | : Decoder(dict, model) {} 42 | 43 | protected: 44 | LatticeNode *createLatticeNodeImpl(const SegmentGraphBase &graph, 45 | const LanguageModelBase *model, 46 | std::string_view word, WordIndex idx, 47 | SegmentGraphPath path, 48 | const State &state, float cost, 49 | std::unique_ptr data, 50 | bool onlyPath) const override; 51 | }; 52 | } // namespace libime 53 | 54 | #endif // _FCITX_LIBIME_PINYIN_PINYINDECODER_H_ 55 | -------------------------------------------------------------------------------- /tools/CMakeLists.txt: 
-------------------------------------------------------------------------------- 1 | add_executable(libime_slm_build_binary ../src/libime/core/kenlm/lm/build_binary_main.cc) 2 | target_link_libraries(libime_slm_build_binary kenlm) 3 | 4 | install(TARGETS libime_slm_build_binary DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT tools) 5 | add_executable(LibIME::slm_build_binary ALIAS libime_slm_build_binary) 6 | 7 | add_executable(libime_prediction libime_prediction.cpp) 8 | target_link_libraries(libime_prediction LibIME::Core) 9 | install(TARGETS libime_prediction DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT tools) 10 | add_executable(LibIME::prediction ALIAS libime_prediction) 11 | 12 | add_executable(libime_pinyindict libime_pinyindict.cpp) 13 | target_link_libraries(libime_pinyindict LibIME::Pinyin) 14 | install(TARGETS libime_pinyindict DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT tools) 15 | add_executable(LibIME::pinyindict ALIAS libime_pinyindict) 16 | 17 | add_executable(libime_history libime_history.cpp) 18 | target_link_libraries(libime_history LibIME::Core) 19 | install(TARGETS libime_history DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT tools) 20 | add_executable(LibIME::history ALIAS libime_history) 21 | 22 | add_executable(libime_tabledict libime_tabledict.cpp) 23 | target_link_libraries(libime_tabledict LibIME::Table) 24 | install(TARGETS libime_tabledict DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT tools) 25 | add_executable(LibIME::tabledict ALIAS libime_tabledict) 26 | 27 | add_executable(libime_migrate_fcitx4_table libime_migrate_fcitx4_table.cpp) 28 | target_link_libraries(libime_migrate_fcitx4_table LibIME::Table) 29 | install(TARGETS libime_migrate_fcitx4_table DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT tools) 30 | add_executable(LibIME::migrate_fcitx4_table ALIAS libime_migrate_fcitx4_table) 31 | 32 | add_executable(libime_migrate_fcitx4_pinyin libime_migrate_fcitx4_pinyin.cpp) 33 | target_link_libraries(libime_migrate_fcitx4_pinyin 
LibIME::Pinyin) 34 | install(TARGETS libime_migrate_fcitx4_pinyin DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT tools) 35 | add_executable(LibIME::migrate_fcitx4_pinyin ALIAS libime_migrate_fcitx4_pinyin) 36 | -------------------------------------------------------------------------------- /tools/libime_pinyindict.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include "libime/core/utils.h" 14 | #include "libime/core/utils_p.h" 15 | #include "libime/pinyin/pinyindictionary.h" 16 | 17 | void usage(const char *argv0) { 18 | std::cout << "Usage: " << argv0 << " [-d] \n" 19 | << "-d: Dump binary to text\n" 20 | << "-v: Show debug message\n" 21 | << "-h: Show this help\n"; 22 | } 23 | 24 | int main(int argc, char *argv[]) { 25 | 26 | bool dump = false; 27 | int c; 28 | while ((c = getopt(argc, argv, "dhv")) != -1) { 29 | switch (c) { 30 | case 'd': 31 | dump = true; 32 | break; 33 | case 'v': 34 | fcitx::Log::setLogRule("libime=5"); 35 | break; 36 | case 'h': 37 | usage(argv[0]); 38 | return 0; 39 | default: 40 | usage(argv[0]); 41 | return 1; 42 | } 43 | } 44 | 45 | if (optind + 2 != argc) { 46 | usage(argv[0]); 47 | return 1; 48 | } 49 | using namespace libime; 50 | PinyinDictionary dict; 51 | 52 | auto t0 = std::chrono::high_resolution_clock::now(); 53 | dict.load(PinyinDictionary::SystemDict, argv[optind], 54 | dump ? 
PinyinDictFormat::Binary : PinyinDictFormat::Text); 55 | LIBIME_DEBUG() << "Load pinyin dict: " << millisecondsTill(t0); 56 | 57 | std::ofstream fout; 58 | std::ostream *out; 59 | if (std::string_view(argv[optind + 1]) == "-") { 60 | out = &std::cout; 61 | } else { 62 | fout.open(argv[optind + 1], std::ios::out | std::ios::binary); 63 | out = &fout; 64 | } 65 | dict.save(PinyinDictionary::SystemDict, *out, 66 | dump ? PinyinDictFormat::Text : PinyinDictFormat::Binary); 67 | return 0; 68 | } 69 | -------------------------------------------------------------------------------- /src/libime/core/endian_p.h: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2020~2025 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | * 6 | */ 7 | #ifndef _LIBIME_LIBIME_CORE_ENDIAN_P_H_ 8 | #define _LIBIME_LIBIME_CORE_ENDIAN_P_H_ 9 | 10 | #include 11 | #if defined(__linux__) || defined(__GLIBC__) || defined(__EMSCRIPTEN__) 12 | #include // IWYU pragma: export 13 | #elif defined(__APPLE__) 14 | 15 | #include // IWYU pragma: export 16 | 17 | #define htobe16(x) OSSwapHostToBigInt16(x) 18 | #define htole16(x) OSSwapHostToLittleInt16(x) 19 | #define be16toh(x) OSSwapBigToHostInt16(x) 20 | #define le16toh(x) OSSwapLittleToHostInt16(x) 21 | 22 | #define htobe32(x) OSSwapHostToBigInt32(x) 23 | #define htole32(x) OSSwapHostToLittleInt32(x) 24 | #define be32toh(x) OSSwapBigToHostInt32(x) 25 | #define le32toh(x) OSSwapLittleToHostInt32(x) 26 | 27 | #define htobe64(x) OSSwapHostToBigInt64(x) 28 | #define htole64(x) OSSwapHostToLittleInt64(x) 29 | #define be64toh(x) OSSwapBigToHostInt64(x) 30 | #define le64toh(x) OSSwapLittleToHostInt64(x) 31 | 32 | #elif defined(_WIN32) 33 | #include 34 | 35 | #define htobe16(x) _byteswap_ushort(x) 36 | #define htole16(x) (x) 37 | #define be16toh(x) _byteswap_ushort(x) 38 | #define le16toh(x) (x) 39 | 40 | #define htobe32(x) _byteswap_ulong(x) 41 | #define htole32(x) (x) 42 | 
#define be32toh(x) _byteswap_ulong(x) 43 | #define le32toh(x) (x) 44 | 45 | #define htobe64(x) _byteswap_uint64(x) 46 | #define htole64(x) (x) 47 | #define be64toh(x) _byteswap_uint64(x) 48 | #define le64toh(x) (x) 49 | #else 50 | #include // IWYU pragma: export 51 | #endif 52 | 53 | enum { BYTE_ORDER_MSB_FIRST = 1, BYTE_ORDER_LSB_FIRST = 0 }; 54 | inline char hostByteOrder() { 55 | const uint16_t endian = 1; 56 | uint8_t byteOrder = 0; 57 | if (*reinterpret_cast(&endian)) { 58 | byteOrder = BYTE_ORDER_LSB_FIRST; 59 | } else { 60 | byteOrder = BYTE_ORDER_MSB_FIRST; 61 | } 62 | return byteOrder; 63 | } 64 | 65 | inline bool isLittleEndian() { return hostByteOrder() == BYTE_ORDER_LSB_FIRST; } 66 | 67 | #endif // _LIBIME_LIBIME_CORE_ENDIAN_P_H_ 68 | -------------------------------------------------------------------------------- /src/libime/core/historybigram.h: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | #ifndef _FCITX_LIBIME_CORE_HISTORYBIGRAM_H_ 7 | #define _FCITX_LIBIME_CORE_HISTORYBIGRAM_H_ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | namespace libime { 22 | 23 | class HistoryBigramPrivate; 24 | 25 | class LIBIMECORE_EXPORT HistoryBigram { 26 | public: 27 | HistoryBigram(); 28 | 29 | FCITX_DECLARE_VIRTUAL_DTOR_MOVE(HistoryBigram); 30 | 31 | void load(std::istream &in); 32 | void loadText(std::istream &in); 33 | void save(std::ostream &out); 34 | void dump(std::ostream &out); 35 | void clear(); 36 | 37 | /// Set unknown probability penalty. 38 | /// \param unknown is a log probability.
39 | void setUnknownPenalty(float unknown); 40 | float unknownPenalty() const; 41 | 42 | void setUseOnlyUnigram(bool useOnlyUnigram); 43 | bool useOnlyUnigram() const; 44 | 45 | void forget(std::string_view word); 46 | 47 | bool isUnknown(std::string_view v) const; 48 | float score(const WordNode *prev, const WordNode *cur) const { 49 | return score(prev ? prev->word() : "", cur ? cur->word() : ""); 50 | } 51 | float score(std::string_view prev, std::string_view cur) const; 52 | void add(const SentenceResult &sentence); 53 | void add(const std::vector &sentence); 54 | 55 | /// Fill the prediction based on current sentence. 56 | void fillPredict(std::unordered_set &words, 57 | const std::vector &sentence, 58 | size_t maxSize) const; 59 | 60 | bool containsBigram(std::string_view prev, std::string_view cur) const; 61 | 62 | private: 63 | std::unique_ptr d_ptr; 64 | FCITX_DECLARE_PRIVATE(HistoryBigram); 65 | }; 66 | } // namespace libime 67 | 68 | #endif // _FCITX_LIBIME_CORE_HISTORYBIGRAM_H_ 69 | -------------------------------------------------------------------------------- /src/libime/table/tableoptions.h: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | #ifndef _FCITX_LIBIME_TABLE_TABLEOPTIONS_H_ 7 | #define _FCITX_LIBIME_TABLE_TABLEOPTIONS_H_ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | namespace libime { 18 | 19 | enum class OrderPolicy { 20 | No, 21 | Fast, 22 | Freq, 23 | }; 24 | 25 | class TableOptionsPrivate; 26 | 27 | class LIBIMETABLE_EXPORT TableOptions { 28 | public: 29 | TableOptions(); 30 | FCITX_DECLARE_VIRTUAL_DTOR_COPY_AND_MOVE(TableOptions) 31 | 32 | FCITX_DECLARE_PROPERTY(OrderPolicy, orderPolicy, setOrderPolicy); 33 | FCITX_DECLARE_PROPERTY(uint32_t, noSortInputLength, setNoSortInputLength); 34 | FCITX_DECLARE_PROPERTY(bool, 
autoSelect, setAutoSelect); 35 | FCITX_DECLARE_PROPERTY(int, autoSelectLength, setAutoSelectLength); 36 | FCITX_DECLARE_PROPERTY(std::string, autoSelectRegex, setAutoSelectRegex); 37 | FCITX_DECLARE_PROPERTY(int, noMatchAutoSelectLength, 38 | setNoMatchAutoSelectLength); 39 | FCITX_DECLARE_PROPERTY(std::string, noMatchAutoSelectRegex, 40 | setNoMatchAutoSelectRegex); 41 | FCITX_DECLARE_PROPERTY(bool, commitRawInput, setCommitRawInput); 42 | FCITX_DECLARE_PROPERTY(std::set, endKey, setEndKey); 43 | FCITX_DECLARE_PROPERTY(uint32_t, matchingKey, setMatchingKey); 44 | FCITX_DECLARE_PROPERTY(bool, exactMatch, setExactMatch); 45 | FCITX_DECLARE_PROPERTY(bool, learning, setLearning); 46 | FCITX_DECLARE_PROPERTY(int, autoPhraseLength, setAutoPhraseLength); 47 | FCITX_DECLARE_PROPERTY(int, saveAutoPhraseAfter, setSaveAutoPhraseAfter); 48 | FCITX_DECLARE_PROPERTY(std::unordered_set, autoRuleSet, 49 | setAutoRuleSet); 50 | FCITX_DECLARE_PROPERTY(std::string, languageCode, setLanguageCode); 51 | FCITX_DECLARE_PROPERTY(bool, sortByCodeLength, setSortByCodeLength); 52 | 53 | private: 54 | std::unique_ptr d_ptr; 55 | FCITX_DECLARE_PRIVATE(TableOptions); 56 | }; 57 | } // namespace libime 58 | 59 | #endif // _FCITX_LIBIME_TABLE_TABLEOPTIONS_H_ 60 | -------------------------------------------------------------------------------- /test/testpinyinprediction.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2023-2023 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | 7 | #include 8 | #include 9 | #include 10 | #include "libime/core/userlanguagemodel.h" 11 | #include "libime/pinyin/pinyindictionary.h" 12 | #include "libime/pinyin/pinyinencoder.h" 13 | #include "libime/pinyin/pinyinprediction.h" 14 | #include "testdir.h" 15 | 16 | using namespace libime; 17 | 18 | namespace fcitx { 19 | 20 | LogMessageBuilder &operator<<(LogMessageBuilder &log, 21 | PinyinPredictionSource type) { 22 | switch 
(type) { 23 | case PinyinPredictionSource::Dictionary: 24 | log << "Dict"; 25 | break; 26 | case PinyinPredictionSource::Model: 27 | log << "Model"; 28 | break; 29 | } 30 | return log; 31 | } 32 | 33 | } // namespace fcitx 34 | 35 | int main() { 36 | UserLanguageModel model(LIBIME_BINARY_DIR "/data/sc.lm"); 37 | PinyinDictionary dict; 38 | dict.load(PinyinDictionary::SystemDict, 39 | LIBIME_BINARY_DIR "/data/dict_sc.txt", PinyinDictFormat::Text); 40 | 41 | PinyinPrediction prediction; 42 | prediction.setUserLanguageModel(&model); 43 | prediction.setPinyinDictionary(&dict); 44 | auto py = PinyinEncoder::encodeFullPinyin("zhong'guo"); 45 | auto result = prediction.predict(model.nullState(), {"我", "喜欢", "中国"}, 46 | {py.data(), py.size()}, 20); 47 | auto noPyResult = prediction.predict({"我", "喜欢", "中国"}, 20); 48 | FCITX_ASSERT(result.size() > noPyResult.size()) 49 | << result << " " << noPyResult; 50 | 51 | // Check if word that exists in multiple sub dicts won't generate multiple 52 | // result. 
53 | py = PinyinEncoder::encodeFullPinyin("guan'xi"); 54 | FCITX_ASSERT( 55 | dict.lookupWord(PinyinDictionary::SystemDict, "guan'xi'ren", "关系人")); 56 | dict.addWord(PinyinDictionary::UserDict, "guan'xi'ren", "关系人"); 57 | result = prediction.predict(model.nullState(), {"关系"}, 58 | {py.data(), py.size()}, 49); 59 | FCITX_ASSERT( 60 | std::count_if(result.begin(), result.end(), [](const auto &item) { 61 | return std::get(item) == "人"; 62 | }) == 1); 63 | return 0; 64 | } 65 | -------------------------------------------------------------------------------- /src/libime/pinyin/pinyincorrectionprofile.h: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2024-2024 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | #ifndef _FCITX_LIBIME_PINYIN_PINYINCORRECTIONPROFILE_H_ 7 | #define _FCITX_LIBIME_PINYIN_PINYINCORRECTIONPROFILE_H_ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | namespace libime { 17 | 18 | /** 19 | * Built-in pinyin profile mapping 20 | * 21 | * @since 1.1.7 22 | */ 23 | enum class BuiltinPinyinCorrectionProfile { 24 | /** 25 | * Pinyin correction based on qwerty keyboard 26 | */ 27 | Qwerty, 28 | }; 29 | 30 | class PinyinCorrectionProfilePrivate; 31 | 32 | /** 33 | * Class that holds updated Pinyin correction mapping based on correction 34 | * mapping. 35 | * @since 1.1.7 36 | */ 37 | class LIBIMEPINYIN_EXPORT PinyinCorrectionProfile { 38 | public: 39 | /** 40 | * Construct the profile based on builtin layout. 41 | * 42 | * @param profile built-in profile 43 | */ 44 | explicit PinyinCorrectionProfile(BuiltinPinyinCorrectionProfile profile); 45 | 46 | /** 47 | * Construct the profile based on customized mapping. 48 | * 49 | * E.g. w may be corrected to q,e, the mapping will contain {'w': ['q', 50 | * 'e']}. 51 | * 52 | * @param mapping pinyin character and the corresponding possible wrong key. 
53 | */ 54 | explicit PinyinCorrectionProfile( 55 | const std::unordered_map> &mapping); 56 | 57 | virtual ~PinyinCorrectionProfile(); 58 | 59 | /** 60 | * Return the updated pinyin map 61 | * 62 | * New entries will be marked with PinyinFuzzyFlag::Correction 63 | * 64 | * @see getPinyinMapV2 65 | */ 66 | const PinyinMap &pinyinMap() const; 67 | /** 68 | * Return the correction mapping. 69 | * 70 | * E.g. w may be corrected to q,e, the mapping will contain {'w': ['q', 71 | * 'e']}. 72 | * 73 | * @see getPinyinMapV2 74 | */ 75 | const std::unordered_map> &correctionMap() const; 76 | 77 | private: 78 | FCITX_DECLARE_PRIVATE(PinyinCorrectionProfile); 79 | std::unique_ptr d_ptr; 80 | }; 81 | 82 | } // namespace libime 83 | 84 | #endif 85 | -------------------------------------------------------------------------------- /src/libime/table/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(LIBIME_TABLE_HDRS 2 | tablebaseddictionary.h 3 | tablecontext.h 4 | tableoptions.h 5 | tablerule.h 6 | tabledecoder.h 7 | autophrasedict.h 8 | ${CMAKE_CURRENT_BINARY_DIR}/libimetable_export.h 9 | ) 10 | 11 | set(LIBIME_TABLE_SRCS 12 | tablebaseddictionary.cpp 13 | tablecontext.cpp 14 | tableoptions.cpp 15 | tabledecoder.cpp 16 | autophrasedict.cpp 17 | log.cpp 18 | tablerule.cpp 19 | ) 20 | 21 | ecm_setup_version(PROJECT 22 | VARIABLE_PREFIX IMETable 23 | PACKAGE_VERSION_FILE "${CMAKE_CURRENT_BINARY_DIR}/LibIMETableConfigVersion.cmake") 24 | set(IMETable_SOVERSION 0) 25 | 26 | add_library(IMETable ${LIBIME_TABLE_SRCS}) 27 | set_target_properties(IMETable 28 | PROPERTIES VERSION ${IMETable_VERSION} 29 | SOVERSION ${IMETable_SOVERSION} 30 | EXPORT_NAME Table 31 | ) 32 | 33 | target_include_directories(IMETable PUBLIC 34 | $ 35 | $ 36 | $) 37 | 38 | target_link_libraries(IMETable PUBLIC Fcitx5::Utils Boost::boost LibIME::Core PRIVATE Boost::iostreams PkgConfig::ZSTD) 39 | 40 | install(TARGETS IMETable EXPORT LibIMETableTargets LIBRARY 
DESTINATION "${CMAKE_INSTALL_LIBDIR}" COMPONENT lib) 41 | install(FILES ${LIBIME_TABLE_HDRS} DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/LibIME/libime/table" COMPONENT header) 42 | 43 | add_library(LibIME::Table ALIAS IMETable) 44 | 45 | configure_package_config_file("${CMAKE_CURRENT_SOURCE_DIR}/LibIMETableConfig.cmake.in" 46 | "${CMAKE_CURRENT_BINARY_DIR}/LibIMETableConfig.cmake" 47 | INSTALL_DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/LibIMETable" 48 | ) 49 | 50 | generate_export_header(IMETable BASE_NAME LibIMETable) 51 | 52 | install(EXPORT LibIMETableTargets 53 | DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/LibIMETable" 54 | FILE LibIMETableTargets.cmake 55 | NAMESPACE LibIME:: 56 | COMPONENT Devel) 57 | 58 | install(FILES "${CMAKE_CURRENT_BINARY_DIR}/LibIMETableConfig.cmake" 59 | "${CMAKE_CURRENT_BINARY_DIR}/LibIMETableConfigVersion.cmake" 60 | DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/LibIMETable" 61 | COMPONENT Devel) 62 | 63 | 64 | -------------------------------------------------------------------------------- /src/libime/pinyin/pinyindata.h: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | #ifndef _FCITX_LIBIME_PINYIN_PINYINDATA_H_ 7 | #define _FCITX_LIBIME_PINYIN_PINYINDATA_H_ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | namespace libime { 24 | struct LIBIMEPINYIN_EXPORT PinyinHash { 25 | std::size_t operator()(std::string_view const &val) const { 26 | return boost::hash_range(val.begin(), val.end()); 27 | } 28 | }; 29 | 30 | class LIBIMEPINYIN_EXPORT PinyinEntry { 31 | public: 32 | PinyinEntry(const char *pinyin, PinyinInitial initial, PinyinFinal final, 33 | PinyinFuzzyFlags flags) 34 | : pinyin_(pinyin), initial_(initial), final_(final), flags_(flags) {} 35 
| 36 | std::string_view pinyinView() const { return pinyin_; } 37 | constexpr const std::string &pinyin() const { return pinyin_; } 38 | constexpr PinyinInitial initial() const { return initial_; } 39 | constexpr PinyinFinal final() const { return final_; } 40 | constexpr PinyinFuzzyFlags flags() const { return flags_; } 41 | 42 | private: 43 | std::string pinyin_; 44 | PinyinInitial initial_; 45 | PinyinFinal final_; 46 | PinyinFuzzyFlags flags_; 47 | }; 48 | 49 | using PinyinMap = boost::multi_index_container< 50 | PinyinEntry, 51 | boost::multi_index::indexed_by, 54 | PinyinHash>>>; 55 | 56 | LIBIMEPINYIN_EXPORT const PinyinMap &getPinyinMap(); 57 | LIBIMEPINYIN_EXPORT const PinyinMap &getPinyinMapV2(); 58 | LIBIMEPINYIN_EXPORT const std::vector &getEncodedInitialFinal(); 59 | 60 | LIBIMEPINYIN_EXPORT const 61 | std::unordered_map> & 62 | getInnerSegment(); 63 | } // namespace libime 64 | 65 | #endif // _FCITX_LIBIME_PINYIN_PINYINDATA_H_ 66 | -------------------------------------------------------------------------------- /src/libime/core/triedictionary.h: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2018-2018 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | * 6 | */ 7 | #ifndef _LIBIME_LIBIME_CORE_TRIEDICTIONARY_H_ 8 | #define _LIBIME_LIBIME_CORE_TRIEDICTIONARY_H_ 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | namespace libime { 20 | 21 | class TrieDictionaryPrivate; 22 | 23 | class LIBIMECORE_EXPORT TrieDictionary : public Dictionary, 24 | public fcitx::ConnectableObject { 25 | public: 26 | using TrieType = DATrie; 27 | 28 | static const size_t SystemDict = 0; 29 | static const size_t UserDict = 1; 30 | explicit TrieDictionary(); 31 | ~TrieDictionary(); 32 | 33 | // Append a dictionary at the end. 34 | void addEmptyDict(); 35 | 36 | // Remove all dictionary except system and user. 
37 | void removeAll(); 38 | 39 | /** 40 | * Remove all dictionary from given index. 41 | * 42 | * @param idx the index need to be within [UserDict + 1, dictSize()) 43 | * @since 1.0.10 44 | */ 45 | void removeFrom(size_t idx); 46 | 47 | // Clear dictionary. 48 | void clear(size_t idx); 49 | 50 | const TrieType *trie(size_t idx) const; 51 | 52 | /** 53 | * Set trie from external source. 54 | * 55 | * There is no validation on the data within it, subclass may expect a 56 | * certain way of organize the key and value. 57 | * 58 | * @param idx the index need to be within [0, dictSize()) 59 | * @param trie new trie. 60 | * @since 1.1.7 61 | */ 62 | void setTrie(size_t idx, TrieType trie); 63 | 64 | // Total number to dictionary. 65 | size_t dictSize() const; 66 | 67 | FCITX_DECLARE_SIGNAL(TrieDictionary, dictionaryChanged, void(size_t)); 68 | FCITX_DECLARE_SIGNAL(TrieDictionary, dictSizeChanged, void(size_t)); 69 | 70 | protected: 71 | TrieType *mutableTrie(size_t idx); 72 | void addWord(size_t idx, std::string_view key, float cost = 0.0F); 73 | bool removeWord(size_t idx, std::string_view key); 74 | 75 | std::unique_ptr d_ptr; 76 | FCITX_DECLARE_PRIVATE(TrieDictionary); 77 | }; 78 | } // namespace libime 79 | 80 | #endif // _LIBIME_LIBIME_CORE_TRIEDICTIONARY_H_ 81 | -------------------------------------------------------------------------------- /src/libime/table/tabledecoder.h: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | #ifndef _FCITX_LIBIME_TABLE_TABLEDECODER_H_ 7 | #define _FCITX_LIBIME_TABLE_TABLEDECODER_H_ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | namespace libime { 22 | 23 | class TableLatticeNodePrivate; 24 | 25 | class LIBIMETABLE_EXPORT TableLatticeNode : public LatticeNode { 26 | 
public: 27 | TableLatticeNode(std::string_view word, WordIndex idx, 28 | SegmentGraphPath path, const State &state, float cost, 29 | std::unique_ptr data); 30 | virtual ~TableLatticeNode(); 31 | 32 | uint32_t index() const; 33 | PhraseFlag flag() const; 34 | const std::string &code() const; 35 | size_t codeLength() const; 36 | 37 | private: 38 | std::unique_ptr d_ptr; 39 | }; 40 | 41 | class LIBIMETABLE_EXPORT TableDecoder : public Decoder { 42 | public: 43 | TableDecoder(const TableBasedDictionary *dict, 44 | const LanguageModelBase *model) 45 | : Decoder(dict, model) {} 46 | 47 | protected: 48 | LatticeNode *createLatticeNodeImpl(const SegmentGraphBase &graph, 49 | const LanguageModelBase *model, 50 | std::string_view word, WordIndex idx, 51 | SegmentGraphPath path, 52 | const State &state, float cost, 53 | std::unique_ptr data, 54 | bool onlyPath) const override; 55 | 56 | // When segment graph is extremely simple, no need to sort because context 57 | // will sort it anyway. 58 | bool needSort(const SegmentGraph &graph, 59 | const SegmentGraphNode *node) const override; 60 | }; 61 | 62 | LIBIMETABLE_EXPORT SegmentGraph graphForCode(std::string_view s, 63 | const TableBasedDictionary &dict); 64 | } // namespace libime 65 | 66 | #endif // _FCITX_LIBIME_TABLE_TABLEDECODER_H_ 67 | -------------------------------------------------------------------------------- /src/libime/pinyin/pinyindecoder.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | 7 | #include "libime/pinyin/pinyindecoder.h" 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include "libime/core/languagemodel.h" 13 | #include "libime/core/lattice.h" 14 | #include "libime/core/segmentgraph.h" 15 | #include "pinyindecoder_p.h" 16 | 17 | namespace libime { 18 | 19 | PinyinLatticeNode::PinyinLatticeNode( 20 | std::string_view word, WordIndex 
idx, SegmentGraphPath path, 21 | const State &state, float cost, 22 | std::unique_ptr data) 23 | : LatticeNode(word, idx, std::move(path), state, cost), 24 | d_ptr(std::move(data)) {} 25 | 26 | PinyinLatticeNode::~PinyinLatticeNode() = default; 27 | 28 | /* Returns the encoded pinyin kept in the node's private data. When the node has no private data, a reference to a function-local static empty string is returned instead. */ const std::string &PinyinLatticeNode::encodedPinyin() const { 29 | static const std::string empty; 30 | if (!d_ptr) { 31 | return empty; 32 | } 33 | return d_ptr->encodedPinyin_; 34 | } 35 | 36 | /* Returns the isCorrection_ flag of the private data; false when the node has no private data. */ bool PinyinLatticeNode::isCorrection() const { 37 | if (!d_ptr) { 38 | return false; 39 | } 40 | return d_ptr->isCorrection_; 41 | } 42 | 43 | /* Walks backwards from this node through prev() and returns true as soon as one node on that chain reports isCorrection(); false if the chain ends without one. NOTE(review): the target type of the static_cast is not visible in this listing - presumably every node reachable through prev() is a PinyinLatticeNode; confirm. */ bool PinyinLatticeNode::anyCorrectionOnPath() const { 44 | const auto *pivot = this; 45 | while (pivot) { 46 | if (pivot->isCorrection()) { 47 | return true; 48 | } 49 | pivot = static_cast(pivot->prev()); 50 | } 51 | return false; 52 | } 53 | 54 | /* Creates the lattice node for a word. Ownership of data is taken over (release() followed by a static_cast into a new unique_ptr) and handed to the new PinyinLatticeNode. Returns nullptr instead of a node when all of the following hold: the model reports the word as unknown, private data exists and its encodedPinyin_ has size 2, the path does not start at graph.start(), and onlyPath is false. Otherwise returns a node allocated with new. */ LatticeNode *PinyinDecoder::createLatticeNodeImpl( 55 | const SegmentGraphBase &graph, const LanguageModelBase *model, 56 | std::string_view word, WordIndex idx, SegmentGraphPath path, 57 | const State &state, float cost, std::unique_ptr data, 58 | bool onlyPath) const { 59 | std::unique_ptr pinyinData( 60 | static_cast(data.release())); 61 | if (model->isUnknown(idx, word)) { 62 | // There can be a lot of unknown single characters; we do not care 63 | // about the ones that are not used for candidates, so skip them. 64 | if ((pinyinData && pinyinData->encodedPinyin_.size() == 2) && 65 | path.front() != &graph.start() && !onlyPath) { 66 | return nullptr; 67 | } 68 | } 69 | 70 | return new PinyinLatticeNode(word, idx, std::move(path), state, cost, 71 | std::move(pinyinData)); 72 | } 73 | } // namespace libime 74 | -------------------------------------------------------------------------------- /src/libime/pinyin/pinyinmatchstate.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | 7 | #include
"libime/pinyin/pinyinmatchstate.h" 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include "libime/pinyin/pinyinencoder.h" 13 | #include "pinyincontext.h" 14 | #include "pinyinime.h" 15 | #include "pinyinmatchstate_p.h" 16 | 17 | namespace libime { 18 | 19 | /* Creates the match state; the private data object is constructed from the given context. */ PinyinMatchState::PinyinMatchState(PinyinContext *context) 20 | : d_ptr(std::make_unique(context)) {} 21 | PinyinMatchState::~PinyinMatchState() {} 22 | 23 | /* Empties the matched paths and both caches (node cache and match cache). */ void PinyinMatchState::clear() { 24 | FCITX_D(); 25 | d->matchedPaths_.clear(); 26 | d->nodeCacheMap_.clear(); 27 | d->matchCacheMap_.clear(); 28 | } 29 | 30 | /* Forgets everything recorded for the given nodes. First the matchedPaths_ entries keyed by those nodes are erased; then, in every remaining entry, each stored path whose first node (path_.front()) is one of the discarded nodes is erased as well. */ void PinyinMatchState::discardNode( 31 | const std::unordered_set &nodes) { 32 | FCITX_D(); 33 | for (const auto *node : nodes) { 34 | d->matchedPaths_.erase(node); 35 | } 36 | for (auto &p : d->matchedPaths_) { 37 | auto &l = p.second; 38 | auto iter = l.begin(); 39 | while (iter != l.end()) { 40 | if (nodes.contains(iter->path_.front())) { 41 | /* erase() returns the iterator following the removed element, so the loop must not advance again */ iter = l.erase(iter); 42 | } else { 43 | iter++; 44 | } 45 | } 46 | } 47 | } 48 | 49 | /* Returns the fuzzy flags of the IME reached through the context. */ PinyinFuzzyFlags PinyinMatchState::fuzzyFlags() const { 50 | FCITX_D(); 51 | return d->context_->ime()->fuzzyFlags(); 52 | } 53 | 54 | /* Returns the shuangpin profile of the context's IME when the context uses shuangpin; otherwise an empty pointer. */ std::shared_ptr 55 | PinyinMatchState::shuangpinProfile() const { 56 | FCITX_D(); 57 | if (d->context_->useShuangpin()) { 58 | return d->context_->ime()->shuangpinProfile(); 59 | } 60 | return {}; 61 | } 62 | 63 | /* Returns the correction profile of the context's IME, but only when PinyinFuzzyFlag::Correction is set in the IME's fuzzy flags; otherwise an empty pointer. */ std::shared_ptr 64 | PinyinMatchState::correctionProfile() const { 65 | FCITX_D(); 66 | if (d->context_->ime()->fuzzyFlags().test(PinyinFuzzyFlag::Correction)) { 67 | return d->context_->ime()->correctionProfile(); 68 | } 69 | return {}; 70 | } 71 | 72 | /* Forwards to partialLongWordLimit() of the context's IME. */ size_t PinyinMatchState::partialLongWordLimit() const { 73 | FCITX_D(); 74 | return d->context_->ime()->partialLongWordLimit(); 75 | } 76 | 77 | /* Erases the match cache and node cache entries keyed by the trie of dictionary idx (both maps are keyed by the pointer returned from dict()->trie(idx)). */ void PinyinMatchState::discardDictionary(size_t idx) { 78 | FCITX_D(); 79 | d->matchCacheMap_.erase(d->context_->ime()->dict()->trie(idx)); 80 | d->nodeCacheMap_.erase(d->context_->ime()->dict()->trie(idx)); 81 | } 82 | } // 
namespace libime 83 | -------------------------------------------------------------------------------- /src/libime/table/tablerule.h: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2015-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | 7 | #ifndef _FCITX_LIBIME_TABLE_TABLERULE_H_ 8 | #define _FCITX_LIBIME_TABLE_TABLERULE_H_ 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | namespace libime { 19 | enum class TableRuleEntryFlag : std::uint32_t { FromFront, FromBack }; 20 | 21 | enum class TableRuleFlag : std::uint32_t { LengthLongerThan, LengthEqual }; 22 | 23 | class LIBIMETABLE_EXPORT TableRuleEntry { 24 | public: 25 | TableRuleEntry(TableRuleEntryFlag flag = TableRuleEntryFlag::FromFront, 26 | uint8_t character = 0, uint8_t encodingIndex = 0); 27 | 28 | explicit TableRuleEntry(std::istream &in); 29 | 30 | FCITX_INLINE_DEFINE_DEFAULT_DTOR_COPY_AND_MOVE(TableRuleEntry); 31 | 32 | bool isPlaceHolder() const; 33 | 34 | TableRuleEntryFlag flag() const { return flag_; } 35 | uint8_t character() const { return character_; } 36 | 37 | uint8_t encodingIndex() const { return encodingIndex_; } 38 | int index() const; 39 | 40 | private: 41 | TableRuleEntryFlag flag_; 42 | uint8_t character_; 43 | uint8_t encodingIndex_; 44 | }; 45 | 46 | class LIBIMETABLE_EXPORT TableRule { 47 | public: 48 | TableRule(const std::string &ruleString, unsigned int maxLength); 49 | 50 | TableRule(TableRuleFlag _flag = TableRuleFlag::LengthEqual, 51 | int _phraseLength = 0, std::vector _entries = {}); 52 | 53 | explicit TableRule(std::istream &in); 54 | 55 | FCITX_INLINE_DEFINE_DEFAULT_DTOR_COPY_AND_MOVE(TableRule) 56 | 57 | std::string name() const; 58 | 59 | std::string toString() const; 60 | 61 | TableRuleFlag flag() const { return flag_; } 62 | uint8_t phraseLength() const { return phraseLength_; } 63 | const std::vector &entries() 
const { return entries_; } 64 | size_t codeLength() const; 65 | 66 | private: 67 | TableRuleFlag flag_ = TableRuleFlag::LengthLongerThan; 68 | uint8_t phraseLength_ = 0; 69 | std::vector entries_; 70 | }; 71 | 72 | LIBIMETABLE_EXPORT std::ostream &operator<<(std::ostream &out, 73 | const TableRuleEntry &r); 74 | 75 | LIBIMETABLE_EXPORT std::ostream &operator<<(std::ostream &out, 76 | const TableRule &r); 77 | 78 | } // namespace libime 79 | 80 | #endif // _FCITX_LIBIME_TABLE_TABLERULE_H_ 81 | -------------------------------------------------------------------------------- /src/libime/pinyin/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(LIBIME_PINYIN_HDRS 2 | pinyincontext.h 3 | pinyindata.h 4 | pinyindecoder.h 5 | pinyindictionary.h 6 | pinyinencoder.h 7 | pinyinime.h 8 | pinyinmatchstate.h 9 | pinyinmatchstate_p.h 10 | pinyinprediction.h 11 | shuangpindata.h 12 | shuangpinprofile.h 13 | pinyincorrectionprofile.h 14 | ${CMAKE_CURRENT_BINARY_DIR}/libimepinyin_export.h 15 | ) 16 | 17 | set(LIBIME_PINYIN_SRCS 18 | pinyincontext.cpp 19 | pinyindata.cpp 20 | pinyindecoder.cpp 21 | pinyindictionary.cpp 22 | pinyinencoder.cpp 23 | pinyinime.cpp 24 | pinyinmatchstate.cpp 25 | shuangpinprofile.cpp 26 | pinyinprediction.cpp 27 | pinyincorrectionprofile.cpp 28 | ) 29 | 30 | ecm_setup_version(PROJECT 31 | VARIABLE_PREFIX IMEPinyin 32 | PACKAGE_VERSION_FILE "${CMAKE_CURRENT_BINARY_DIR}/LibIMEPinyinConfigVersion.cmake") 33 | set(IMEPinyin_SOVERSION 0) 34 | 35 | add_library(IMEPinyin ${LIBIME_PINYIN_SRCS}) 36 | set_target_properties(IMEPinyin PROPERTIES 37 | VERSION ${IMEPinyin_VERSION} 38 | SOVERSION ${IMEPinyin_SOVERSION} 39 | EXPORT_NAME Pinyin 40 | ) 41 | target_include_directories(IMEPinyin PUBLIC 42 | $ 43 | $ 44 | $) 45 | 46 | target_link_libraries(IMEPinyin PUBLIC Fcitx5::Utils Boost::boost LibIME::Core PRIVATE Boost::iostreams PkgConfig::ZSTD) 47 | 48 | install(TARGETS IMEPinyin EXPORT LibIMEPinyinTargets LIBRARY 
DESTINATION "${CMAKE_INSTALL_LIBDIR}" COMPONENT lib) 49 | install(FILES ${LIBIME_PINYIN_HDRS} DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/LibIME/libime/pinyin" COMPONENT header) 50 | 51 | add_library(LibIME::Pinyin ALIAS IMEPinyin) 52 | 53 | configure_package_config_file("${CMAKE_CURRENT_SOURCE_DIR}/LibIMEPinyinConfig.cmake.in" 54 | "${CMAKE_CURRENT_BINARY_DIR}/LibIMEPinyinConfig.cmake" 55 | INSTALL_DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/LibIMEPinyin" 56 | ) 57 | 58 | generate_export_header(IMEPinyin BASE_NAME LibIMEPinyin) 59 | 60 | install(EXPORT LibIMEPinyinTargets 61 | DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/LibIMEPinyin" 62 | FILE LibIMEPinyinTargets.cmake 63 | NAMESPACE LibIME:: 64 | COMPONENT Devel) 65 | 66 | install(FILES "${CMAKE_CURRENT_BINARY_DIR}/LibIMEPinyinConfig.cmake" 67 | "${CMAKE_CURRENT_BINARY_DIR}/LibIMEPinyinConfigVersion.cmake" 68 | DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/LibIMEPinyin" 69 | COMPONENT Devel) 70 | 71 | -------------------------------------------------------------------------------- /src/libime/core/decoder.h: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | #ifndef _FCITX_LIBIME_CORE_DECODER_H_ 7 | #define _FCITX_LIBIME_CORE_DECODER_H_ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | namespace libime { 22 | 23 | class DecoderPrivate; 24 | class Dictionary; 25 | 26 | class LIBIMECORE_EXPORT Decoder { 27 | friend class DecoderPrivate; 28 | 29 | public: 30 | Decoder(const Dictionary *dict, const LanguageModelBase *model); 31 | virtual ~Decoder(); 32 | 33 | constexpr static const size_t beamSizeDefault = 20; 34 | constexpr static const size_t frameSizeDefault = 40; 35 | 36 | const Dictionary *dict() const; 37 | const LanguageModelBase *model() const; 38 | 39 
| bool decode(Lattice &lattice, const SegmentGraph &graph, size_t nbest, 40 | const State &state, 41 | float max = std::numeric_limits::max(), 42 | float min = -std::numeric_limits::max(), 43 | size_t beamSize = beamSizeDefault, 44 | size_t frameSize = frameSizeDefault, 45 | void *helper = nullptr) const; 46 | 47 | protected: 48 | LatticeNode * 49 | createLatticeNode(const SegmentGraph &graph, const LanguageModelBase *model, 50 | std::string_view word, WordIndex idx, 51 | SegmentGraphPath path, const State &state, float cost = 0, 52 | std::unique_ptr data = nullptr, 53 | bool onlyPath = false) const { 54 | return createLatticeNodeImpl(graph, model, word, idx, std::move(path), 55 | state, cost, std::move(data), onlyPath); 56 | } 57 | virtual LatticeNode *createLatticeNodeImpl( 58 | const SegmentGraphBase &graph, const LanguageModelBase *model, 59 | std::string_view word, WordIndex idx, SegmentGraphPath path, 60 | const State &state, float cost, std::unique_ptr data, 61 | bool onlyPath) const; 62 | 63 | virtual bool needSort(const SegmentGraph & /*graph*/, 64 | const SegmentGraphNode * /*node*/) const { 65 | return true; 66 | } 67 | 68 | private: 69 | std::unique_ptr d_ptr; 70 | FCITX_DECLARE_PRIVATE(Decoder); 71 | }; 72 | } // namespace libime 73 | 74 | #endif // _FCITX_LIBIME_CORE_DECODER_H_ 75 | -------------------------------------------------------------------------------- /test/testinputbuffer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | #include 7 | #include 8 | #include 9 | #include "libime/core/inputbuffer.h" 10 | 11 | void test_basic(bool ascii) { 12 | using namespace libime; 13 | InputBuffer buffer(ascii ? 
fcitx::InputBufferOption::AsciiOnly 14 | : fcitx::InputBufferOption::NoOption); 15 | FCITX_ASSERT(buffer.empty()); 16 | FCITX_ASSERT(buffer.cursor() == 0); 17 | FCITX_ASSERT(buffer.cursorByChar() == 0); 18 | buffer.type('a'); 19 | FCITX_ASSERT(buffer.size() == 1); 20 | FCITX_ASSERT(buffer.cursor() == 1); 21 | buffer.type('b'); 22 | FCITX_ASSERT(buffer.size() == 2); 23 | FCITX_ASSERT(buffer.cursor() == 2); 24 | FCITX_ASSERT(buffer.userInput() == "ab"); 25 | buffer.setCursor(1); 26 | buffer.type("cdefg"); 27 | FCITX_ASSERT(buffer.size() == 7); 28 | FCITX_ASSERT(buffer.cursor() == 6); 29 | FCITX_ASSERT(buffer.userInput() == "acdefgb"); 30 | buffer.erase(1, 3); 31 | FCITX_ASSERT(buffer.size() == 5); 32 | FCITX_ASSERT(buffer.cursor() == 4); 33 | FCITX_ASSERT(buffer.userInput() == "aefgb"); 34 | FCITX_ASSERT(buffer[2] == "f"); 35 | buffer.erase(2, 5); 36 | FCITX_ASSERT(buffer.size() == 2); 37 | FCITX_ASSERT(buffer.cursor() == 2); 38 | int idx = 0; 39 | for (auto c : buffer) { 40 | FCITX_ASSERT(c == buffer[idx]); 41 | idx++; 42 | } 43 | } 44 | 45 | void test_utf8() { 46 | using namespace libime; 47 | InputBuffer buffer; 48 | buffer.type("\xe4\xbd\xa0\xe5\xa5\xbd"); 49 | FCITX_ASSERT(buffer.size() == 2); 50 | FCITX_ASSERT(buffer.cursor() == 2); 51 | buffer.erase(1, 2); 52 | FCITX_ASSERT(buffer.size() == 1); 53 | FCITX_ASSERT(buffer.cursor() == 1); 54 | FCITX_ASSERT(buffer.userInput() == "\xe4\xbd\xa0"); 55 | bool throwed = false; 56 | try { 57 | buffer.type("\xe4\xbd"); 58 | } catch (const std::invalid_argument &e) { 59 | throwed = true; 60 | } 61 | FCITX_ASSERT(throwed); 62 | int idx = 0; 63 | for (auto c : buffer) { 64 | FCITX_ASSERT(c == buffer[idx]); 65 | idx++; 66 | } 67 | buffer.type("a\xe5\x95\x8a"); 68 | FCITX_ASSERT(buffer.size() == 3); 69 | FCITX_ASSERT(buffer.cursor() == 3); 70 | FCITX_ASSERT(buffer.cursorByChar() == 7); 71 | buffer.setCursor(0); 72 | FCITX_ASSERT(buffer.cursorByChar() == 0); 73 | buffer.setCursor(1); 74 | FCITX_ASSERT(buffer.cursorByChar() == 
3); 75 | buffer.setCursor(2); 76 | FCITX_ASSERT(buffer.cursorByChar() == 4); 77 | buffer.clear(); 78 | FCITX_ASSERT(buffer.cursorByChar() == 0); 79 | FCITX_ASSERT(buffer.cursor() == 0); 80 | FCITX_ASSERT(buffer.empty()); 81 | } 82 | 83 | int main() { 84 | test_basic(true); 85 | test_basic(false); 86 | test_utf8(); 87 | return 0; 88 | } 89 | -------------------------------------------------------------------------------- /test/testtablerule.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2023-2023 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | 7 | #include 8 | #include "libime/table/tablerule.h" 9 | 10 | using namespace libime; 11 | 12 | int main() { 13 | { 14 | TableRule rule("e2=p11+p1z+p21+p2y+p2z", 5); 15 | FCITX_ASSERT(rule.codeLength() == 5); 16 | FCITX_ASSERT(rule.entries().size() == 5); 17 | FCITX_ASSERT(rule.flag() == TableRuleFlag::LengthEqual); 18 | FCITX_ASSERT(rule.toString() == "e2=p11+p1z+p21+p2y+p2z"); 19 | FCITX_ASSERT(rule.phraseLength() == 2); 20 | FCITX_ASSERT(rule.entries()[0].flag() == TableRuleEntryFlag::FromFront); 21 | FCITX_ASSERT(rule.entries()[0].index() == 1); 22 | FCITX_ASSERT(rule.entries()[0].character() == 1); 23 | FCITX_ASSERT(rule.entries()[1].flag() == TableRuleEntryFlag::FromFront); 24 | FCITX_ASSERT(rule.entries()[1].index() == -1); 25 | FCITX_ASSERT(rule.entries()[1].character() == 1); 26 | FCITX_ASSERT(rule.entries()[2].flag() == TableRuleEntryFlag::FromFront); 27 | FCITX_ASSERT(rule.entries()[2].index() == 1); 28 | FCITX_ASSERT(rule.entries()[2].character() == 2); 29 | FCITX_ASSERT(rule.entries()[3].flag() == TableRuleEntryFlag::FromFront); 30 | FCITX_ASSERT(rule.entries()[3].index() == -2); 31 | FCITX_ASSERT(rule.entries()[3].character() == 2); 32 | FCITX_ASSERT(rule.entries()[4].flag() == TableRuleEntryFlag::FromFront); 33 | FCITX_ASSERT(rule.entries()[4].index() == -1); 34 | FCITX_ASSERT(rule.entries()[4].character() == 
2); 35 | } 36 | { 37 | TableRule rule("E3=P11+P1Z+P21+P2Z+P3Z", 5); 38 | FCITX_ASSERT(rule.codeLength() == 5); 39 | FCITX_ASSERT(rule.entries().size() == 5); 40 | FCITX_ASSERT(rule.flag() == TableRuleFlag::LengthEqual); 41 | FCITX_ASSERT(rule.toString() == "e3=p11+p1z+p21+p2z+p3z"); 42 | FCITX_ASSERT(rule.phraseLength() == 3); 43 | FCITX_ASSERT(rule.entries()[0].flag() == TableRuleEntryFlag::FromFront); 44 | FCITX_ASSERT(rule.entries()[0].index() == 1); 45 | FCITX_ASSERT(rule.entries()[0].character() == 1); 46 | FCITX_ASSERT(rule.entries()[1].flag() == TableRuleEntryFlag::FromFront); 47 | FCITX_ASSERT(rule.entries()[1].index() == -1); 48 | FCITX_ASSERT(rule.entries()[1].character() == 1); 49 | FCITX_ASSERT(rule.entries()[2].flag() == TableRuleEntryFlag::FromFront); 50 | FCITX_ASSERT(rule.entries()[2].index() == 1); 51 | FCITX_ASSERT(rule.entries()[2].character() == 2); 52 | FCITX_ASSERT(rule.entries()[3].flag() == TableRuleEntryFlag::FromFront); 53 | FCITX_ASSERT(rule.entries()[3].index() == -1); 54 | FCITX_ASSERT(rule.entries()[3].character() == 2); 55 | FCITX_ASSERT(rule.entries()[4].flag() == TableRuleEntryFlag::FromFront); 56 | FCITX_ASSERT(rule.entries()[4].index() == -1); 57 | FCITX_ASSERT(rule.entries()[4].character() == 3); 58 | } 59 | return 0; 60 | } -------------------------------------------------------------------------------- /src/libime/pinyin/pinyinime.h: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | #ifndef _FCITX_LIBIME_PINYIN_PINYINIME_H_ 7 | #define _FCITX_LIBIME_PINYIN_PINYINIME_H_ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | namespace libime { 19 | 20 | class PinyinIMEPrivate; 21 | class PinyinDecoder; 22 | class PinyinDictionary; 23 | class UserLanguageModel; 24 | 25 | enum class PinyinPreeditMode { RawText, 
Pinyin }; 26 | 27 | /// \brief Provides shared data for PinyinContext. 28 | class LIBIMEPINYIN_EXPORT PinyinIME : public fcitx::ConnectableObject { 29 | public: 30 | PinyinIME(std::unique_ptr dict, 31 | std::unique_ptr model); 32 | virtual ~PinyinIME(); 33 | 34 | PinyinFuzzyFlags fuzzyFlags() const; 35 | void setFuzzyFlags(PinyinFuzzyFlags flags); 36 | size_t nbest() const; 37 | void setNBest(size_t n); 38 | size_t beamSize() const; 39 | void setBeamSize(size_t n); 40 | size_t frameSize() const; 41 | void setFrameSize(size_t n); 42 | size_t partialLongWordLimit() const; 43 | void setPartialLongWordLimit(size_t n); 44 | /** 45 | * \brief The maximum number of candidates that is a word. 46 | * 47 | * Limit the non single character candidates to avoid need to scroll/next 48 | * page too many characters. 49 | * 50 | * When is 0, it means no limit. 51 | * 52 | * Since 1.1.12 53 | */ 54 | size_t wordCandidateLimit() const; 55 | /** 56 | * \brief Set the maximum number of candidates that is a word. 
57 | * Since 1.1.12 58 | */ 59 | void setWordCandidateLimit(size_t n); 60 | void setScoreFilter(float maxDistance = std::numeric_limits::max(), 61 | float minPath = -std::numeric_limits::max()); 62 | void setShuangpinProfile(std::shared_ptr profile); 63 | std::shared_ptr shuangpinProfile() const; 64 | void setPreeditMode(PinyinPreeditMode mode); 65 | PinyinPreeditMode preeditMode() const; 66 | 67 | void setCorrectionProfile( 68 | std::shared_ptr profile); 69 | std::shared_ptr correctionProfile() const; 70 | 71 | float maxDistance() const; 72 | float minPath() const; 73 | 74 | PinyinDictionary *dict(); 75 | const PinyinDictionary *dict() const; 76 | const PinyinDecoder *decoder() const; 77 | UserLanguageModel *model(); 78 | const UserLanguageModel *model() const; 79 | 80 | FCITX_DECLARE_SIGNAL(PinyinIME, optionChanged, void()); 81 | 82 | private: 83 | std::unique_ptr d_ptr; 84 | FCITX_DECLARE_PRIVATE(PinyinIME); 85 | }; 86 | } // namespace libime 87 | 88 | #endif // _FCITX_LIBIME_PINYIN_PINYINIME_H_ 89 | -------------------------------------------------------------------------------- /src/libime/core/triedictionary.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2018-2018 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | * 6 | */ 7 | #include "triedictionary.h" 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | namespace libime { 17 | 18 | class TrieDictionaryPrivate : fcitx::QPtrHolder { 19 | public: 20 | TrieDictionaryPrivate(TrieDictionary *q) 21 | : fcitx::QPtrHolder(q) {} 22 | 23 | FCITX_DEFINE_SIGNAL_PRIVATE(TrieDictionary, dictionaryChanged); 24 | FCITX_DEFINE_SIGNAL_PRIVATE(TrieDictionary, dictSizeChanged); 25 | 26 | boost::ptr_vector tries_; 27 | }; 28 | 29 | TrieDictionary::TrieDictionary() 30 | : d_ptr(std::make_unique(this)) { 31 | addEmptyDict(); 32 | addEmptyDict(); 33 | } 34 | 35 | 
TrieDictionary::~TrieDictionary() {} 36 | 37 | /* Appends a new, empty trie at the end and then emits a signal carrying the new number of tries. NOTE(review): the signal name inside emit was lost in this listing - presumably dictSizeChanged; confirm. The raw pointer is handed to tries_.push_back, which is expected to take ownership. */ void TrieDictionary::addEmptyDict() { 38 | FCITX_D(); 39 | d->tries_.push_back(new TrieType); 40 | emit(d->tries_.size()); 41 | } 42 | 43 | /* Removes every trie from position idx to the end. Does nothing when idx is below UserDict + 1 or not below the current number of tries. Before erasing, one signal is emitted per index that is about to go away; after erasing, one more signal is emitted with the remaining number of tries. NOTE(review): signal names are not visible here - presumably dictionaryChanged per index and dictSizeChanged at the end; confirm. */ void TrieDictionary::removeFrom(size_t idx) { 44 | FCITX_D(); 45 | if (idx < UserDict + 1 || idx >= d->tries_.size()) { 46 | return; 47 | } 48 | 49 | for (auto i = idx; i < d->tries_.size(); i++) { 50 | emit(i); 51 | } 52 | d->tries_.erase(d->tries_.begin() + idx, d->tries_.end()); 53 | emit(d->tries_.size()); 54 | } 55 | 56 | /* Removes every trie after the user dictionary by calling removeFrom(UserDict + 1). */ void TrieDictionary::removeAll() { removeFrom(UserDict + 1); } 57 | 58 | /* Clears the content of trie idx and emits a signal carrying idx. NOTE(review): idx is not range-checked in this function. */ void TrieDictionary::clear(size_t idx) { 59 | FCITX_D(); 60 | d->tries_[idx].clear(); 61 | emit(idx); 62 | } 63 | 64 | /* Returns a read-only pointer to trie idx; idx is not range-checked here. */ const TrieDictionary::TrieType *TrieDictionary::trie(size_t idx) const { 65 | FCITX_D(); 66 | return &d->tries_[idx]; 67 | } 68 | 69 | /* Replaces trie idx by move-assigning the given trie into it, then emits a signal carrying idx. */ void TrieDictionary::setTrie(size_t idx, TrieType trie) { 70 | FCITX_D(); 71 | *mutableTrie(idx) = std::move(trie); 72 | emit(idx); 73 | } 74 | 75 | /* Returns a writable pointer to trie idx; idx is not range-checked here. */ TrieDictionary::TrieType *TrieDictionary::mutableTrie(size_t idx) { 76 | FCITX_D(); 77 | return &d->tries_[idx]; 78 | } 79 | 80 | /* Returns the current number of tries. */ size_t TrieDictionary::dictSize() const { 81 | FCITX_D(); 82 | return d->tries_.size(); 83 | } 84 | 85 | /* Stores key with the given cost in trie idx and emits a signal carrying idx. */ void TrieDictionary::addWord(size_t idx, std::string_view key, float cost) { 86 | FCITX_D(); 87 | d->tries_[idx].set(key.data(), key.size(), cost); 88 | emit(idx); 89 | } 90 | 91 | /* Erases key from trie idx. When the erase call reports success, a signal carrying idx is emitted and true is returned; otherwise nothing is emitted and false is returned. */ bool TrieDictionary::removeWord(size_t idx, std::string_view key) { 92 | FCITX_D(); 93 | if (d->tries_[idx].erase(key.data(), key.size())) { 94 | emit(idx); 95 | return true; 96 | } 97 | return false; 98 | } 99 | } // namespace libime 100 | -------------------------------------------------------------------------------- /tools/libime_tabledict.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | 7 | #include 8 | #include 9 | 
#include 10 | #include 11 | #include 12 | #include "libime/table/tablebaseddictionary.h" 13 | 14 | void usage(const char *argv0) { 15 | std::cout << "Usage: " << argv0 16 | << " [-due] [-m
] " << std::endl 17 | << "-d: Dump binary to text" << std::endl 18 | << "-u: User dict" << std::endl 19 | << "-e: Extra dict" << std::endl 20 | << "-m : Main dict to be used with extra dict" 21 | << std::endl 22 | << "-h: Show this help" << std::endl; 23 | } 24 | 25 | int main(int argc, char *argv[]) { 26 | 27 | bool dump = false; 28 | bool user = false; 29 | bool extra = false; 30 | std::optional extraMain = std::nullopt; 31 | int c; 32 | while ((c = getopt(argc, argv, "dhuem:")) != -1) { 33 | switch (c) { 34 | case 'd': 35 | dump = true; 36 | break; 37 | case 'u': 38 | user = true; 39 | break; 40 | case 'e': 41 | extra = true; 42 | break; 43 | case 'm': 44 | extraMain = std::string(optarg); 45 | break; 46 | case 'h': 47 | usage(argv[0]); 48 | return 0; 49 | default: 50 | usage(argv[0]); 51 | return 1; 52 | } 53 | } 54 | 55 | if (optind + 2 != argc) { 56 | usage(argv[0]); 57 | return 1; 58 | } 59 | using namespace libime; 60 | TableBasedDictionary dict; 61 | 62 | std::ifstream fin; 63 | std::istream *in; 64 | if (strcmp(argv[optind], "-") == 0) { 65 | in = &std::cin; 66 | } else { 67 | fin.open(argv[optind], std::ios::in | std::ios::binary); 68 | in = &fin; 69 | } 70 | 71 | const auto inputFormat = dump ? TableFormat::Binary : TableFormat::Text; 72 | const auto outputFormat = dump ? 
TableFormat::Text : TableFormat::Binary; 73 | size_t extraIndex = 0; 74 | if (extra && extraMain) { 75 | dict.load(extraMain->c_str(), libime::TableFormat::Binary); 76 | } 77 | 78 | if (extra) { 79 | extraIndex = dict.loadExtra(*in, inputFormat); 80 | } else if (user) { 81 | dict.loadUser(*in, inputFormat); 82 | } else { 83 | dict.load(*in, inputFormat); 84 | } 85 | 86 | std::ofstream fout; 87 | std::ostream *out; 88 | if (strcmp(argv[optind + 1], "-") == 0) { 89 | out = &std::cout; 90 | } else { 91 | fout.open(argv[optind + 1], std::ios::out | std::ios::binary); 92 | out = &fout; 93 | } 94 | 95 | if (extra) { 96 | dict.saveExtra(extraIndex, *out, outputFormat); 97 | } else if (user) { 98 | dict.saveUser(*out, outputFormat); 99 | } else { 100 | dict.save(*out, outputFormat); 101 | } 102 | return 0; 103 | } 104 | -------------------------------------------------------------------------------- /test/triebench.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include "libime/core/datrie.h" 15 | 16 | using namespace libime; 17 | 18 | int main() { 19 | typedef DATrie TestTrie; 20 | TestTrie tree; 21 | std::string key; 22 | std::unordered_map map; 23 | 24 | int count = 1; 25 | // key can be same as other 26 | while (std::cin >> key) { 27 | map[key] = count; 28 | tree.update(key, 29 | [count, &map, 30 | &key](TestTrie::value_type v) -> TestTrie::value_type { 31 | if (v != 0) { 32 | // this is a key inserted twice 33 | FCITX_ASSERT(map.find(key) != map.end()); 34 | } 35 | // std::cout << key << " " << v << " " << count << 36 | // std::endl; 37 | return count; 38 | }); 39 | FCITX_ASSERT(tree.exactMatchSearch(key) == count); 40 | count++; 41 | } 42 | 43 | std::vector d; 44 | d.resize(tree.size()); 45 | 
tree.dump(d.data(), d.size()); 46 | 47 | FCITX_ASSERT(tree.size() == map.size()); 48 | for (auto &p : map) { 49 | // std::cout << p.first << " " << tree.exactMatchSearch(p.first) << " " 50 | // << p.second << 51 | // std::endl; 52 | FCITX_ASSERT(tree.exactMatchSearch(p.first) == p.second); 53 | } 54 | 55 | std::string tempKey; 56 | size_t foreach_count = 0; 57 | tree.foreach([&tree, &map, &tempKey, &foreach_count]( 58 | TestTrie::value_type value, size_t len, uint64_t pos) { 59 | (void)value; 60 | tree.suffix(tempKey, len, pos); 61 | FCITX_ASSERT(map.find(tempKey) != map.end()); 62 | FCITX_ASSERT(tree.exactMatchSearch(tempKey) == value); 63 | FCITX_ASSERT(map[tempKey] == value); 64 | tree.update(tempKey, [](int32_t v) { return v + 1; }); 65 | foreach_count++; 66 | return true; 67 | }); 68 | 69 | tree.erase(map.begin()->first); 70 | FCITX_ASSERT(tree.size() == foreach_count - 1); 71 | 72 | tree.save("trie_data"); 73 | 74 | tree.clear(); 75 | 76 | FCITX_ASSERT(!tree.erase(map.begin()->first)); 77 | FCITX_ASSERT(tree.empty()); 78 | decltype(tree) trie2("trie_data"); 79 | using std::swap; 80 | swap(tree, trie2); 81 | 82 | foreach_count = 0; 83 | tree.foreach([&tree, &map, &tempKey, 84 | &foreach_count](int32_t value, size_t len, uint64_t pos) { 85 | (void)value; 86 | tree.suffix(tempKey, len, pos); 87 | FCITX_ASSERT(map.find(tempKey) != map.end()); 88 | FCITX_ASSERT(tree.exactMatchSearch(tempKey) == value); 89 | FCITX_ASSERT(map[tempKey] + 1 == value); 90 | foreach_count++; 91 | return true; 92 | }); 93 | 94 | FCITX_ASSERT(tree.size() == foreach_count); 95 | 96 | return 0; 97 | } 98 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.12) 2 | project(libime VERSION 1.1.13) 3 | set(LibIME_VERSION ${PROJECT_VERSION}) 4 | 5 | set(REQUIRED_FCITX_VERSION 5.1.13) 6 | find_package(ECM 1.0 REQUIRED) 7 | 
set(CMAKE_MODULE_PATH ${ECM_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH}) 8 | 9 | ######################################### 10 | # Options 11 | ######################################### 12 | option(BUILD_SHARED_LIBS "Build library as shared libs" On) 13 | option(ENABLE_TEST "Build Test" On) 14 | option(ENABLE_COVERAGE "Build the project with gcov support (Need ENABLE_TEST=On)" Off) 15 | set(GCOV_TOOL "gcov" CACHE STRING "Path to gcov tool used by coverage.") 16 | option(ENABLE_DOC "Build doxygen" Off) 17 | option(ENABLE_DATA "Build data" On) 18 | option(ENABLE_TOOLS "Build tools" On) 19 | 20 | ######################################### 21 | # Dependency 22 | ######################################### 23 | 24 | include(GNUInstallDirs) 25 | include(FeatureSummary) 26 | include(GenerateExportHeader) 27 | include(CMakePackageConfigHelpers) 28 | include(ECMSetupVersion) 29 | include(ECMGenerateHeaders) 30 | include(ECMGeneratePkgConfigFile) 31 | include(ECMUninstallTarget) 32 | include(CheckLibraryExists) 33 | find_package(PkgConfig REQUIRED) 34 | 35 | pkg_check_modules(ZSTD REQUIRED IMPORTED_TARGET "libzstd") 36 | 37 | find_package(Fcitx5Utils ${REQUIRED_FCITX_VERSION} REQUIRED CONFIG) 38 | 39 | set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib") 40 | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin") 41 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin") 42 | 43 | include("${FCITX_INSTALL_CMAKECONFIG_DIR}/Fcitx5Utils/Fcitx5CompilerSettings.cmake") 44 | 45 | find_package(Boost 1.61 CONFIG REQUIRED COMPONENTS iostreams) 46 | 47 | set(LIBIME_INSTALL_PKGDATADIR "${CMAKE_INSTALL_FULL_DATADIR}/libime") 48 | set(LIBIME_INSTALL_LIBDATADIR "${CMAKE_INSTALL_FULL_LIBDIR}/libime") 49 | 50 | configure_file(${CMAKE_CURRENT_SOURCE_DIR}/config.h.in ${CMAKE_CURRENT_BINARY_DIR}/config.h) 51 | include_directories(${CMAKE_CURRENT_BINARY_DIR}) 52 | 53 | if(ENABLE_TEST) 54 | enable_testing() 55 | add_subdirectory(test) 56 | 57 | if 
(ENABLE_COVERAGE) 58 | add_custom_target(coverage 59 | COMMAND "${CMAKE_CTEST_COMMAND}" 60 | COMMAND lcov --gcov-tool "${GCOV_TOOL}" --no-external --capture --directory ./ -b "${CMAKE_CURRENT_SOURCE_DIR}" --output-file coverage.info 61 | COMMAND lcov --remove coverage.info "*/kenlm/*" -o coverage.info 62 | COMMAND genhtml coverage.info --output-directory "coverage_pages" 63 | WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) 64 | endif() 65 | endif() 66 | 67 | add_subdirectory(src) 68 | 69 | if (ENABLE_TOOLS) 70 | add_subdirectory(tools) 71 | endif() 72 | 73 | if (ENABLE_DATA) 74 | add_subdirectory(data) 75 | endif() 76 | 77 | if (ENABLE_DOC) 78 | find_package(Doxygen REQUIRED) 79 | file(READ "${CMAKE_CURRENT_SOURCE_DIR}/.codedocs" FCITX_DOXYGEN_CONFIGURATION) 80 | configure_file( 81 | ${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile.in 82 | ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile 83 | @ONLY) 84 | add_custom_target(doc 85 | COMMAND ${DOXYGEN_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile 86 | WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} 87 | COMMENT "Generating API documentation with Doxygen" 88 | VERBATIM) 89 | endif() 90 | 91 | feature_summary(WHAT ALL FATAL_ON_MISSING_REQUIRED_PACKAGES) 92 | -------------------------------------------------------------------------------- /test/testpinyinime_unit.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include "libime/core/historybigram.h" 12 | #include "libime/core/userlanguagemodel.h" 13 | #include "libime/pinyin/pinyincontext.h" 14 | #include "libime/pinyin/pinyincorrectionprofile.h" 15 | #include "libime/pinyin/pinyindictionary.h" 16 | #include "libime/pinyin/pinyinencoder.h" 17 | #include "libime/pinyin/pinyinime.h" 18 | #include "libime/pinyin/shuangpinprofile.h" 19 | #include "testdir.h" 20 | 21 | using namespace 
libime; 22 | 23 | int main() { 24 | fcitx::Log::setLogRule("libime=5"); 25 | libime::PinyinIME ime( 26 | std::make_unique(), 27 | std::make_unique(LIBIME_BINARY_DIR "/data/sc.lm")); 28 | ime.setNBest(2); 29 | ime.dict()->load(PinyinDictionary::SystemDict, 30 | LIBIME_BINARY_DIR "/data/sc.dict", 31 | PinyinDictFormat::Binary); 32 | PinyinFuzzyFlags flags = PinyinFuzzyFlag::Inner; 33 | ime.setFuzzyFlags(flags); 34 | ime.setScoreFilter(1.0F); 35 | ime.setShuangpinProfile( 36 | std::make_shared(ShuangpinBuiltinProfile::Xiaohe)); 37 | PinyinContext c(&ime); 38 | 39 | c.type("nihaozhongguo"); 40 | FCITX_ASSERT(c.candidates().size() == c.candidateSet().size()); 41 | FCITX_ASSERT(c.candidateSet().count("你好中国")); 42 | c.setCursor(5); 43 | FCITX_ASSERT(c.candidates().size() == c.candidateSet().size()); 44 | FCITX_ASSERT(c.candidatesToCursor().size() == 45 | c.candidatesToCursorSet().size()); 46 | FCITX_ASSERT(c.candidates().size() != c.candidatesToCursor().size()) 47 | << c.candidatesToCursorSet(); 48 | FCITX_ASSERT(!c.candidatesToCursorSet().count("你好中国")); 49 | FCITX_ASSERT(c.candidatesToCursorSet().count("你好")); 50 | c.setCursor(0); 51 | auto iter = std::find_if( 52 | c.candidates().begin(), c.candidates().end(), 53 | [](const auto &cand) { return cand.toString() == "你好中国"; }); 54 | FCITX_ASSERT(iter != c.candidates().end()); 55 | FCITX_ASSERT(!ime.dict()->lookupWord(PinyinDictionary::UserDict, 56 | "ni'hao'zhong'guo", "你好中国")); 57 | c.select(std::distance(c.candidates().begin(), iter)); 58 | c.learn(); 59 | FCITX_ASSERT(ime.model()->history().containsBigram("你", "好")); 60 | FCITX_ASSERT(ime.model()->history().containsBigram("好", "中国")); 61 | 62 | c.setUseShuangpin(true); 63 | 64 | c.type("bkqilb"); 65 | FCITX_ASSERT(c.candidates().size() == c.candidateSet().size()); 66 | FCITX_ASSERT(c.candidateSet().count("冰淇淋")); 67 | c.clear(); 68 | 69 | c.type("bkqiln"); 70 | FCITX_ASSERT(c.candidates().size() == c.candidateSet().size()); 71 | 
FCITX_ASSERT(!c.candidateSet().count("冰淇淋")); 72 | c.clear(); 73 | 74 | ime.setCorrectionProfile(std::make_shared( 75 | BuiltinPinyinCorrectionProfile::Qwerty)); 76 | ime.setShuangpinProfile(std::make_shared( 77 | ShuangpinBuiltinProfile::Xiaohe, ime.correctionProfile().get())); 78 | ime.setFuzzyFlags(flags | PinyinFuzzyFlag::Correction); 79 | 80 | c.type("bkqiln"); 81 | FCITX_ASSERT(c.candidates().size() == c.candidateSet().size()); 82 | FCITX_ASSERT(c.candidateSet().count("冰淇淋")); 83 | c.clear(); 84 | 85 | return 0; 86 | } 87 | -------------------------------------------------------------------------------- /test/testtableime_unit.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | 7 | #include 8 | #include 9 | #include 10 | #include "libime/core/languagemodel.h" 11 | #include "libime/core/userlanguagemodel.h" 12 | #include "libime/table/tablebaseddictionary.h" 13 | #include "libime/table/tablecontext.h" 14 | #include "libime/table/tableoptions.h" 15 | #include "testdir.h" 16 | 17 | using namespace libime; 18 | 19 | class TestLmResolver : public LanguageModelResolver { 20 | public: 21 | TestLmResolver(std::string_view path) : path_(path) {} 22 | 23 | protected: 24 | std::string 25 | languageModelFileNameForLanguage(const std::string &language) override { 26 | if (language == "zh_CN") { 27 | return path_; 28 | } 29 | return {}; 30 | } 31 | 32 | private: 33 | std::string path_; 34 | }; 35 | 36 | int main() { 37 | fcitx::Log::setLogRule("*=5"); 38 | TestLmResolver lmresolver(LIBIME_BINARY_DIR "/data/sc.lm"); 39 | auto lm = lmresolver.languageModelFileForLanguage("zh_CN"); 40 | TableBasedDictionary dict; 41 | UserLanguageModel model(lm); 42 | dict.load(LIBIME_BINARY_DIR "/data/wbx.main.dict"); 43 | TableOptions options; 44 | options.setLanguageCode("zh_CN"); 45 | options.setLearning(true); 46 | 
options.setAutoPhraseLength(-1); 47 | options.setAutoSelect(true); 48 | options.setAutoSelectLength(-1); 49 | options.setNoMatchAutoSelectLength(-1); 50 | options.setNoSortInputLength(2); 51 | options.setAutoRuleSet({}); 52 | options.setMatchingKey('z'); 53 | options.setOrderPolicy(OrderPolicy::Freq); 54 | dict.setTableOptions(options); 55 | TableContext c(dict, model); 56 | 57 | // This candidate does not exist in table, should not be selected. 58 | c.type("qfgo"); 59 | FCITX_ASSERT(!c.selected()); 60 | 61 | c.clear(); 62 | c.type("qfgop"); 63 | FCITX_ASSERT(!c.selected()); 64 | c.clear(); 65 | 66 | c.type("vb"); 67 | 68 | for (const auto &candidate : c.candidates()) { 69 | FCITX_INFO() << candidate.toString() << candidate.score(); 70 | } 71 | c.select(0); 72 | c.learn(); 73 | c.clear(); 74 | 75 | c.type("bbh"); 76 | 77 | for (const auto &candidate : c.candidates()) { 78 | FCITX_INFO() << candidate.toString() << candidate.score(); 79 | } 80 | c.select(0); 81 | c.learn(); 82 | c.clear(); 83 | c.learnAutoPhrase("好耶"); 84 | FCITX_ASSERT(c.dict().wordExists("vbbb", "好耶") == 85 | libime::PhraseFlag::Auto); 86 | c.learnAutoPhrase("好耶", {"ky", "cy"}); 87 | FCITX_ASSERT(c.dict().wordExists("kycy", "好耶") == 88 | libime::PhraseFlag::Auto); 89 | c.learnAutoPhrase("萌豚萌", {"mbsd", "tdk,", "mbsd"}); 90 | FCITX_ASSERT(c.dict().wordExists("mtmb", "萌豚萌") == 91 | libime::PhraseFlag::Auto); 92 | FCITX_ASSERT(c.dict().wordExists("tdmb", "豚萌") == 93 | libime::PhraseFlag::Auto); 94 | 95 | for (int i = 0; i < 2; i++) { 96 | c.type("vbbb"); 97 | 98 | for (const auto &candidate : c.candidates()) { 99 | FCITX_INFO() << candidate.toString() << candidate.score(); 100 | } 101 | c.select(1); 102 | c.learn(); 103 | c.clear(); 104 | FCITX_INFO() << "========================"; 105 | } 106 | 107 | return 0; 108 | } 109 | -------------------------------------------------------------------------------- /src/libime/table/tableoptions.cpp: 
-------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | #include "tableoptions.h" 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | namespace libime { 15 | 16 | // mostly from table.desc, certain options are not related to libime 17 | class TableOptionsPrivate { 18 | public: 19 | std::string languageCode_; 20 | std::string autoSelectRegex_; 21 | std::string noMatchAutoSelectRegex_; 22 | std::set endKey_; 23 | OrderPolicy orderPolicy_ = OrderPolicy::No; 24 | uint32_t noSortInputLength_ = 0; 25 | uint32_t pinyinKey_ = 0; 26 | uint32_t matchingKey_ = 0; 27 | int autoSelectLength_ = 0; 28 | int noMatchAutoSelectLength_ = 0; 29 | 30 | int autoPhraseLength_ = -1; 31 | int saveAutoPhraseAfter_ = -1; 32 | std::unordered_set autoRuleSet_; 33 | 34 | bool commitRawInput_ = false; 35 | bool exactMatch_ = false; 36 | bool learning_ = true; 37 | bool autoSelect_ = false; 38 | // show hint for word. 
39 | bool prompt_ = false; 40 | // use prompt table 41 | bool displayCustomPromptSymbol_ = false; 42 | bool sortByCodeLength_ = true; 43 | }; 44 | 45 | TableOptions::TableOptions() : d_ptr(std::make_unique()) {} 46 | 47 | FCITX_DEFINE_DPTR_COPY_AND_DEFAULT_DTOR_AND_MOVE(TableOptions) 48 | 49 | FCITX_DEFINE_PROPERTY_PRIVATE(TableOptions, OrderPolicy, orderPolicy, 50 | setOrderPolicy); 51 | FCITX_DEFINE_PROPERTY_PRIVATE(TableOptions, uint32_t, noSortInputLength, 52 | setNoSortInputLength); 53 | FCITX_DEFINE_PROPERTY_PRIVATE(TableOptions, bool, autoSelect, setAutoSelect); 54 | FCITX_DEFINE_PROPERTY_PRIVATE(TableOptions, int, autoSelectLength, 55 | setAutoSelectLength); 56 | FCITX_DEFINE_PROPERTY_PRIVATE(TableOptions, std::string, autoSelectRegex, 57 | setAutoSelectRegex); 58 | FCITX_DEFINE_PROPERTY_PRIVATE(TableOptions, int, noMatchAutoSelectLength, 59 | setNoMatchAutoSelectLength); 60 | FCITX_DEFINE_PROPERTY_PRIVATE(TableOptions, std::string, noMatchAutoSelectRegex, 61 | setNoMatchAutoSelectRegex); 62 | FCITX_DEFINE_PROPERTY_PRIVATE(TableOptions, bool, commitRawInput, 63 | setCommitRawInput); 64 | FCITX_DEFINE_PROPERTY_PRIVATE(TableOptions, std::set, endKey, 65 | setEndKey); 66 | FCITX_DEFINE_PROPERTY_PRIVATE(TableOptions, uint32_t, matchingKey, 67 | setMatchingKey); 68 | FCITX_DEFINE_PROPERTY_PRIVATE(TableOptions, bool, exactMatch, setExactMatch); 69 | FCITX_DEFINE_PROPERTY_PRIVATE(TableOptions, bool, learning, setLearning); 70 | FCITX_DEFINE_PROPERTY_PRIVATE(TableOptions, int, autoPhraseLength, 71 | setAutoPhraseLength); 72 | FCITX_DEFINE_PROPERTY_PRIVATE(TableOptions, int, saveAutoPhraseAfter, 73 | setSaveAutoPhraseAfter); 74 | FCITX_DEFINE_PROPERTY_PRIVATE(TableOptions, std::string, languageCode, 75 | setLanguageCode); 76 | FCITX_DEFINE_PROPERTY_PRIVATE(TableOptions, std::unordered_set, 77 | autoRuleSet, setAutoRuleSet); 78 | FCITX_DEFINE_PROPERTY_PRIVATE(TableOptions, bool, sortByCodeLength, 79 | setSortByCodeLength); 80 | } // namespace libime 81 | 
-------------------------------------------------------------------------------- /data/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(LM_TAR "lm_sc.arpa-20250113.tar.zst") 2 | # Use our own file server for convenience. 3 | set(LM_URL "https://download.fcitx-im.org/data/${LM_TAR}") 4 | 5 | fcitx5_download(lm-download ${LM_URL} ${LM_TAR} 6 | ee83ecf20d52e8bccdba4cf6cd57183d53c257713a5eb77ee3a63d50fc3796dd) 7 | fcitx5_extract(lm-extract ${LM_TAR} DEPENDS lm-download 8 | OUTPUT lm_sc.arpa) 9 | 10 | set(LM_SRC "${CMAKE_CURRENT_BINARY_DIR}/lm_sc.arpa") 11 | set(LM_OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/sc.lm") 12 | set(LM_PREDICT_OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/sc.lm.predict") 13 | add_custom_command( 14 | OUTPUT "${LM_OUTPUT}" 15 | DEPENDS "${LM_SRC}" LibIME::slm_build_binary 16 | COMMAND LibIME::slm_build_binary -s -a 22 -q 8 trie "${LM_SRC}" "${LM_OUTPUT}") 17 | add_custom_target(lm ALL DEPENDS "${LM_OUTPUT}") 18 | 19 | add_custom_command( 20 | OUTPUT "${LM_PREDICT_OUTPUT}" 21 | DEPENDS "${LM_SRC}" lm LibIME::prediction 22 | COMMAND LibIME::prediction "${LM_OUTPUT}" "${LM_SRC}" "${LM_PREDICT_OUTPUT}") 23 | add_custom_target(lm-predict ALL DEPENDS "${LM_PREDICT_OUTPUT}") 24 | 25 | install(FILES "${LM_OUTPUT}" RENAME zh_CN.lm DESTINATION "${LIBIME_INSTALL_LIBDATADIR}") 26 | install(FILES "${LM_PREDICT_OUTPUT}" RENAME zh_CN.lm.predict DESTINATION "${LIBIME_INSTALL_LIBDATADIR}") 27 | 28 | set(DICT_TAR "dict-20250327.tar.zst") 29 | set(DICT_URL "https://download.fcitx-im.org/data/${DICT_TAR}") 30 | 31 | fcitx5_download(dict-download ${DICT_URL} ${DICT_TAR} 32 | 7ca6be4754c0d4c27ba7702c0dce651659bd2ca1faa5cbf2848d81a0053c8c13) 33 | fcitx5_extract(dict-extract ${DICT_TAR} DEPENDS dict-download 34 | OUTPUT dict_sc.txt dict_extb.txt) 35 | 36 | set(DICT_SRC "${CMAKE_CURRENT_BINARY_DIR}/dict_sc.txt") 37 | set(DICT_OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/sc.dict") 38 | add_custom_command( 39 | OUTPUT "${DICT_OUTPUT}" 40 | 
DEPENDS "${DICT_SRC}" LibIME::pinyindict 41 | COMMAND LibIME::pinyindict "${DICT_SRC}" "${DICT_OUTPUT}") 42 | 43 | set(DICT_EXTB_SRC "${CMAKE_CURRENT_BINARY_DIR}/dict_extb.txt") 44 | set(DICT_EXTB_OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/extb.dict") 45 | add_custom_command( 46 | OUTPUT "${DICT_EXTB_OUTPUT}" 47 | DEPENDS "${DICT_EXTB_SRC}" LibIME::pinyindict 48 | COMMAND LibIME::pinyindict "${DICT_EXTB_SRC}" "${DICT_EXTB_OUTPUT}") 49 | add_custom_target(dict ALL DEPENDS "${DICT_OUTPUT}" "${DICT_EXTB_OUTPUT}") 50 | install(FILES "${DICT_OUTPUT}" "${DICT_EXTB_OUTPUT}" DESTINATION "${LIBIME_INSTALL_PKGDATADIR}") 51 | 52 | set(TABLE_DICT_TAR "table-20240108.tar.zst") 53 | set(TABLE_DICT_URL "https://download.fcitx-im.org/data/${TABLE_DICT_TAR}") 54 | set(TABLE_TXT_FILES db.txt erbi.txt qxm.txt wanfeng.txt 55 | wbpy.txt wbx.txt zrm.txt cj.txt) 56 | fcitx5_download(table-dict-download ${TABLE_DICT_URL} ${TABLE_DICT_TAR} 57 | 3e9d87b04a393f131723472c8eaa860dd23c378a3d4f6a9005513b2a95b3614b) 58 | fcitx5_extract(table-dict-extract ${TABLE_DICT_TAR} DEPENDS table-dict-download 59 | OUTPUT ${TABLE_TXT_FILES}) 60 | 61 | set(TABLE_DICT_FILES) 62 | foreach(TABLE_TXT_FILE ${TABLE_TXT_FILES}) 63 | string(REPLACE .txt .main.dict TABLE_DICT_FILE ${TABLE_TXT_FILE}) 64 | add_custom_command(OUTPUT ${TABLE_DICT_FILE} 65 | DEPENDS ${TABLE_TXT_FILE} LibIME::tabledict 66 | COMMAND LibIME::tabledict ${TABLE_TXT_FILE} ${CMAKE_CURRENT_BINARY_DIR}/${TABLE_DICT_FILE}) 67 | list(APPEND TABLE_DICT_FILES ${CMAKE_CURRENT_BINARY_DIR}/${TABLE_DICT_FILE}) 68 | endforeach() 69 | 70 | add_custom_target(table-dict ALL DEPENDS ${TABLE_DICT_FILES}) 71 | install(FILES ${TABLE_DICT_FILES} DESTINATION "${LIBIME_INSTALL_PKGDATADIR}") 72 | -------------------------------------------------------------------------------- /test/testtableime.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: 
LGPL-2.1-or-later 5 | */ 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include "libime/core/languagemodel.h" 14 | #include "libime/core/userlanguagemodel.h" 15 | #include "libime/table/tablebaseddictionary.h" 16 | #include "libime/table/tablecontext.h" 17 | #include "libime/table/tabledecoder.h" 18 | #include "libime/table/tableoptions.h" 19 | #include "testdir.h" 20 | #include "testutils.h" 21 | 22 | using namespace libime; 23 | 24 | class TestLmResolver : public LanguageModelResolver { 25 | public: 26 | TestLmResolver(std::string_view path) : path_(path) {} 27 | 28 | protected: 29 | std::string 30 | languageModelFileNameForLanguage(const std::string &language) override { 31 | if (language == "zh_CN") { 32 | return path_; 33 | } 34 | return {}; 35 | } 36 | 37 | private: 38 | std::string path_; 39 | }; 40 | 41 | int main() { 42 | fcitx::Log::setLogRule("*=5"); 43 | TestLmResolver lmresolver(LIBIME_BINARY_DIR "/data/sc.lm"); 44 | auto lm = lmresolver.languageModelFileForLanguage("zh_CN"); 45 | TableBasedDictionary dict; 46 | UserLanguageModel model(lm); 47 | dict.load(LIBIME_BINARY_DIR "/data/wbpy.main.dict"); 48 | TableOptions options; 49 | options.setLanguageCode("zh_CN"); 50 | options.setAutoSelect(true); 51 | options.setAutoSelectLength(-1); 52 | options.setNoMatchAutoSelectLength(-1); 53 | options.setNoSortInputLength(2); 54 | options.setAutoRuleSet({"e2"}); 55 | options.setMatchingKey('z'); 56 | options.setOrderPolicy(OrderPolicy::Freq); 57 | dict.setTableOptions(options); 58 | TableContext c(dict, model); 59 | auto printTime = [](int t) { 60 | std::cout << "Time: " << t / 1000000.0 << " ms" << std::endl; 61 | }; 62 | 63 | std::string word; 64 | while (std::cin >> word) { 65 | bool printAll = false; 66 | ScopedNanoTimer t(printTime); 67 | if (word == "back") { 68 | c.backspace(); 69 | } else if (word == "reset") { 70 | c.clear(); 71 | } else if (word.size() == 1 && ('0' <= word[0] && word[0] <= '9')) { 72 | size_t 
idx; 73 | if (word[0] == '0') { 74 | idx = 9; 75 | } else { 76 | idx = word[0] - '1'; 77 | } 78 | if (c.candidates().size() >= idx) { 79 | c.select(idx); 80 | } 81 | } else if (word.size() == 1 && c.isValidInput(word[0])) { 82 | c.type(word); 83 | } else if (word == "all") { 84 | printAll = true; 85 | } else if (word == "commit") { 86 | c.autoSelect(); 87 | c.learn(); 88 | c.clear(); 89 | } 90 | 91 | size_t count = 1; 92 | std::cout << "Preedit: " << c.preedit() << std::endl; 93 | for (const auto &candidate : c.candidates()) { 94 | std::cout << (count % 10) << ": "; 95 | for (const auto *node : candidate.sentence()) { 96 | std::cout 97 | << node->word() << " " 98 | << static_cast(node)->code(); 99 | } 100 | std::cout << " " << candidate.score() << std::endl; 101 | count++; 102 | if (!printAll && count > 10) { 103 | break; 104 | } 105 | } 106 | } 107 | 108 | return 0; 109 | } 110 | -------------------------------------------------------------------------------- /src/libime/core/lrucache.h: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | #ifndef _FCITX_LIBIME_CORE_LRU_H_ 7 | #define _FCITX_LIBIME_CORE_LRU_H_ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | namespace libime { 16 | 17 | // A simple LRU cache. 18 | template > 19 | class LRUCache { 20 | public: 21 | using key_type = K; 22 | using value_type = V; 23 | // we use boost's unordered_map is for the heterogeneous lookup 24 | // functionality. 
25 | using dict_type = 26 | boost::unordered_map::iterator>, 27 | H>; 28 | 29 | LRUCache(size_t sz = 80) : sz_(sz) {} 30 | 31 | size_t size() const { return dict_.size(); } 32 | 33 | size_t capacity() const { return sz_; } 34 | 35 | bool empty() const { return dict_.empty(); } 36 | 37 | bool contains(const key_type &key) { 38 | return dict_.find(key) != dict_.end(); 39 | } 40 | 41 | template 42 | value_type *insert(const key_type &key, Args &&...args) { 43 | auto iter = dict_.find(key); 44 | if (iter == dict_.end()) { 45 | if (size() >= sz_) { 46 | evict(); 47 | } 48 | 49 | order_.push_front(key); 50 | auto r = dict_.emplace( 51 | key, std::make_pair(value_type(std::forward(args)...), 52 | order_.begin())); 53 | return &r.first->second.first; 54 | } 55 | return nullptr; 56 | } 57 | 58 | void erase(const key_type &key) { 59 | auto i = dict_.find(key); 60 | if (i == dict_.end()) { 61 | return; 62 | } 63 | order_.erase(i->second.second); 64 | dict_.erase(i); 65 | } 66 | 67 | // find will refresh the item, so it is not const. 
68 | value_type *find(const key_type &key) { 69 | // lookup value in the cache 70 | auto i = dict_.find(key); 71 | return find_helper(i); 72 | } 73 | 74 | template 76 | value_type *find(CompatibleKey const &k, CompatibleHash const &h, 77 | CompatiblePredicate const &p) { 78 | return find_helper(dict_.find(k, h, p)); 79 | } 80 | 81 | void clear() { 82 | dict_.clear(); 83 | order_.clear(); 84 | } 85 | 86 | private: 87 | void evict() { 88 | // evict item from the end of most recently used list 89 | auto i = std::prev(order_.end()); 90 | dict_.erase(*i); 91 | order_.erase(i); 92 | } 93 | 94 | value_type *find_helper(typename dict_type::iterator i) { 95 | if (i == dict_.end()) { 96 | // value not in cache 97 | return nullptr; 98 | } 99 | 100 | // return the value, but first update its place in the most 101 | // recently used list 102 | auto j = i->second.second; 103 | if (j != order_.begin()) { 104 | order_.splice(order_.begin(), order_, j, std::next(j)); 105 | j = order_.begin(); 106 | i->second.second = j; 107 | } 108 | return &i->second.first; 109 | } 110 | 111 | dict_type dict_; 112 | std::list order_; 113 | // Maximum size of the cache. 114 | size_t sz_; 115 | }; 116 | } // namespace libime 117 | 118 | #endif // _FCITX_LIBIME_CORE_LRU_H_ 119 | -------------------------------------------------------------------------------- /src/libime/pinyin/pinyincorrectionprofile.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2024-2024 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | #include "pinyincorrectionprofile.h" 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include "pinyindata.h" 15 | #include "pinyinencoder.h" 16 | 17 | namespace libime { 18 | 19 | namespace { 20 | 21 | /* 22 | * Helper function to create mapping based on keyboard rows. 
/*
 * Build a correction mapping from keyboard rows.
 *
 * Every character of a row is mapped to its direct neighbours in that row:
 * the character to its left (if any) followed by the character to its right
 * (if any). The function assumes that a key can only be corrected to a key
 * adjacent to it. If a character occurs more than once, the last occurrence
 * determines its neighbours.
 */
std::unordered_map<char, std::vector<char>>
mappingFromRows(const std::vector<std::string> &rows) {
    std::unordered_map<char, std::vector<char>> neighbours;
    for (const std::string &keys : rows) {
        for (size_t pos = 0; pos != keys.size(); ++pos) {
            std::vector<char> adjacent;
            const bool hasLeft = pos != 0;
            const bool hasRight = pos + 1 != keys.size();
            if (hasLeft) {
                adjacent.push_back(keys[pos - 1]);
            }
            if (hasRight) {
                adjacent.push_back(keys[pos + 1]);
            }
            neighbours[keys[pos]] = std::move(adjacent);
        }
    }
    return neighbours;
}
75 | std::vector newEntries; 76 | for (const auto &item : d->pinyinMap_) { 77 | for (size_t i = 0; i < item.pinyin().size(); i++) { 78 | auto chr = item.pinyin()[i]; 79 | auto swap = mapping.find(chr); 80 | if (swap == mapping.end() || swap->second.empty()) { 81 | continue; 82 | } 83 | auto newEntry = item.pinyin(); 84 | for (auto sub : swap->second) { 85 | newEntry[i] = sub; 86 | newEntries.push_back( 87 | PinyinEntry(newEntry.data(), item.initial(), item.final(), 88 | item.flags() | PinyinFuzzyFlag::Correction)); 89 | newEntry[i] = chr; 90 | } 91 | } 92 | } 93 | for (const auto &newEntry : newEntries) { 94 | d->pinyinMap_.insert(newEntry); 95 | } 96 | } 97 | 98 | PinyinCorrectionProfile::~PinyinCorrectionProfile() = default; 99 | 100 | const PinyinMap &PinyinCorrectionProfile::pinyinMap() const { 101 | FCITX_D(); 102 | return d->pinyinMap_; 103 | } 104 | 105 | const std::unordered_map> & 106 | PinyinCorrectionProfile::correctionMap() const { 107 | FCITX_D(); 108 | return d->correctionMap_; 109 | } 110 | } // namespace libime 111 | -------------------------------------------------------------------------------- /src/libime/table/tablebaseddictionary_p.h: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2020~2020 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | * 6 | */ 7 | #ifndef _LIBIME_LIBIME_TABLE_TABLEBASEDDICTIONARY_P_H_ 8 | #define _LIBIME_LIBIME_TABLE_TABLEBASEDDICTIONARY_P_H_ 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include "libime/core/datrie.h" 23 | #include "autophrasedict.h" 24 | #include "constants.h" 25 | #include "tablebaseddictionary.h" 26 | #include "tableoptions.h" 27 | #include "tablerule.h" 28 | 29 | namespace libime { 30 | class TableBasedDictionaryPrivate 31 | : public fcitx::QPtrHolder { 32 | public: 33 | std::vector 
rules_; 34 | std::set inputCode_; 35 | std::set ignoreChars_; 36 | uint32_t pinyinKey_ = 0; 37 | uint32_t promptKey_ = 0; 38 | uint32_t phraseKey_ = 0; 39 | uint32_t codeLength_ = 0; 40 | 41 | DATrie phraseTrie_; // base dictionary 42 | uint32_t phraseTrieIndex_ = 0; 43 | 44 | DATrie userTrie_; // user dictionary 45 | uint32_t userTrieIndex_ = 0; 46 | 47 | DATrie deletionTrie_; // mask over base dictionary 48 | 49 | std::vector, uint32_t>> extraTries_; 50 | 51 | DATrie singleCharTrie_; // reverse lookup from single character 52 | DATrie singleCharConstTrie_; // lookup char for new phrase 53 | DATrie singleCharLookupTrie_; 54 | DATrie promptTrie_; // lookup for prompt; 55 | AutoPhraseDict autoPhraseDict_{TABLE_AUTOPHRASE_SIZE}; 56 | TableOptions options_; 57 | std::optional autoSelectRegex_; 58 | std::optional noMatchAutoSelectRegex_; 59 | 60 | TableBasedDictionaryPrivate(TableBasedDictionary *q) : QPtrHolder(q) {} 61 | 62 | FCITX_DEFINE_SIGNAL_PRIVATE(TableBasedDictionary, tableOptionsChanged); 63 | 64 | std::pair *, uint32_t *> trieByFlag(PhraseFlag flag); 65 | 66 | std::pair *, const uint32_t *> 67 | trieByFlag(PhraseFlag flag) const; 68 | 69 | bool insert(std::string_view key, std::string_view value, PhraseFlag flag); 70 | 71 | bool matchTrie(std::string_view code, TableMatchMode mode, PhraseFlag flag, 72 | const TableMatchCallback &callback) const; 73 | bool matchTrie(const DATrie &trie, uint32_t indexOffset, 74 | std::string_view code, TableMatchMode mode, PhraseFlag flag, 75 | const TableMatchCallback &callback) const; 76 | 77 | void reset(); 78 | bool validate() const; 79 | 80 | void loadBinary(std::istream &in); 81 | void loadUserBinary(std::istream &in, uint32_t version); 82 | 83 | bool validateKeyValue(std::string_view key, std::string_view value, 84 | PhraseFlag flag) const; 85 | 86 | FCITX_NODISCARD 87 | std::optional> 88 | parseDataLine(std::string_view buf, bool user); 89 | void insertDataLine(std::string_view buf, bool user); 90 | bool 
matchWordsInternal(std::string_view code, TableMatchMode mode, 91 | bool onlyChecking, 92 | const TableMatchCallback &callback) const; 93 | 94 | bool validateHints(std::vector &hints, 95 | const TableRule &rule) const; 96 | 97 | bool hasExactMatchInPhraseTrie(std::string_view entry) const; 98 | }; 99 | 100 | } // namespace libime 101 | 102 | #endif // _LIBIME_LIBIME_TABLE_TABLEBASEDDICTIONARY_P_H_ 103 | -------------------------------------------------------------------------------- /src/libime/pinyin/pinyindictionary.h: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | #ifndef _FCITX_LIBIME_PINYIN_PINYINDICTIONARY_H_ 7 | #define _FCITX_LIBIME_PINYIN_PINYINDICTIONARY_H_ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | namespace libime { 26 | 27 | enum class PinyinDictFormat { Text, Binary }; 28 | 29 | class PinyinDictionaryPrivate; 30 | 31 | using PinyinMatchCallback = 32 | std::function; 33 | 34 | using PinyinTrie = typename TrieDictionary::TrieType; 35 | 36 | /** 37 | * Flag for a given sub dictionary in PinyinDictionary. 38 | */ 39 | enum class PinyinDictFlag { 40 | /// No Flag 41 | NoFlag = 0, 42 | /// The dictionary can only be used to search the whole match sentence 43 | FullMatch = (1 << 1), 44 | /** 45 | * The dictionary is disabled and should be skipped for matching. 46 | * @since 1.0.10 47 | */ 48 | Disabled = (1 << 2) 49 | }; 50 | 51 | using PinyinDictFlags = fcitx::Flags; 52 | 53 | /** 54 | * PinyinDictionary is a set of dictionaries for Pinyin. 
55 | */ 56 | class LIBIMEPINYIN_EXPORT PinyinDictionary : public TrieDictionary { 57 | public: 58 | explicit PinyinDictionary(); 59 | ~PinyinDictionary(); 60 | 61 | // Load dictionary for a specific dict. 62 | void load(size_t idx, std::istream &in, PinyinDictFormat format); 63 | void load(size_t idx, const char *filename, PinyinDictFormat format); 64 | 65 | // Match the word by encoded pinyin. 66 | void matchWords(const char *data, size_t size, 67 | PinyinMatchCallback callback) const; 68 | // Match the word by encoded pinyin. 69 | void matchWordsPrefix(const char *data, size_t size, 70 | PinyinMatchCallback callback) const; 71 | 72 | void save(size_t idx, const char *filename, PinyinDictFormat format); 73 | void save(size_t idx, std::ostream &out, PinyinDictFormat format); 74 | 75 | void addWord(size_t idx, std::string_view fullPinyin, 76 | std::string_view hanzi, float cost = 0.0F); 77 | bool removeWord(size_t idx, std::string_view fullPinyin, 78 | std::string_view hanzi); 79 | std::optional lookupWord(size_t idx, std::string_view fullPinyin, 80 | std::string_view hanzi) const; 81 | 82 | void setFlags(size_t idx, PinyinDictFlags flags); 83 | 84 | /** 85 | * Load text format into the Trie 86 | * 87 | * @param in input stream 88 | * @param format dict format. 
89 | * @see TrieDictionary::setTrie 90 | * @since 1.1.7 91 | */ 92 | static TrieType load(std::istream &in, PinyinDictFormat format); 93 | 94 | using dictionaryChanged = TrieDictionary::dictionaryChanged; 95 | 96 | protected: 97 | void 98 | matchPrefixImpl(const SegmentGraph &graph, 99 | const GraphMatchCallback &callback, 100 | const std::unordered_set &ignore, 101 | void *helper) const override; 102 | 103 | private: 104 | void loadText(size_t idx, std::istream &in); 105 | void loadBinary(size_t idx, std::istream &in); 106 | void saveText(size_t idx, std::ostream &out); 107 | 108 | std::unique_ptr d_ptr; 109 | FCITX_DECLARE_PRIVATE(PinyinDictionary); 110 | }; 111 | } // namespace libime 112 | 113 | #endif // _FCITX_LIBIME_PINYIN_PINYINDICTIONARY_H_ 114 | -------------------------------------------------------------------------------- /test/testpinyindictionary.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include "libime/pinyin/pinyindictionary.h" 14 | #include "libime/pinyin/pinyinencoder.h" 15 | #include "testdir.h" 16 | 17 | using namespace libime; 18 | 19 | constexpr char testPinyin1[] = "ni'hui"; 20 | constexpr char testHanzi1[] = "倪辉"; 21 | 22 | constexpr char testPinyin2[] = "xiao'qi'e"; 23 | constexpr char testHanzi2[] = "小企鹅"; 24 | 25 | bool searchWord(const PinyinDictionary &dict, const char *data, size_t size, 26 | std::string_view expectedPinyin, 27 | std::string_view expectedHanzi) { 28 | bool seenWord = false; 29 | dict.matchWords(data, size, 30 | [&seenWord, expectedHanzi, 31 | expectedPinyin](std::string_view encodedPinyin, 32 | std::string_view hanzi, float cost) { 33 | std::cout 34 | << PinyinEncoder::decodeFullPinyin(encodedPinyin) 35 | << " " << hanzi << " " << cost << std::endl; 36 | if (hanzi == 
expectedHanzi && 37 | PinyinEncoder::decodeFullPinyin(encodedPinyin) == 38 | expectedPinyin) { 39 | seenWord = true; 40 | } 41 | return true; 42 | }); 43 | return seenWord; 44 | } 45 | 46 | bool searchWordPrefix(const PinyinDictionary &dict, const char *data, 47 | size_t size, std::string_view expectedPinyin, 48 | std::string_view expectedHanzi) { 49 | bool seenWord = false; 50 | dict.matchWordsPrefix( 51 | data, size, 52 | [&seenWord, expectedHanzi, 53 | expectedPinyin](std::string_view encodedPinyin, std::string_view hanzi, 54 | float cost) { 55 | std::cout << PinyinEncoder::decodeFullPinyin(encodedPinyin) << " " 56 | << hanzi << " " << cost << std::endl; 57 | if (hanzi == expectedHanzi && 58 | PinyinEncoder::decodeFullPinyin(encodedPinyin) == 59 | expectedPinyin) { 60 | seenWord = true; 61 | } 62 | return true; 63 | }); 64 | return seenWord; 65 | } 66 | 67 | int main() { 68 | PinyinDictionary dict; 69 | dict.load(PinyinDictionary::SystemDict, 70 | LIBIME_BINARY_DIR "/data/dict_sc.txt", PinyinDictFormat::Text); 71 | 72 | // add a manual dict 73 | std::stringstream ss; 74 | ss << testHanzi1 << " " << testPinyin1 << " 0.0" << std::endl 75 | << testHanzi2 << " " << testPinyin2 << std::endl; 76 | dict.load(PinyinDictionary::UserDict, ss, PinyinDictFormat::Text); 77 | // dict.dump(std::cout); 78 | char c[] = {static_cast(PinyinInitial::N), 0, 79 | static_cast(PinyinInitial::H), 0}; 80 | char c2[] = {static_cast(PinyinInitial::X), 0, 81 | static_cast(PinyinInitial::Q), 0, 82 | static_cast(PinyinInitial::Zero), 0}; 83 | 84 | FCITX_ASSERT(searchWord(dict, c, sizeof(c), testPinyin1, testHanzi1)); 85 | FCITX_ASSERT(searchWord(dict, c2, sizeof(c2), testPinyin2, testHanzi2)); 86 | // Search x q as prefix and see if we can find xiao qi e. 87 | FCITX_ASSERT(searchWordPrefix(dict, c2, 4, testPinyin2, testHanzi2)); 88 | 89 | // Remove the word and check again. 
90 | dict.removeWord(PinyinDictionary::UserDict, testPinyin1, testHanzi1); 91 | FCITX_ASSERT(!searchWord(dict, c, sizeof(c), testPinyin1, testHanzi1)); 92 | FCITX_ASSERT(!searchWordPrefix(dict, c, 2, testPinyin1, testHanzi1)); 93 | 94 | dict.save(0, LIBIME_BINARY_DIR "/test/testpinyindictionary.dict", 95 | PinyinDictFormat::Binary); 96 | return 0; 97 | } 98 | -------------------------------------------------------------------------------- /src/libime/table/tablecontext.h: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | #ifndef _FCITX_LIBIME_TABLE_TABLECONTEXT_H_ 7 | #define _FCITX_LIBIME_TABLE_TABLECONTEXT_H_ 8 | 9 | /// \file 10 | /// \brief Class provide input method support for table-based ones, like wubi. 11 | 12 | // Workaround a boost missing include bug. 13 | #include 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include "libime/core/lattice.h" 29 | 30 | namespace libime { 31 | 32 | class TableContextPrivate; 33 | class TableBasedDictionary; 34 | class UserLanguageModel; 35 | 36 | /// \brief Input context for table input method. 
37 | class LIBIMETABLE_EXPORT TableContext : public InputBuffer { 38 | public: 39 | using CandidateRange = boost::any_range; 41 | 42 | TableContext(TableBasedDictionary &dict, UserLanguageModel &model); 43 | virtual ~TableContext(); 44 | 45 | void erase(size_t from, size_t to) override; 46 | 47 | void select(size_t idx); 48 | 49 | bool isValidInput(uint32_t c) const; 50 | 51 | CandidateRange candidates() const; 52 | 53 | std::string candidateHint(size_t idx, bool custom = false) const; 54 | 55 | static std::string code(const SentenceResult &sentence); 56 | static PhraseFlag flag(const SentenceResult &sentence); 57 | static bool isPinyin(const SentenceResult &sentence); 58 | static bool isAuto(const SentenceResult &sentence); 59 | 60 | bool selected() const; 61 | size_t selectedSize() const; 62 | std::tuple selectedSegment(size_t idx) const; 63 | std::string selectedCode(size_t idx) const; 64 | size_t selectedSegmentLength(size_t idx) const; 65 | 66 | /// \brief A simple preedit implementation. 67 | /// The value is derived from function selectedSegment and currentCode. 68 | std::string preedit() const; 69 | 70 | /// \brief Current unselected code. 71 | const std::string &currentCode() const; 72 | 73 | /// \brief The concatenation of all selectedSegment where bool == true. 74 | std::string selectedSentence() const; 75 | size_t selectedLength() const; 76 | 77 | /// \brief Save the current selected text. 78 | void learn(); 79 | 80 | /// \brief Save the last selected text. 81 | void learnLast(); 82 | 83 | /// \brief Learn auto word from string. 84 | /// 85 | /// Depending on the tableOptions, it will try to learn the word in history. 86 | void learnAutoPhrase(std::string_view history); 87 | 88 | /// \brief Learn auto word from string 89 | /// 90 | /// Similar to its overload, but with hint of given code. 
91 | void learnAutoPhrase(std::string_view history, 92 | const std::vector &hints); 93 | 94 | const TableBasedDictionary &dict() const; 95 | TableBasedDictionary &mutableDict(); 96 | 97 | const UserLanguageModel &model() const; 98 | UserLanguageModel &mutableModel(); 99 | void autoSelect(); 100 | 101 | /// Set the auto select index, usually, this is the candidate cursor index. 102 | /// 103 | /// \since 1.0.12 104 | void setAutoSelectIndex(size_t index); 105 | 106 | protected: 107 | bool typeImpl(const char *s, size_t length) override; 108 | 109 | private: 110 | void update(); 111 | bool typeOneChar(std::string_view chr); 112 | 113 | std::unique_ptr d_ptr; 114 | FCITX_DECLARE_PRIVATE(TableContext); 115 | }; 116 | } // namespace libime 117 | 118 | #endif // _FCITX_LIBIME_TABLE_TABLECONTEXT_H_ 119 | -------------------------------------------------------------------------------- /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | Language: Cpp 3 | # BasedOnStyle: LLVM 4 | AccessModifierOffset: -4 5 | ConstructorInitializerIndentWidth: 4 6 | AlignEscapedNewlinesLeft: false 7 | AlignTrailingComments: true 8 | AllowAllParametersOfDeclarationOnNextLine: true 9 | AllowShortBlocksOnASingleLine: false 10 | AllowShortIfStatementsOnASingleLine: false 11 | AllowShortLoopsOnASingleLine: false 12 | AllowShortFunctionsOnASingleLine: All 13 | AlwaysBreakTemplateDeclarations: true 14 | AlwaysBreakBeforeMultilineStrings: false 15 | BreakBeforeBinaryOperators: false 16 | BreakBeforeTernaryOperators: true 17 | BreakConstructorInitializersBeforeComma: false 18 | BinPackParameters: true 19 | ColumnLimit: 80 20 | ConstructorInitializerAllOnOneLineOrOnePerLine: false 21 | DerivePointerAlignment: false 22 | ExperimentalAutoDetectBinPacking: false 23 | IndentCaseLabels: false 24 | IndentWrappedFunctionNames: false 25 | IndentFunctionDeclarationAfterType: false 26 | MaxEmptyLinesToKeep: 1 27 | KeepEmptyLinesAtTheStartOfBlocks: true 28 
| NamespaceIndentation: None 29 | ObjCSpaceAfterProperty: false 30 | ObjCSpaceBeforeProtocolList: true 31 | PenaltyBreakBeforeFirstCallParameter: 19 32 | PenaltyBreakComment: 300 33 | PenaltyBreakString: 1000 34 | PenaltyBreakFirstLessLess: 120 35 | PenaltyExcessCharacter: 1000000 36 | PenaltyReturnTypeOnItsOwnLine: 60 37 | PointerAlignment: Right 38 | SpacesBeforeTrailingComments: 1 39 | Cpp11BracedListStyle: true 40 | Standard: Cpp11 41 | IndentWidth: 4 42 | TabWidth: 4 43 | UseTab: Never 44 | LineEnding: LF 45 | BreakBeforeBraces: Attach 46 | SpacesInParentheses: false 47 | SpacesInAngles: false 48 | SpaceInEmptyParentheses: false 49 | SpacesInCStyleCastParentheses: false 50 | SpacesInContainerLiterals: true 51 | SpaceBeforeAssignmentOperators: true 52 | ContinuationIndentWidth: 4 53 | CommentPragmas: '^ IWYU pragma:' 54 | ForEachMacros: [ Q_FOREACH, BOOST_FOREACH ] 55 | AttributeMacros: [ LIBIMECORE_EXPORT, LIBIMETABLE_EXPORT, LIBIMEPINYIN_EXPORT ] 56 | SpaceBeforeParens: ControlStatements 57 | DisableFormat: false 58 | SortIncludes: true 59 | IncludeCategories: 60 | # C system headers. 61 | - Regex: '^[<"](aio|arpa/inet|assert|complex|cpio|ctype|curses|dirent|dlfcn|errno|fcntl|fenv|float|fmtmsg|fnmatch|ftw|glob|grp|iconv|inttypes|iso646|langinfo|libgen|limits|locale|math|monetary|mqueue|ndbm|netdb|net/if|netinet/in|netinet/tcp|nl_types|poll|pthread|pwd|regex|sched|search|semaphore|setjmp|signal|spawn|stdalign|stdarg|stdatomic|stdbool|stddef|stdint|stdio|stdlib|stdnoreturn|string|strings|stropts|sys/ipc|syslog|sys/mman|sys/msg|sys/resource|sys/select|sys/sem|sys/shm|sys/socket|sys/stat|sys/statvfs|sys/time|sys/times|sys/types|sys/uio|sys/un|sys/utsname|sys/wait|tar|term|termios|tgmath|threads|time|trace|uchar|ulimit|uncntrl|unistd|utime|utmpx|wchar|wctype|wordexp)\.h[">]$' 62 | Priority: 20 63 | # C++ system headers (as of C++23). 
64 | - Regex: '^[<"](algorithm|any|array|atomic|barrier|bit|bitset|cassert|ccomplex|cctype|cerrno|cfenv|cfloat|charconv|chrono|cinttypes|ciso646|climits|clocale|cmath|codecvt|compare|complex|concepts|condition_variable|coroutine|csetjmp|csignal|cstdalign|cstdarg|cstdbool|cstddef|cstdint|cstdio|cstdlib|cstring|ctgmath|ctime|cuchar|cwchar|cwctype|deque|exception|execution|expected|filesystem|flat_map|flat_set|format|forward_list|fstream|functional|future|generator|initializer_list|iomanip|ios|iosfwd|iostream|istream|iterator|latch|limits|list|locale|map|mdspan|memory|memory_resource|mutex|new|numbers|numeric|optional|ostream|print|queue|random|ranges|ratio|regex|scoped_allocator|semaphore|set|shared_mutex|source_location|span|spanstream|sstream|stack|stacktrace|stdexcept|stdfloat|stop_token|streambuf|string|string_view|strstream|syncstream|system_error|thread|tuple|type_traits|typeindex|typeinfo|unordered_map|unordered_set|utility|valarray|variant|vector|version)[">]$' 65 | Priority: 30 66 | # Other libraries' h files (with angles). 67 | - Regex: '^<' 68 | Priority: 40 69 | # Friend project's h files. 70 | - Regex: '^[<"]fcitx' 71 | Priority: 44 72 | # Your project's h files. 73 | - Regex: '^[<"]libime' 74 | Priority: 45 75 | # Other libraries' h files (with quotes). 76 | - Regex: '^"' 77 | Priority: 50 78 | ... 
79 | 80 | -------------------------------------------------------------------------------- /src/libime/core/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # logic from setup.py 2 | file(GLOB __KENLM_SRCS kenlm/lm/*.cc kenlm/util/*.cc kenlm/util/double-conversion/*.cc) 3 | set(KENLM_SRCS) 4 | foreach(f ${__KENLM_SRCS}) 5 | string(REGEX MATCH "test\\.cc" IS_TEST ${f}) 6 | string(REGEX MATCH "main\\.cc" IS_MAIN ${f}) 7 | if(NOT IS_TEST AND NOT IS_MAIN) 8 | set(KENLM_SRCS ${KENLM_SRCS} ${f}) 9 | endif() 10 | endforeach() 11 | 12 | add_library(kenlm OBJECT ${KENLM_SRCS}) 13 | target_include_directories(kenlm PUBLIC $) 14 | target_compile_definitions(kenlm PUBLIC -DKENLM_MAX_ORDER=3 PRIVATE -DNDEBUG) 15 | target_link_libraries(kenlm PUBLIC Boost::boost PkgConfig::ZSTD) 16 | set_target_properties(kenlm PROPERTIES 17 | POSITION_INDEPENDENT_CODE ON) 18 | 19 | include(CheckCXXSymbolExists) 20 | check_cxx_symbol_exists(_LIBCPP_VERSION version LIBCPP) 21 | if(LIBCPP) 22 | target_compile_definitions(kenlm PUBLIC -D_LIBCPP_ENABLE_CXX17_REMOVED_UNARY_BINARY_FUNCTION) 23 | endif() 24 | 25 | if(UNIX) 26 | check_library_exists(rt clock_gettime "clock_gettime from librt" HAVE_CLOCKGETTIME_RT) 27 | if (HAVE_CLOCKGETTIME_RT) 28 | target_link_libraries(kenlm PUBLIC rt) 29 | else() 30 | check_library_exists(c clock_gettime "clock_gettime from the libc" HAVE_CLOCKGETTIME) 31 | endif() 32 | 33 | if (HAVE_CLOCKGETTIME_RT OR HAVE_CLOCKGETTIME) 34 | target_compile_definitions(kenlm PRIVATE -DHAVE_CLOCKGETTIME) 35 | endif() 36 | endif() 37 | 38 | set(LIBIME_HDRS 39 | datrie.h 40 | decoder.h 41 | languagemodel.h 42 | inputbuffer.h 43 | segmentgraph.h 44 | lattice.h 45 | languagemodel.h 46 | historybigram.h 47 | dictionary.h 48 | userlanguagemodel.h 49 | lrucache.h 50 | prediction.h 51 | triedictionary.h 52 | utils.h 53 | ${CMAKE_CURRENT_BINARY_DIR}/libimecore_export.h 54 | ) 55 | 56 | set(LIBIME_SRCS 57 | datrie.cpp 58 | dictionary.cpp 59 | 
decoder.cpp 60 | languagemodel.cpp 61 | inputbuffer.cpp 62 | lattice.cpp 63 | userlanguagemodel.cpp 64 | historybigram.cpp 65 | segmentgraph.cpp 66 | utils.cpp 67 | prediction.cpp 68 | triedictionary.cpp 69 | ) 70 | 71 | ecm_setup_version(PROJECT 72 | VARIABLE_PREFIX IMECore 73 | PACKAGE_VERSION_FILE "${CMAKE_CURRENT_BINARY_DIR}/LibIMECoreConfigVersion.cmake") 74 | # the cmake if will 75 | set(IMECore_SOVERSION 0) 76 | 77 | add_library(IMECore ${LIBIME_SRCS}) 78 | set_target_properties(IMECore 79 | PROPERTIES VERSION ${IMECore_VERSION} 80 | SOVERSION ${IMECore_SOVERSION} 81 | EXPORT_NAME Core 82 | ) 83 | target_include_directories(IMECore PUBLIC 84 | $ 85 | $ 86 | $) 87 | 88 | target_link_libraries(IMECore PUBLIC Fcitx5::Utils Boost::boost PRIVATE kenlm Boost::iostreams PkgConfig::ZSTD) 89 | 90 | install(TARGETS IMECore EXPORT LibIMECoreTargets LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" COMPONENT lib) 91 | install(FILES ${LIBIME_HDRS} DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/LibIME/libime/core" COMPONENT header) 92 | 93 | add_library(LibIME::Core ALIAS IMECore) 94 | 95 | configure_package_config_file("${CMAKE_CURRENT_SOURCE_DIR}/LibIMECoreConfig.cmake.in" 96 | "${CMAKE_CURRENT_BINARY_DIR}/LibIMECoreConfig.cmake" 97 | INSTALL_DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/LibIMECore" 98 | ) 99 | 100 | generate_export_header(IMECore BASE_NAME LibIMECore) 101 | 102 | install(EXPORT LibIMECoreTargets 103 | DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/LibIMECore" 104 | FILE LibIMECoreTargets.cmake 105 | NAMESPACE LibIME:: 106 | COMPONENT Devel) 107 | 108 | install(FILES "${CMAKE_CURRENT_BINARY_DIR}/LibIMECoreConfig.cmake" 109 | "${CMAKE_CURRENT_BINARY_DIR}/LibIMECoreConfigVersion.cmake" 110 | DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/LibIMECore" 111 | COMPONENT Devel) 112 | -------------------------------------------------------------------------------- /src/libime/core/prediction.cpp: -------------------------------------------------------------------------------- 1 | /* 
2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | 7 | #include "prediction.h" 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include "datrie.h" 17 | #include "historybigram.h" 18 | #include "languagemodel.h" 19 | 20 | namespace libime { 21 | 22 | class PredictionPrivate { 23 | public: 24 | const LanguageModel *model_ = nullptr; 25 | const HistoryBigram *bigram_ = nullptr; 26 | }; 27 | 28 | Prediction::Prediction() : d_ptr(std::make_unique()) {} 29 | 30 | Prediction::~Prediction() = default; 31 | 32 | void Prediction::setLanguageModel(const LanguageModel *model) { 33 | FCITX_D(); 34 | d->model_ = model; 35 | } 36 | 37 | void Prediction::setHistoryBigram(const HistoryBigram *bigram) { 38 | FCITX_D(); 39 | d->bigram_ = bigram; 40 | } 41 | 42 | const LanguageModel *Prediction::model() const { 43 | FCITX_D(); 44 | return d->model_; 45 | } 46 | 47 | const HistoryBigram *Prediction::historyBigram() const { 48 | FCITX_D(); 49 | return d->bigram_; 50 | } 51 | 52 | std::vector 53 | Prediction::predict(const std::vector &sentence, 54 | size_t realMaxSize) { 55 | FCITX_D(); 56 | if (!d->model_) { 57 | return {}; 58 | } 59 | 60 | State state = d->model_->nullState(); 61 | State outState; 62 | std::vector nodes; 63 | nodes.reserve(sentence.size()); 64 | for (const auto &word : sentence) { 65 | auto idx = d->model_->index(word); 66 | nodes.emplace_back(word, idx); 67 | d->model_->score(state, nodes.back(), outState); 68 | state = outState; 69 | } 70 | return predict(state, sentence, realMaxSize); 71 | } 72 | 73 | std::vector> 74 | Prediction::predictWithScore(const State &state, 75 | const std::vector &sentence, 76 | size_t realMaxSize) { 77 | FCITX_D(); 78 | if (!d->model_) { 79 | return {}; 80 | } 81 | // Search more get less. 
82 | size_t maxSize = realMaxSize * 2; 83 | std::unordered_set words; 84 | 85 | if (auto file = d->model_->languageModelFile()) { 86 | std::string search = ""; 87 | if (!sentence.empty()) { 88 | search = sentence.back(); 89 | } 90 | search += "|"; 91 | const auto &trie = file->predictionTrie(); 92 | trie.foreach(search, [&trie, &words, 93 | maxSize](DATrie::value_type, size_t len, 94 | DATrie::position_type pos) { 95 | std::string buf; 96 | trie.suffix(buf, len, pos); 97 | words.emplace(std::move(buf)); 98 | 99 | return maxSize <= 0 || words.size() < maxSize; 100 | }); 101 | } 102 | 103 | if (d->bigram_) { 104 | d->bigram_->fillPredict(words, sentence, maxSize); 105 | } 106 | 107 | std::vector> temps; 108 | for (auto word : words) { 109 | auto score = d->model_->singleWordScore(state, word); 110 | temps.emplace_back(std::move(word), score); 111 | } 112 | std::sort(temps.begin(), temps.end(), [](auto &lhs, auto &rhs) { 113 | if (lhs.second != rhs.second) { 114 | return lhs.second > rhs.second; 115 | } 116 | return lhs.first < rhs.first; 117 | }); 118 | 119 | if (realMaxSize && temps.size() > realMaxSize) { 120 | temps.resize(realMaxSize); 121 | } 122 | return temps; 123 | } 124 | 125 | std::vector 126 | Prediction::predict(const State &state, 127 | const std::vector &sentence, 128 | size_t realMaxSize) { 129 | 130 | auto temps = predictWithScore(state, sentence, realMaxSize); 131 | std::vector result; 132 | result.reserve(temps.size()); 133 | for (auto &temp : temps) { 134 | result.emplace_back(std::move(temp.first)); 135 | } 136 | return result; 137 | } 138 | 139 | } // namespace libime 140 | -------------------------------------------------------------------------------- /src/libime/table/autophrasedict.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | #include "autophrasedict.h" 7 | #include 8 | #include 9 | 
#include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include "libime/core/utils_p.h" 24 | 25 | namespace libime { 26 | 27 | struct AutoPhrase { 28 | AutoPhrase(std::string entry, uint32_t hit) 29 | : entry_(std::move(entry)), hit_(hit) {} 30 | 31 | std::string_view entry() const { return entry_; } 32 | 33 | std::string entry_; 34 | uint32_t hit_ = 0; 35 | }; 36 | 37 | class AutoPhraseDictPrivate { 38 | using item_list = boost::multi_index_container< 39 | AutoPhrase, 40 | boost::multi_index::indexed_by< 41 | boost::multi_index::sequenced<>, 42 | boost::multi_index::ordered_unique< 43 | boost::multi_index::const_mem_fun>>>; 45 | 46 | public: 47 | using iterator = item_list::iterator; 48 | 49 | AutoPhraseDictPrivate(size_t maxItem) : maxItems_(maxItem) {} 50 | FCITX_INLINE_DEFINE_DEFAULT_DTOR_AND_COPY(AutoPhraseDictPrivate); 51 | 52 | item_list il_; 53 | std::size_t maxItems_; 54 | }; 55 | 56 | AutoPhraseDict::AutoPhraseDict(size_t maxItems) 57 | : d_ptr(std::make_unique(maxItems)) {} 58 | 59 | AutoPhraseDict::AutoPhraseDict(size_t maxItems, std::istream &in) 60 | : AutoPhraseDict(maxItems) { 61 | load(in); 62 | } 63 | 64 | FCITX_DEFINE_DPTR_COPY_AND_DEFAULT_DTOR_AND_MOVE(AutoPhraseDict) 65 | 66 | void AutoPhraseDict::insert(const std::string &entry, uint32_t value) { 67 | FCITX_D(); 68 | auto &il = d->il_; 69 | auto p = il.push_front(AutoPhrase{entry, value}); 70 | 71 | auto iter = p.first; 72 | if (!p.second) { 73 | il.relocate(il.begin(), p.first); 74 | iter = il.begin(); 75 | } 76 | if (value == 0) { 77 | il.modify(iter, [](AutoPhrase &phrase) { phrase.hit_ += 1; }); 78 | } 79 | if (il.size() > d->maxItems_) { 80 | il.pop_back(); 81 | } 82 | } 83 | 84 | bool AutoPhraseDict::search( 85 | std::string_view s, 86 | const std::function &callback) const { 87 | FCITX_D(); 88 | const auto &idx = d->il_.get<1>(); 89 | auto iter = 
idx.lower_bound(s); 90 | while (iter != idx.end() && iter->entry().starts_with(s)) { 91 | if (!callback(iter->entry(), iter->hit_)) { 92 | return false; 93 | } 94 | ++iter; 95 | } 96 | return true; 97 | } 98 | 99 | uint32_t AutoPhraseDict::exactSearch(std::string_view s) const { 100 | FCITX_D(); 101 | const auto &idx = d->il_.get<1>(); 102 | auto iter = idx.find(s); 103 | if (iter == idx.end()) { 104 | return 0; 105 | } 106 | return iter->hit_; 107 | } 108 | 109 | void AutoPhraseDict::erase(std::string_view s) { 110 | FCITX_D(); 111 | auto &idx = d->il_.get<1>(); 112 | idx.erase(s); 113 | } 114 | 115 | void AutoPhraseDict::clear() { 116 | FCITX_D(); 117 | d->il_.clear(); 118 | } 119 | 120 | bool AutoPhraseDict::empty() const { 121 | FCITX_D(); 122 | return d->il_.empty(); 123 | } 124 | 125 | void AutoPhraseDict::load(std::istream &in) { 126 | uint32_t size = 0; 127 | throw_if_io_fail(unmarshall(in, size)); 128 | while (size--) { 129 | std::string text; 130 | uint32_t hit = 0; 131 | throw_if_io_fail(unmarshallString(in, text)); 132 | throw_if_io_fail(unmarshall(in, hit)); 133 | insert(text, hit); 134 | } 135 | } 136 | 137 | void AutoPhraseDict::save(std::ostream &out) { 138 | FCITX_D(); 139 | uint32_t size = d->il_.size(); 140 | throw_if_io_fail(marshall(out, size)); 141 | for (const auto &phrase : d->il_ | std::views::reverse) { 142 | throw_if_io_fail(marshallString(out, phrase.entry_)); 143 | throw_if_io_fail(marshall(out, phrase.hit_)); 144 | } 145 | } 146 | } // namespace libime 147 | -------------------------------------------------------------------------------- /src/libime/pinyin/shuangpindata.h: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2011-2020 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | * 6 | */ 7 | 8 | #ifndef SPDATA_H 9 | #define SPDATA_H 10 | 11 | struct SP_C { 12 | char strQP[5]; 13 | char cJP; 14 | }; 15 | 16 | struct SP_S { 17 | char strQP[3]; 18 | 
char cJP; 19 | }; 20 | /* Every table below is terminated by an all-zero sentinel entry {"\0", '\0'}. */ 21 | static const SP_C SPMap_C_MS[] = { 22 | {"ai", 'l'}, {"an", 'j'}, {"ang", 'h'}, {"ao", 'k'}, {"ei", 'z'}, 23 | {"en", 'f'}, {"eng", 'g'}, {"er", 'r'}, {"ia", 'w'}, {"ian", 'm'}, 24 | {"iang", 'd'}, {"iao", 'c'}, {"ie", 'x'}, {"in", 'n'}, {"ing", ';'}, 25 | {"iong", 's'}, {"iu", 'q'}, {"ong", 's'}, {"ou", 'b'}, {"ua", 'w'}, 26 | {"uai", 'y'}, {"uan", 'r'}, {"uang", 'd'}, {"ue", 't'}, {"ui", 'v'}, 27 | {"un", 'p'}, {"uo", 'o'}, {"ve", 'v'}, {"v", 'y'}, {"\0", '\0'}}; 28 | 29 | static const SP_S SPMap_S_MS[] = { 30 | {"ch", 'i'}, {"sh", 'u'}, {"zh", 'v'}, {"\0", '\0'}}; 31 | 32 | static const SP_C SPMap_C_Ziguang[] = { 33 | {"ai", 'p'}, {"an", 'r'}, {"ang", 's'}, {"ao", 'q'}, {"ei", 'k'}, 34 | {"en", 'w'}, {"eng", 't'}, {"er", 'j'}, {"ia", 'x'}, {"ian", 'f'}, 35 | {"iang", 'g'}, {"iao", 'b'}, {"ie", 'd'}, {"in", 'y'}, {"ing", ';'}, 36 | {"iong", 'h'}, {"iu", 'j'}, {"ong", 'h'}, {"ou", 'z'}, {"ua", 'x'}, 37 | {"uai", 'y'}, {"uan", 'l'}, {"uang", 'g'}, {"ue", 'n'}, {"ui", 'n'}, 38 | {"un", 'm'}, {"uo", 'o'}, {"ve", 'n'}, {"v", 'v'}, {"\0", '\0'}}; 39 | 40 | static const SP_S SPMap_S_Ziguang[] = { 41 | {"ch", 'a'}, {"sh", 'i'}, {"zh", 'u'}, {"\0", '\0'}}; 42 | 43 | static const SP_C SPMap_C_ABC[] = { 44 | {"ai", 'l'}, {"an", 'j'}, {"ang", 'h'}, {"ao", 'k'}, {"ei", 'q'}, 45 | {"en", 'f'}, {"eng", 'g'}, {"er", 'r'}, {"ia", 'd'}, {"ian", 'w'}, 46 | {"iang", 't'}, {"iao", 'z'}, {"ie", 'x'}, {"in", 'c'}, {"ing", 'y'}, 47 | {"iong", 's'}, {"iu", 'r'}, {"ong", 's'}, {"ou", 'b'}, {"ua", 'd'}, 48 | {"uai", 'c'}, {"uan", 'p'}, {"uang", 't'}, {"ue", 'm'}, {"ui", 'm'}, 49 | {"un", 'n'}, {"uo", 'o'}, {"ve", 'm'}, {"v", 'v'}, {"\0", '\0'}}; 50 | 51 | static const SP_S SPMap_S_ABC[] = { 52 | {"ch", 'e'}, {"sh", 'v'}, {"zh", 'a'}, {"\0", '\0'}}; 53 | 54 | static const SP_C SPMap_C_Zhongwenzhixing[] = { 55 | {"ai", 's'}, {"an", 'f'}, {"ang", 'g'}, {"ao", 'd'}, {"ei", 'w'}, 56 | {"en", 'r'}, {"eng", 't'}, {"er", 'q'}, {"ia", 'b'}, {"ian", 'j'}, 57 | {"iang", 
'h'}, {"iao", 'k'}, {"ie", 'm'}, {"in", 'l'}, {"ing", 'q'}, 58 | {"iong", 'y'}, {"iu", 'n'}, {"ong", 'y'}, {"ou", 'p'}, {"ua", 'b'}, 59 | {"uai", 'x'}, {"uan", 'c'}, {"uang", 'h'}, {"ue", 'x'}, {"ui", 'v'}, 60 | {"un", 'z'}, {"uo", 'o'}, {"ve", 'x'}, {"v", 'v'}, {"\0", '\0'}}; 61 | 62 | static const SP_S SPMap_S_Zhongwenzhixing[] = { 63 | {"ch", 'u'}, {"sh", 'i'}, {"zh", 'v'}, {"\0", '\0'}}; 64 | /* NOTE(review): both PinyinJiaJia tables hold exactly the same entries as the Zhongwenzhixing tables; confirm the duplication is intended. */ 65 | static const SP_C SPMap_C_PinyinJiaJia[] = { 66 | {"ai", 's'}, {"an", 'f'}, {"ang", 'g'}, {"ao", 'd'}, {"ei", 'w'}, 67 | {"en", 'r'}, {"eng", 't'}, {"er", 'q'}, {"ia", 'b'}, {"ian", 'j'}, 68 | {"iang", 'h'}, {"iao", 'k'}, {"ie", 'm'}, {"in", 'l'}, {"ing", 'q'}, 69 | {"iong", 'y'}, {"iu", 'n'}, {"ong", 'y'}, {"ou", 'p'}, {"ua", 'b'}, 70 | {"uai", 'x'}, {"uan", 'c'}, {"uang", 'h'}, {"ue", 'x'}, {"ui", 'v'}, 71 | {"un", 'z'}, {"uo", 'o'}, {"ve", 'x'}, {"v", 'v'}, {"\0", '\0'}}; 72 | 73 | static const SP_S SPMap_S_PinyinJiaJia[] = { 74 | {"ch", 'u'}, {"sh", 'i'}, {"zh", 'v'}, {"\0", '\0'}}; 75 | 76 | static const SP_C SPMap_C_Ziranma[] = { 77 | {"ai", 'l'}, {"an", 'j'}, {"ang", 'h'}, {"ao", 'k'}, {"ei", 'z'}, 78 | {"en", 'f'}, {"eng", 'g'}, {"er", 'r'}, {"ia", 'w'}, {"ian", 'm'}, 79 | {"iang", 'd'}, {"iao", 'c'}, {"ie", 'x'}, {"in", 'n'}, {"ing", 'y'}, 80 | {"iong", 's'}, {"iu", 'q'}, {"ong", 's'}, {"ou", 'b'}, {"ua", 'w'}, 81 | {"uai", 'y'}, {"uan", 'r'}, {"uang", 'd'}, {"ue", 't'}, {"ui", 'v'}, 82 | {"un", 'p'}, {"uo", 'o'}, {"ve", 't'}, {"v", 'v'}, {"\0", '\0'}}; 83 | 84 | static const SP_S SPMap_S_Ziranma[] = { 85 | {"ch", 'i'}, {"sh", 'u'}, {"zh", 'v'}, {"\0", '\0'}}; 86 | /* NOTE(review): unlike the other SP_C tables, this one has no "er" entry; presumably intentional for this scheme, confirm. */ 87 | static const SP_C SPMap_C_XIAOHE[] = { 88 | {"ai", 'd'}, {"an", 'j'}, {"ang", 'h'}, {"ao", 'c'}, {"ei", 'w'}, 89 | {"en", 'f'}, {"eng", 'g'}, {"ia", 'x'}, {"ian", 'm'}, {"iang", 'l'}, 90 | {"iao", 'n'}, {"ie", 'p'}, {"in", 'b'}, {"ing", 'k'}, {"iong", 's'}, 91 | {"iu", 'q'}, {"ong", 's'}, {"ou", 'z'}, {"ua", 'x'}, {"uai", 'k'}, 92 | {"uan", 'r'}, {"uang", 'l'}, {"ue", 't'}, {"ui", 'v'}, {"un", 
'y'}, 93 | {"uo", 'o'}, {"ve", 't'}, {"v", 'v'}, {"\0", '\0'}}; 94 | 95 | static const SP_S SPMap_S_XIAOHE[] = { 96 | {"ch", 'i'}, {"sh", 'u'}, {"zh", 'v'}, {"\0", '\0'}}; 97 | 98 | #endif 99 | -------------------------------------------------------------------------------- /src/libime/pinyin/pinyinmatchstate_p.h: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | #ifndef _FCITX_LIBIME_PINYIN_PINYINMATCHSTATE_P_H_ 7 | #define _FCITX_LIBIME_PINYIN_PINYINMATCHSTATE_P_H_ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include "libime/core/languagemodel.h" 24 | #include "libime/core/segmentgraph.h" 25 | 26 | namespace libime { 27 | 28 | using PinyinTriePosition = std::pair; 29 | using PinyinTriePositions = std::vector; 30 | 31 | // Matching result for a specific PinyinTrie. 32 | struct MatchedPinyinTrieNodes { 33 | MatchedPinyinTrieNodes(const PinyinTrie *trie, size_t size) 34 | : trie_(trie), size_(size) {} 35 | FCITX_INLINE_DEFINE_DEFAULT_DTOR_COPY_AND_MOVE(MatchedPinyinTrieNodes) 36 | /* The trie passed to the constructor and the positions collected for it (triePositions_ starts out empty). */ 37 | const PinyinTrie *trie_; 38 | PinyinTriePositions triePositions_; 39 | 40 | // Number of syllables, as passed to the constructor. 41 | size_t size_; 42 | }; 43 | 44 | // A cache to store the matched word, encoded Full Pinyin for this word and the 45 | // adjustment score. 
46 | struct PinyinMatchResult { 47 | PinyinMatchResult(std::string_view s, float value, 48 | std::string_view encodedPinyin, bool isCorrection) 49 | : word_(s, InvalidWordIndex), value_(value), 50 | encodedPinyin_(encodedPinyin), isCorrection_(isCorrection) {} 51 | WordNode word_; 52 | float value_ = 0.0F; 53 | std::string encodedPinyin_; 54 | bool isCorrection_ = false; 55 | }; 56 | 57 | // Stores the SegmentGraphPath that leads to this match together with the 58 | // (shared) match result. 59 | struct MatchedPinyinPath { 60 | MatchedPinyinPath(const PinyinTrie *trie, size_t size, 61 | SegmentGraphPath path, PinyinDictFlags flags) 62 | : result_(std::make_shared(trie, size)), 63 | path_(std::move(path)), flags_(flags) {} 64 | /* Variant that reuses an already existing result object instead of creating a new one. */ 65 | MatchedPinyinPath(std::shared_ptr result, 66 | SegmentGraphPath path, PinyinDictFlags flags) 67 | : result_(std::move(result)), path_(std::move(path)), flags_(flags) {} 68 | 69 | FCITX_INLINE_DEFINE_DEFAULT_DTOR_COPY_AND_MOVE(MatchedPinyinPath) 70 | /* Accessors forwarding to the shared result object. */ 71 | auto &triePositions() { return result_->triePositions_; } 72 | const auto &triePositions() const { return result_->triePositions_; } 73 | const PinyinTrie *trie() const { return result_->trie_; } 74 | 75 | // Number of syllables; not necessarily equal to the size of path_, because there 76 | // may be separators. 77 | auto size() const { return result_->size_; } 78 | 79 | std::shared_ptr result_; 80 | SegmentGraphPath path_; 81 | PinyinDictFlags flags_; 82 | }; 83 | 84 | // This needs to be kept in sync with PinyinSegmentGraphPathHasher 85 | class PinyinStringHasher { 86 | public: 87 | size_t operator()(const std::string &s) const { 88 | boost::hash hasher; 89 | 90 | size_t seed = 0; 91 | for (char c : s) { 92 | boost::hash_combine(seed, hasher(c)); 93 | } 94 | return seed; 95 | } 96 | }; 97 | 98 | // A list of all search paths 99 | using MatchedPinyinPaths = std::vector; 100 | 101 | // Map from SegmentGraphNode to Search Paths. 
102 | using NodeToMatchedPinyinPathsMap = 103 | std::unordered_map; 104 | 105 | // A cache for all PinyinTries. Maps a pinyin string to its matched 106 | // PinyinTrieNode. 107 | using PinyinTrieNodeCache = std::unordered_map< 108 | const PinyinTrie *, 109 | LRUCache, 110 | PinyinStringHasher>>; 111 | 112 | // A cache for PinyinMatchResult. 113 | using PinyinMatchResultCache = std::unordered_map< 114 | const PinyinTrie *, 115 | LRUCache, PinyinStringHasher>>; 116 | /* Private data of the match state: the context pointer given at construction, the matched paths map and the two per-trie caches declared above. */ 117 | class PinyinMatchStatePrivate { 118 | public: 119 | PinyinMatchStatePrivate(PinyinContext *context) : context_(context) {} 120 | 121 | PinyinContext *context_; 122 | NodeToMatchedPinyinPathsMap matchedPaths_; 123 | PinyinTrieNodeCache nodeCacheMap_; 124 | PinyinMatchResultCache matchCacheMap_; 125 | }; 126 | } // namespace libime 127 | 128 | #endif // _FCITX_LIBIME_PINYIN_PINYINMATCHSTATE_P_H_ 129 | -------------------------------------------------------------------------------- /src/libime/core/languagemodel.h: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | #ifndef _FCITX_LIBIME_CORE_LANGUAGEMODEL_H_ 7 | #define _FCITX_LIBIME_CORE_LANGUAGEMODEL_H_ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | namespace libime { 21 | 22 | using WordIndex = unsigned int; 23 | constexpr const unsigned int InvalidWordIndex = 24 | std::numeric_limits::max(); 25 | constexpr size_t StateSize = 20 + sizeof(void *); 26 | using State = std::array; 27 | 28 | class WordNode; 29 | class LatticeNode; 30 | class LanguageModelPrivate; 31 | class LanguageModelResolverPrivate; 32 | /* Abstract language model interface: word indices for sentence begin/end and unknown words, the begin and null states, word lookup and scoring. */ 33 | class LIBIMECORE_EXPORT LanguageModelBase { 34 | public: 35 | virtual ~LanguageModelBase(); 36 | 37 | virtual WordIndex beginSentence() const = 0; 38 | virtual WordIndex endSentence() const = 
0; 39 | virtual WordIndex unknown() const = 0; 40 | virtual const State &beginState() const = 0; 41 | virtual const State &nullState() const = 0; 42 | virtual WordIndex index(std::string_view view) const = 0; 43 | virtual float score(const State &state, const WordNode &word, 44 | State &out) const = 0; 45 | virtual bool isUnknown(WordIndex idx, std::string_view view) const = 0; 46 | bool isNodeUnknown(const LatticeNode &node) const; 47 | float singleWordScore(std::string_view word) const; 48 | float singleWordScore(const State &state, std::string_view word) const; 49 | float wordsScore(const State &state, 50 | const std::vector &word) const; 51 | }; 52 | 53 | class StaticLanguageModelFilePrivate; 54 | /* Language model file object constructed from a file path; exposes its prediction trie. */ 55 | class LIBIMECORE_EXPORT StaticLanguageModelFile { 56 | friend class LanguageModelPrivate; 57 | 58 | public: 59 | explicit StaticLanguageModelFile(const char *file); 60 | virtual ~StaticLanguageModelFile(); 61 | 62 | const DATrie &predictionTrie() const; 63 | 64 | private: 65 | std::unique_ptr d_ptr; 66 | FCITX_DECLARE_PRIVATE(StaticLanguageModelFile); 67 | }; 68 | /* LanguageModelBase implementation that can be constructed either from a file path or from a shared_ptr to a file object (nullptr by default). */ 69 | class LIBIMECORE_EXPORT LanguageModel : public LanguageModelBase { 70 | public: 71 | explicit LanguageModel(const char *file); 72 | LanguageModel( 73 | std::shared_ptr file = nullptr); 74 | virtual ~LanguageModel(); 75 | 76 | static size_t maxOrder(); 77 | 78 | std::shared_ptr languageModelFile() const; 79 | 80 | WordIndex beginSentence() const override; 81 | WordIndex endSentence() const override; 82 | WordIndex unknown() const override; 83 | const State &beginState() const override; 84 | const State &nullState() const override; 85 | WordIndex index(std::string_view word) const override; 86 | float score(const State &state, const WordNode &node, 87 | State &out) const override; 88 | bool isUnknown(WordIndex idx, std::string_view word) const override; 89 | void setUnknownPenalty(float unknown); 90 | float unknownPenalty() const; 91 | 92 | unsigned int maxNgramLength(const std::vector &words) const; 
93 | 94 | private: 95 | std::unique_ptr d_ptr; 96 | FCITX_DECLARE_PRIVATE(LanguageModel); 97 | }; 98 | 99 | /// \brief a class that provides language model data for different languages. 100 | /// 101 | /// The resolver will also hold a weak reference to the language model file. 102 | /// If the language model file is still alive no new file will be constructed. 103 | class LIBIMECORE_EXPORT LanguageModelResolver { 104 | public: 105 | LanguageModelResolver(); 106 | FCITX_DECLARE_VIRTUAL_DTOR_MOVE(LanguageModelResolver) 107 | std::shared_ptr 108 | languageModelFileForLanguage(const std::string &language); 109 | 110 | protected: 111 | virtual std::string 112 | languageModelFileNameForLanguage(const std::string &language) = 0; 113 | 114 | private: 115 | std::unique_ptr d_ptr; 116 | FCITX_DECLARE_PRIVATE(LanguageModelResolver); 117 | }; 118 | /* Resolver reached through the static instance() accessor; its constructor and destructor are private. */ 119 | class LIBIMECORE_EXPORT DefaultLanguageModelResolver 120 | : public LanguageModelResolver { 121 | public: 122 | static DefaultLanguageModelResolver &instance(); 123 | 124 | protected: 125 | std::string 126 | languageModelFileNameForLanguage(const std::string &language) override; 127 | 128 | private: 129 | DefaultLanguageModelResolver(); 130 | ~DefaultLanguageModelResolver(); 131 | }; 132 | } // namespace libime 133 | 134 | #endif // _FCITX_LIBIME_CORE_LANGUAGEMODEL_H_ 135 | -------------------------------------------------------------------------------- /test/testdecoder.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include "libime/core/decoder.h" 13 | #include "libime/core/languagemodel.h" 14 | #include "libime/core/lattice.h" 15 | #include "libime/core/segmentgraph.h" 16 | #include "libime/pinyin/pinyindecoder.h" 17 | #include "libime/pinyin/pinyindictionary.h" 18 | #include 
"libime/pinyin/pinyinencoder.h" 19 | #include "testdir.h" 20 | #include "testutils.h" 21 | 22 | using namespace libime; 23 | /* Parses pinyin with the given fuzzy flags, decodes the graph into a lattice asking for nbest results (starting from the model's null state) and prints every sentence with its score. The whole body is timed by a ScopedNanoTimer that reports through printTime. NOTE(review): printTime receives the elapsed value as int here; the lambda passed to ScopedNanoTimer in testpinyinime.cpp takes int64_t, so confirm the callback type and whether int can truncate long runs. */ 24 | void testTime(PinyinDictionary & /*unused*/, Decoder &decoder, 25 | const char *pinyin, PinyinFuzzyFlags flags, int nbest = 1) { 26 | auto printTime = [](int t) { 27 | std::cout << "Time: " << t / 1000000.0 << " ms" << std::endl; 28 | }; 29 | ScopedNanoTimer timer(printTime); 30 | auto graph = PinyinEncoder::parseUserPinyin(pinyin, flags); 31 | Lattice lattice; 32 | decoder.decode(lattice, graph, nbest, decoder.model()->nullState(), 33 | std::numeric_limits::max(), 34 | -std::numeric_limits::max(), Decoder::beamSizeDefault, 35 | Decoder::frameSizeDefault, nullptr); 36 | for (size_t i = 0, e = lattice.sentenceSize(); i < e; i++) { 37 | const auto &sentence = lattice.sentence(i); 38 | for (const auto &p : sentence.sentence()) { 39 | std::cout << p->word() << " "; 40 | } 41 | std::cout << sentence.score() << std::endl; 42 | } 43 | } 44 | /* Loads the system dictionary and language model from LIBIME_BINARY_DIR and runs testTime over a list of sample inputs. */ 45 | int main() { 46 | PinyinDictionary dict; 47 | dict.load(PinyinDictionary::SystemDict, LIBIME_BINARY_DIR "/data/sc.dict", 48 | PinyinDictFormat::Binary); 49 | LanguageModel model(LIBIME_BINARY_DIR "/data/sc.lm"); 50 | PinyinDecoder decoder(&dict, &model); 51 | testTime(dict, decoder, "wojiushixiangceshi", PinyinFuzzyFlag::None); 52 | testTime(dict, decoder, "xian", PinyinFuzzyFlag::Inner); 53 | testTime(dict, decoder, "xiian", PinyinFuzzyFlag::Inner); 54 | testTime(dict, decoder, "tanan", PinyinFuzzyFlag::Inner); 55 | testTime(dict, decoder, "jin'an", PinyinFuzzyFlag::Inner); 56 | testTime(dict, decoder, "sh'a", PinyinFuzzyFlag::Inner); 57 | testTime(dict, decoder, "xiian", PinyinFuzzyFlag::Inner); 58 | testTime(dict, decoder, "anqilaibufangbian", PinyinFuzzyFlag::Inner); 59 | testTime(dict, decoder, "zhizuoxujibianchengleshunshuituizhoudeshiqing", 60 | PinyinFuzzyFlag::Inner, 2); 61 | testTime(dict, decoder, "xi'ian", PinyinFuzzyFlag::Inner); 62 | testTime(dict, decoder, "zuishengmengsi'''", 
PinyinFuzzyFlag::Inner); 63 | testTime(dict, decoder, "yongtiechuichuidanchuibupo", 64 | PinyinFuzzyFlag::Inner); 65 | testTime(dict, decoder, "feibenkerenyuanbunengrunei", 66 | PinyinFuzzyFlag::Inner); 67 | testTime(dict, decoder, "feibenkerenyuanbuderunei", PinyinFuzzyFlag::Inner); 68 | testTime(dict, decoder, "yongtiechuichuidanchuibupo", 69 | PinyinFuzzyFlag::Inner, 2); 70 | testTime(dict, decoder, "feibenkerenyuanbuderunei", PinyinFuzzyFlag::Inner, 71 | 2); 72 | testTime(dict, decoder, "tashiyigehaoren", PinyinFuzzyFlag::Inner, 3); 73 | testTime(dict, decoder, "xianshi", PinyinFuzzyFlag::Inner, 20); 74 | testTime(dict, decoder, "xianshi", PinyinFuzzyFlag::Inner, 1); 75 | testTime(dict, decoder, "'xianshi", PinyinFuzzyFlag::Inner, 1); 76 | testTime(dict, decoder, "zhuoyand", PinyinFuzzyFlag::Inner, 1); 77 | testTime(dict, decoder, "nd", PinyinFuzzyFlag::Inner, 1); 78 | testTime(dict, decoder, "zhzxjbchlshshtzhdshq", PinyinFuzzyFlag::Inner, 1); 79 | testTime(dict, decoder, "tashini", PinyinFuzzyFlag::Inner, 2); 80 | testTime(dict, decoder, "'''", PinyinFuzzyFlag::Inner, 2); 81 | // testTime(dict, decoder, "n", PinyinFuzzyFlag::Inner); 82 | /* The blocks below time parsing and a prefix match with an empty callback separately, before decoding the same input through testTime. NOTE(review): this lambda also takes the elapsed value as int; see testTime. */ 83 | auto printTime = [](int t) { 84 | std::cout << "Time: " << t / 1000000.0 << " ms" << std::endl; 85 | }; 86 | 87 | SegmentGraph graph; 88 | { 89 | ScopedNanoTimer timer(printTime); 90 | std::cout << "Parse Pinyin "; 91 | graph = PinyinEncoder::parseUserPinyin("sdfsdfsdfsdfsdfsdfsdf", 92 | PinyinFuzzyFlag::None); 93 | } 94 | { 95 | // try do nothing 96 | ScopedNanoTimer timer(printTime); 97 | std::cout << "Pure Match "; 98 | dict.matchPrefix(graph, [](const SegmentGraphPath &, WordNode &, float, 99 | std::unique_ptr) {}); 100 | } 101 | testTime(dict, decoder, "sdfsdfsdfsdfsdfsdfsdf", PinyinFuzzyFlag::None, 2); 102 | testTime(dict, decoder, "ceshiyixiayebuhuichucuo", PinyinFuzzyFlag::None, 103 | 2); 104 | return 0; 105 | } 106 | --------------------------------------------------------------------------------
/test/testpinyinime.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include "libime/core/historybigram.h" 20 | #include "libime/core/lattice.h" 21 | #include "libime/core/userlanguagemodel.h" 22 | #include "libime/pinyin/pinyincontext.h" 23 | #include "libime/pinyin/pinyindecoder.h" 24 | #include "libime/pinyin/pinyindictionary.h" 25 | #include "libime/pinyin/pinyinencoder.h" 26 | #include "libime/pinyin/pinyinime.h" 27 | #include "libime/pinyin/shuangpinprofile.h" 28 | #include "testdir.h" 29 | #include "testutils.h" 30 | 31 | using namespace libime; 32 | /* Interactive test driver. Optional argv[1] is loaded as the user dictionary and argv[2] as the history file. Whitespace-separated commands are then read from stdin: a single letter (or ' once input exists) is typed, a single digit selects a candidate ('1'..'9' map to indices 0..8, '0' to index 9), and back / reset / cancel / left / right / all / quit act as named. After each command the preedit, sentence and up to ten candidates (all of them after "all") are printed, or the commit is printed and learned once a selection is complete. */ 33 | int main(int argc, char *argv[]) { 34 | auto printTime = [](int64_t t) { 35 | std::cout << "Time: " << t / 1000000.0 << " ms" << std::endl; 36 | }; 37 | fcitx::Log::setLogRule("libime=5"); 38 | PinyinIME ime( 39 | std::make_unique(), 40 | std::make_unique(LIBIME_BINARY_DIR "/data/sc.lm")); 41 | ime.setNBest(2); 42 | ime.dict()->load(PinyinDictionary::SystemDict, 43 | LIBIME_BINARY_DIR "/data/sc.dict", 44 | PinyinDictFormat::Binary); 45 | if (argc >= 2) { 46 | ime.dict()->load(PinyinDictionary::UserDict, argv[1], 47 | PinyinDictFormat::Binary); 48 | } 49 | if (argc >= 3) { 50 | std::fstream fin(argv[2], std::ios::in | std::ios::binary); 51 | ime.model()->history().load(fin); 52 | } 53 | ime.setFuzzyFlags({PinyinFuzzyFlag::Inner, PinyinFuzzyFlag::CommonTypo, 54 | PinyinFuzzyFlag::AdvancedTypo}); 55 | ime.setScoreFilter(1.0F); 56 | ime.setShuangpinProfile( 57 | std::make_shared(ShuangpinBuiltinProfile::Xiaohe)); 58 | PinyinContext c(&ime); 59 | 60 | std::string word; 61 | while (std::cin >> word) { 62 | bool printAll = false; 63 | ScopedNanoTimer t(printTime); 64 | if (word == "back") { 
65 | c.backspace(); 66 | } else if (word == "reset") { 67 | c.clear(); 68 | } else if (word == "cancel") { 69 | c.cancel(); 70 | } else if (word == "left") { 71 | if (c.cursor() > 0) { 72 | c.setCursor(c.cursor() - 1); 73 | } 74 | } else if (word == "right") { 75 | if (c.cursor() < c.size()) { 76 | c.setCursor(c.cursor() + 1); 77 | } 78 | } else if (word.size() == 1 && 79 | (('a' <= word[0] && word[0] <= 'z') || 80 | (!c.userInput().empty() && word[0] == '\''))) { 81 | c.type(word); 82 | } else if (word.size() == 1 && ('0' <= word[0] && word[0] <= '9')) { 83 | size_t idx; 84 | if (word[0] == '0') { 85 | idx = 9; 86 | } else { 87 | idx = word[0] - '1'; 88 | } 89 | if (c.candidates().size() > idx) { 90 | c.select(idx); /* idx is zero-based, so it must be strictly below size() */ 91 | } 92 | } else if (word == "all") { 93 | printAll = true; 94 | } else if (word == "quit") { 95 | break; 96 | } 97 | if (c.selected()) { 98 | std::cout << "COMMIT: " << c.preedit() << std::endl; 99 | c.learn(); 100 | c.clear(); 101 | continue; 102 | } 103 | std::cout << "PREEDIT: " << c.preedit() << std::endl; 104 | std::cout << "SENTENCE: " << c.sentence() << std::endl; 105 | size_t count = 1; 106 | for (const auto &candidate : c.candidatesToCursor()) { 107 | std::cout << (count % 10) << ": "; 108 | for (const auto *node : candidate.sentence()) { 109 | const auto &pinyin = 110 | node->as().encodedPinyin(); 111 | std::cout << node->word(); 112 | if (!pinyin.empty()) { 113 | std::cout << " " << PinyinEncoder::decodeFullPinyin(pinyin); 114 | } 115 | } 116 | std::cout << " " << candidate.score() << std::endl; 117 | count++; 118 | if (!printAll && count > 10) { 119 | break; 120 | } 121 | } 122 | } 123 | /* Exercise the save and dump paths without producing files: both write into a null sink. */ 124 | boost::iostreams::stream nullOstream( 125 | (boost::iostreams::null_sink())); 126 | ime.dict()->save(PinyinDictionary::UserDict, nullOstream, 127 | PinyinDictFormat::Binary); 128 | ime.model()->history().dump(nullOstream); 129 | 130 | return 0; 131 | } 132 | --------------------------------------------------------------------------------
/.github/workflows/check.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | on: 3 | push: 4 | branches: 5 | - master 6 | pull_request: 7 | branches: 8 | - master 9 | jobs: 10 | clang-format: 11 | name: Check clang-format 12 | runs-on: ubuntu-latest 13 | container: archlinux:latest 14 | steps: 15 | - name: Install dependencies 16 | run: | 17 | pacman -Syu --noconfirm git clang diffutils 18 | git config --global --add safe.directory $GITHUB_WORKSPACE 19 | - uses: actions/checkout@v4 20 | - uses: fcitx/github-actions@clang-format 21 | check: 22 | name: Build and test 23 | needs: clang-format 24 | runs-on: ubuntu-latest 25 | container: archlinux:latest 26 | strategy: 27 | fail-fast: false 28 | matrix: 29 | compiler: [gcc, clang] 30 | include: 31 | - compiler: gcc 32 | cxx_compiler: g++ 33 | - compiler: clang 34 | cxx_compiler: clang++ 35 | env: 36 | CC: ${{ matrix.compiler }} 37 | CXX: ${{ matrix.cxx_compiler }} 38 | steps: 39 | - name: Install dependencies 40 | run: | 41 | pacman -Syu --noconfirm base-devel clang cmake ninja extra-cmake-modules fmt libuv boost git 42 | - uses: actions/checkout@v4 43 | with: 44 | repository: fcitx/fcitx5 45 | path: fcitx5 46 | - name: Cache fcitx5 data files 47 | uses: actions/cache@v4 48 | with: 49 | path: 'fcitx5/**/*.tar.*' 50 | key: ${{ runner.os }}-${{ hashFiles('fcitx5/src/modules/spell/CMakeLists.txt') 51 | }} 52 | - name: Build and Install fcitx5 53 | uses: fcitx/github-actions@cmake 54 | with: 55 | path: fcitx5 56 | cmake-option: >- 57 | -DENABLE_KEYBOARD=Off -DENABLE_X11=Off -DENABLE_WAYLAND=Off -DENABLE_ENCHANT=Off 58 | -DENABLE_DBUS=Off -DENABLE_SERVER=Off -DENABLE_EMOJI=Off -DUSE_SYSTEMD=Off 59 | - uses: actions/checkout@v4 60 | with: 61 | path: libime 62 | submodules: true 63 | - name: Cache libime data files 64 | uses: actions/cache@v4 65 | with: 66 | path: 'libime/**/*.tar.*' 67 | key: ${{ runner.os }}-${{ hashFiles('libime/data/CMakeLists.txt') }} 68 | - name: Init 
CodeQL 69 | uses: github/codeql-action/init@v3 70 | with: 71 | languages: cpp 72 | source-root: libime 73 | - name: Build and Install libime 74 | uses: fcitx/github-actions@cmake 75 | with: 76 | path: libime 77 | - name: Test 78 | run: | 79 | ctest --test-dir libime/build 80 | - name: CodeQL Analysis 81 | uses: github/codeql-action/analyze@v3 82 | # The analyze step must use the same codeql-action major version as the "Init CodeQL" step above. 83 | check-windows: 84 | name: Build on Windows 85 | needs: clang-format 86 | runs-on: windows-2025 87 | strategy: 88 | fail-fast: false 89 | 90 | steps: 91 | - name: Install dependencies 92 | run: | 93 | C:/msys64/usr/bin/pacman -Syu --noconfirm 94 | C:/msys64/usr/bin/pacman -S --noconfirm ` 95 | mingw-w64-clang-x86_64-extra-cmake-modules ` 96 | mingw-w64-clang-x86_64-dlfcn ` 97 | mingw-w64-clang-x86_64-libuv ` 98 | mingw-w64-clang-x86_64-clang ` 99 | mingw-w64-clang-x86_64-cmake ` 100 | mingw-w64-clang-x86_64-ninja ` 101 | mingw-w64-clang-x86_64-pkgconf ` 102 | mingw-w64-clang-x86_64-gettext-tools ` 103 | mingw-w64-clang-x86_64-boost 104 | Add-Content $env:GITHUB_PATH "C:/msys64/clang64/bin" 105 | Add-Content $env:GITHUB_PATH "${{ github.workspace }}/fcitx/bin" 106 | 107 | - uses: actions/checkout@v4 108 | with: 109 | repository: fcitx/fcitx5 110 | path: fcitx5 111 | 112 | - name: Cache fcitx5 data files 113 | uses: actions/cache@v4 114 | with: 115 | path: 'fcitx5/**/*.tar.*' 116 | key: ${{ runner.os }}-${{ hashFiles('fcitx5/src/modules/spell/CMakeLists.txt') }} 117 | 118 | - name: Build 119 | uses: fcitx/github-actions@cmake 120 | with: 121 | path: fcitx5 122 | cmake-option: >- 123 | -DENABLE_DBUS=Off -DENABLE_X11=Off -DENABLE_WAYLAND=Off -DENABLE_ENCHANT=Off -DENABLE_SERVER=Off -DENABLE_XDGAUTOSTART=Off -DENABLE_LIBUUID=Off -DENABLE_KEYBOARD=Off -DCMAKE_CXX_FLAGS=-fexperimental-library 124 | install-prefix: ${{ github.workspace }}/fcitx 125 | shell: pwsh 126 | 127 | - uses: actions/checkout@v4 128 | with: 129 | path: libime 130 | submodules: true 131 | 132 | - name: Cache libime data files 133 | uses: actions/cache@v4 
134 | with: 135 | path: 'libime/**/*.tar.*' 136 | key: ${{ runner.os }}-${{ hashFiles('libime/data/CMakeLists.txt') }} 137 | 138 | - name: Build 139 | uses: fcitx/github-actions@cmake 140 | with: 141 | path: libime 142 | install-prefix: ${{ github.workspace }}/fcitx 143 | shell: pwsh 144 | 145 | - name: Test 146 | run: | 147 | ctest --test-dir libime/build 148 | -------------------------------------------------------------------------------- /src/libime/core/segmentgraph.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | 7 | #include "segmentgraph.h" 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | namespace libime { 18 | /* Comparators that order by descending index(); used with std::priority_queue below so that the element with the smallest index is on top. The pair variant compares the first member only. */ 19 | struct SegmentGraphNodePairGreater { 20 | bool operator()(const std::pair &lhs, 22 | const std::pair &rhs) const { 24 | return lhs.first->index() > rhs.first->index(); 25 | } 26 | }; 27 | struct SegmentGraphNodeGreater { 28 | bool operator()(const SegmentGraphNode *lhs, 29 | const SegmentGraphNode *rhs) const { 30 | return lhs->index() > rhs->index(); 31 | } 32 | }; 33 | /* Visits the nodes reachable from `from` in ascending index order, each node at most once, calling callback(*this, node). Stops and returns false as soon as the callback returns false; returns true when the queue is exhausted. */ 34 | bool SegmentGraphBase::bfs(const SegmentGraphNode *from, 35 | const SegmentGraphBFSCallback &callback) const { 36 | std::priority_queue, 38 | SegmentGraphNodeGreater> 39 | q; 40 | q.push(from); 41 | std::unordered_set visited; 42 | while (!q.empty()) { 43 | const auto *node = q.top(); 44 | q.pop(); 45 | if (!visited.contains(node)) { 46 | visited.insert(node); 47 | } else { 48 | continue; 49 | } 50 | 51 | if (!callback(*this, node)) { 52 | return false; 53 | } 54 | for (const auto &next : node->nexts()) { 55 | q.push(&next); 56 | } 57 | } 58 | return true; 59 | } 60 | /* Walks this graph and `graph` in parallel from their start nodes and returns the index of the first node whose number of successors, successor indices or segments differ; returns end().index() + 1 when no difference is found. */ 61 | size_t SegmentGraph::check(const SegmentGraph &graph) const { 62 | std::priority_queue< 63 | std::pair, 64 | std::vector< 65 | std::pair>, 66 | SegmentGraphNodePairGreater> 67 | 
q; 68 | 69 | q.emplace(&start(), &graph.start()); 70 | while (!q.empty()) { 71 | auto [old, now] = q.top(); 72 | q.pop(); 73 | do { 74 | assert(old->index() == now->index()); 75 | if (old->nextSize() != now->nextSize()) { 76 | return old->index(); 77 | } 78 | /* First pass: compare every pair of successors; any mismatch ends the check at the current node. */ 79 | const SegmentGraphNode *nold; 80 | const SegmentGraphNode *nnow; 81 | for (auto t : boost::combine(old->nexts(), now->nexts())) { 82 | nold = &boost::get<0>(t); 83 | nnow = &boost::get<1>(t); 84 | if (nold->index() != nnow->index() || 85 | segment(*old, *nold) != graph.segment(*now, *nnow)) { 86 | return old->index(); 87 | } 88 | } 89 | /* Second pass: all successors matched, so queue each pair for comparison. */ 90 | for (auto t : boost::combine(old->nexts(), now->nexts())) { 91 | nold = &boost::get<0>(t); 92 | nnow = &boost::get<1>(t); 93 | q.emplace(nold, nnow); 94 | } 95 | } while (0); 96 | } 97 | /* No difference found. */ 98 | return end().index() + 1; 99 | } 100 | /* Merges `graph` into this graph. Nodes below the first differing index (`since`, from check()) stay in this graph, with edges that point at index >= since redirected to the corresponding nodes of `graph`; the matching slots of `graph` are released. From `since` on, the node slots are swapped in from `graph`. Nodes removed from this graph are collected in nodeToDiscard and reported through discardCallback. Merging a graph with itself is a no-op. */ 101 | void SegmentGraph::merge(SegmentGraph &graph, 102 | const DiscardCallback &discardCallback) { 103 | if (&graph == this) { 104 | return; 105 | } 106 | auto since = check(graph); 107 | std::unordered_set nodeToDiscard; 108 | for (size_t i = 0; i < since; i++) { 109 | for (auto &node : mutableNodes(i)) { 110 | std::vector newNext; 111 | for (auto &next : node.mutableNexts()) { 112 | SegmentGraphNode *n; 113 | if (next.index() >= since) { 114 | n = graph.graph_[next.index()].get(); 115 | } else { 116 | n = &next; 117 | } 118 | newNext.push_back(n); 119 | } 120 | while (node.nextSize()) { 121 | node.removeEdge(node.mutableNexts().front()); 122 | } 123 | for (auto *n : newNext) { 124 | node.addEdge(*n); 125 | } 126 | } 127 | graph.graph_[i].reset(); 128 | } 129 | 130 | mutableData() = graph.data(); 131 | 132 | // these nodes will be discarded by resize() 133 | if (data().size() + 1 < graph_.size()) { 134 | for (size_t i = data().size() + 1; i < graph_.size(); i++) { 135 | for (const auto &node : nodes(i)) { 136 | nodeToDiscard.insert(&node); 137 | } 138 | } 139 | } 140 | 141 | resize(data().size() + 1); 142 | for (size_t i = since; i <= 
size(); i++) { 143 | for (const auto &node : nodes(i)) { 144 | nodeToDiscard.insert(&node); 145 | } 146 | std::swap(graph_[i], graph.graph_[i]); 147 | graph.graph_[i].reset(); 148 | } 149 | /* Hand the collected set of discarded nodes to the caller, if a callback was given. */ 150 | if (discardCallback) { 151 | discardCallback(nodeToDiscard); 152 | } 153 | } 154 | } // namespace libime 155 | -------------------------------------------------------------------------------- /src/libime/table/tablebaseddictionary.h: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2015-2020 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | * 6 | */ 7 | 8 | #ifndef _FCITX_LIBIME_TABLE_TABLEBASEDDICTIONARY_H_ 9 | #define _FCITX_LIBIME_TABLE_TABLEBASEDDICTIONARY_H_ 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | 28 | namespace libime { 29 | class TableBasedDictionaryPrivate; 30 | class TableOptions; 31 | /* Flag describing a phrase in the table. Note that numbering starts at None = 1, not 0. */ 32 | enum class PhraseFlag { 33 | None = 1, 34 | Pinyin, 35 | Prompt, 36 | ConstructPhrase, 37 | User, 38 | Auto, 39 | Invalid 40 | }; 41 | 42 | using TableMatchCallback = std::function; 44 | 45 | enum class TableFormat { Text, Binary }; 46 | enum class TableMatchMode { Exact, Prefix }; 47 | 48 | class TableRule; 49 | /* Dictionary implementation for table-based input; also a fcitx::ConnectableObject so that it can emit the tableOptionsChanged signal. Not copyable. */ 50 | class LIBIMETABLE_EXPORT TableBasedDictionary 51 | : public Dictionary, 52 | public fcitx::ConnectableObject { 53 | friend class TableContextPrivate; 54 | 55 | public: 56 | TableBasedDictionary(); 57 | virtual ~TableBasedDictionary(); 58 | 59 | TableBasedDictionary(const TableBasedDictionary &other) = delete; 60 | /* Main table load/save, from a file name or a stream; the format defaults to Binary. */ 61 | void load(const char *filename, TableFormat format = TableFormat::Binary); 62 | void load(std::istream &in, TableFormat format = TableFormat::Binary); 63 | void save(const char *filename, TableFormat format = TableFormat::Binary); 64 | void save(std::ostream &out, 
TableFormat format = TableFormat::Binary); 65 | /* User table load/save, with the same overload set as load()/save(). */ 66 | void loadUser(const char *filename, 67 | TableFormat format = TableFormat::Binary); 68 | void loadUser(std::istream &in, TableFormat format = TableFormat::Binary); 69 | void saveUser(const char *filename, 70 | TableFormat format = TableFormat::Binary); 71 | void saveUser(std::ostream &out, TableFormat format = TableFormat::Binary); 72 | /* Extra tables: loadExtra returns a size_t, presumably the index that saveExtra expects (TODO confirm against the implementation). */ 73 | size_t loadExtra(const char *filename, 74 | TableFormat format = TableFormat::Binary); 75 | size_t loadExtra(std::istream &in, 76 | TableFormat format = TableFormat::Binary); 77 | void saveExtra(size_t index, const char *filename, 78 | TableFormat format = TableFormat::Binary); 79 | void saveExtra(size_t index, std::ostream &out, 80 | TableFormat format = TableFormat::Binary); 81 | 82 | void removeAllExtra(); 83 | 84 | bool hasRule() const noexcept; 85 | bool hasCustomPrompt() const noexcept; 86 | const TableRule *findRule(std::string_view name) const; 87 | bool insert(std::string_view key, std::string_view value, 88 | PhraseFlag flag = PhraseFlag::None, 89 | bool verifyWithRule = false); 90 | bool insert(std::string_view value, PhraseFlag flag = PhraseFlag::None); 91 | bool generate(std::string_view value, std::string &key) const; 92 | bool generateWithHint(std::string_view value, 93 | const std::vector &codeHint, 94 | std::string &key) const; 95 | 96 | bool isInputCode(uint32_t c) const; 97 | bool isAllInputCode(std::string_view code) const; 98 | bool isEndKey(uint32_t c) const; 99 | 100 | bool hasPinyin() const; 101 | uint32_t maxLength() const; 102 | bool isValidLength(size_t length) const; 103 | 104 | void statistic() const; 105 | 106 | void setTableOptions(TableOptions option); 107 | const TableOptions &tableOptions() const; 108 | /* Lookup by code: matchWords reports matches through the callback, in Exact or Prefix mode. */ 109 | bool matchWords(std::string_view code, TableMatchMode mode, 110 | const TableMatchCallback &callback) const; 111 | 112 | bool hasMatchingWords(std::string_view code) const; 113 | bool hasMatchingWords(std::string_view code, 
std::string_view next) const; 114 | 115 | bool hasOneMatchingWord(std::string_view code) const; 116 | 117 | PhraseFlag wordExists(std::string_view code, std::string_view word) const; 118 | void removeWord(std::string_view code, std::string_view word); 119 | 120 | std::string reverseLookup(std::string_view word, 121 | PhraseFlag flag = PhraseFlag::None) const; 122 | std::string hint(std::string_view key) const; 123 | 124 | FCITX_DECLARE_SIGNAL(TableBasedDictionary, tableOptionsChanged, void()); 125 | 126 | private: 127 | void loadText(std::istream &in); 128 | void loadBinary(std::istream &in); 129 | void saveText(std::ostream &out); 130 | void saveBinary(std::ostream &origOut); 131 | 132 | void 133 | matchPrefixImpl(const SegmentGraph &graph, 134 | const GraphMatchCallback &callback, 135 | const std::unordered_set &ignore, 136 | void *helper) const override; 137 | 138 | std::unique_ptr d_ptr; 139 | FCITX_DECLARE_PRIVATE(TableBasedDictionary); 140 | }; 141 | } // namespace libime 142 | 143 | #endif // _FCITX_LIBIME_TABLE_TABLEBASEDDICTIONARY_H_ 144 | -------------------------------------------------------------------------------- /src/libime/table/tabledecoder.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | 7 | #include "libime/table/tabledecoder.h" 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include "libime/core/languagemodel.h" 20 | #include "libime/core/lattice.h" 21 | #include "libime/core/segmentgraph.h" 22 | #include "libime/table/tablebaseddictionary.h" 23 | #include "tabledecoder_p.h" 24 | #include "tableoptions.h" 25 | #include "tablerule.h" 26 | 27 | namespace libime { 28 | 29 | namespace { 30 | 31 | bool isNotPlaceHolder(const TableRuleEntry &entry) { 32 | return !entry.isPlaceHolder(); 
33 | } 34 | /* Returns true when `rule` can drive automatic segmentation: its flag must be LengthEqual, and its non-placeholder entries must cover characters 1..phraseLength() in order, each character using only FromFront entries with indices 1, 2, 3, ... consecutively. */ 35 | bool checkRuleCanBeUsedAsAutoRule(const TableRule &rule) { 36 | if (rule.flag() != TableRuleFlag::LengthEqual) { 37 | return false; 38 | } 39 | /* Placeholder entries are skipped by the filter view. */ 40 | auto range = rule.entries() | std::views::filter(isNotPlaceHolder); 41 | auto iter = std::begin(range); 42 | auto end = std::end(range); 43 | int currentChar = 1; 44 | while (iter != end) { 45 | int currentIndex = 1; 46 | while (iter != end) { 47 | if (iter->character() == currentChar) { 48 | if (iter->flag() == TableRuleEntryFlag::FromFront && 49 | iter->index() == currentIndex) { 50 | currentIndex++; 51 | } else { 52 | // reset to invalid: 1 is the sentinel tested after this inner loop. 53 | currentIndex = 1; 54 | break; 55 | } 56 | } else { 57 | break; 58 | } 59 | ++iter; 60 | } 61 | /* currentIndex still 1 means no valid entry was consumed for currentChar (none present, or one was rejected above). */ 62 | if (currentIndex == 1) { 63 | return false; 64 | } 65 | currentChar++; 66 | } 67 | return currentChar == rule.phraseLength() + 1; 68 | } 69 | } // namespace 70 | /* TableLatticeNode accessors fall back to a default when d_ptr is null: index() yields 0xFFFFFFFFU and flag() yields PhraseFlag::None. */ 71 | uint32_t TableLatticeNode::index() const { 72 | return d_ptr ? d_ptr->index_ : 0xFFFFFFFFU; 73 | } 74 | 75 | PhraseFlag TableLatticeNode::flag() const { 76 | return d_ptr ?
d_ptr->flag_ : PhraseFlag::None; 77 | } 78 | /* Returns a reference to a function-local static empty string when d_ptr is null. */ 79 | const std::string &TableLatticeNode::code() const { 80 | static const std::string empty; 81 | if (!d_ptr) { 82 | return empty; 83 | } 84 | return d_ptr->code_; 85 | } 86 | /* Returns 0 when d_ptr is null. */ 87 | size_t TableLatticeNode::codeLength() const { 88 | if (!d_ptr) { 89 | return 0; 90 | } 91 | return d_ptr->codeLength_; 92 | } 93 | /* Forwards word, idx, path, state and cost to LatticeNode and takes ownership of `data` (which may be null, see the accessors). */ 94 | TableLatticeNode::TableLatticeNode( 95 | std::string_view word, WordIndex idx, SegmentGraphPath path, 96 | const State &state, float cost, 97 | std::unique_ptr data) 98 | : LatticeNode(word, idx, std::move(path), state, cost), 99 | d_ptr(std::move(data)) {} 100 | 101 | TableLatticeNode::~TableLatticeNode() = default; 102 | /* Releases `data`, downcasts it with static_cast and hands it to a newly allocated TableLatticeNode. The graph, model and onlyPath arguments are unused. NOTE(review): the raw pointer returned by `new` is presumably owned by the caller - confirm against the base class contract. */ 103 | LatticeNode *TableDecoder::createLatticeNodeImpl( 104 | const SegmentGraphBase & /*graph*/, const LanguageModelBase * /*model*/, 105 | std::string_view word, WordIndex idx, SegmentGraphPath path, 106 | const State &state, float cost, std::unique_ptr data, 107 | bool /*onlyPath*/) const { 108 | std::unique_ptr tableData( 109 | static_cast(data.release())); 110 | return new TableLatticeNode(word, idx, std::move(path), state, cost, 111 | std::move(tableData)); 112 | } 113 | /* Sorting is requested unless the start node of the graph has exactly one outgoing edge; the node argument is unused. */ 114 | bool TableDecoder::needSort(const SegmentGraph &graph, 115 | const SegmentGraphNode * /*node*/) const { 116 | return graph.start().nextSize() != 1; 117 | } 118 | /* Builds a SegmentGraph over a copy of `s`. An empty `s` yields a graph with no edges added. Otherwise one edge spans the whole input, and each applicable auto rule of `dict` adds one edge per character segment. */ 119 | SegmentGraph graphForCode(std::string_view s, 120 | const TableBasedDictionary &dict) { 121 | SegmentGraph graph{std::string(s)}; 122 | if (s.empty()) { 123 | return graph; 124 | } 125 | graph.addNext(0, graph.size()); 126 | auto codeLength = fcitx::utf8::length(graph.data()); 127 | // Rule: skip rules that are missing, whose code length differs from the input, or that fail checkRuleCanBeUsedAsAutoRule.
128 | if (dict.hasRule() && !dict.tableOptions().autoRuleSet().empty()) { 129 | const auto &ruleSet = dict.tableOptions().autoRuleSet(); 130 | for (const auto &ruleName : ruleSet) { 131 | const auto *rule = dict.findRule(ruleName); 132 | if (!rule || codeLength != rule->codeLength() || 133 | !checkRuleCanBeUsedAsAutoRule(*rule)) { 134 | continue; 135 | } 136 | /* Per character (1-based in the rule, 0-based here): the largest entry index seen, i.e. how many code units that character contributes. */ 137 | std::vector charSizes(rule->phraseLength()); 138 | for (const auto &entry : 139 | rule->entries() | std::views::filter(isNotPlaceHolder)) { 140 | auto &charSize = charSizes[entry.character() - 1]; 141 | charSize = std::max(charSize, entry.index()); 142 | } 143 | /* Walk the segments cumulatively; ncharByteLength converts a count of UTF-8 characters into a byte offset for addNext. */ 144 | int lastIndex = 0; 145 | for (auto charSize : charSizes) { 146 | graph.addNext(fcitx::utf8::ncharByteLength(graph.data().begin(), 147 | lastIndex), 148 | fcitx::utf8::ncharByteLength( 149 | graph.data().begin(), lastIndex + charSize)); 150 | lastIndex += charSize; 151 | } 152 | } 153 | } 154 | 155 | return graph; 156 | } 157 | } // namespace libime 158 | -------------------------------------------------------------------------------- /src/libime/core/userlanguagemodel.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | 7 | #include "userlanguagemodel.h" 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include "constants.h" 21 | #include "historybigram.h" 22 | #include "languagemodel.h" 23 | #include "lm/state.hh" 24 | #include "utils_p.h" 25 | 26 | namespace libime { 27 | /* Private state of UserLanguageModel: cached begin/null states, the unigram-only switch, the user history and the mixing weight with its two cached log10 terms. */ 28 | class UserLanguageModelPrivate { 29 | public: 30 | State beginState_; 31 | State nullState_; 32 | bool useOnlyUnigram_ = false; 33 | 34 | HistoryBigram history_; 35 | float weight_ = DEFAULT_USER_LANGUAGE_MODEL_USER_WEIGHT; 36 | // log(wa * exp(a) + wb * exp(b)) 37 | // log(exp(log(wa) + a) +
exp(b + log(wb))) 38 | float wa_ = std::log10(1 - weight_), wb_ = std::log10(weight_); 39 | /* Reads the WordNode pointer stored in the state buffer at byte offset sizeof(lm::ngram::State). */ 40 | const WordNode *wordFromState(const State &state) const { 41 | return loadNative(reinterpret_cast( 42 | state.data() + sizeof(lm::ngram::State))); 43 | } 44 | /* Stores `node` into the state buffer at byte offset sizeof(lm::ngram::State). */ 45 | void setWordToState(State &state, const WordNode *node) const { 46 | storeNative( 47 | reinterpret_cast(state.data() + sizeof(lm::ngram::State)), 48 | node); 49 | } 50 | }; 51 | UserLanguageModel::UserLanguageModel(const char *file) 52 | : UserLanguageModel(std::make_shared(file)) {} 53 | /* Caches the base model's begin and null states and sets the previous-word slot of each to nullptr. */ 54 | UserLanguageModel::UserLanguageModel( 55 | std::shared_ptr file) 56 | : LanguageModel(std::move(file)), 57 | d_ptr(std::make_unique()) { 58 | FCITX_D(); 59 | // NOTE(review): the original note said "resize will fill remaining with zero", but no resize is visible here - possibly stale. 60 | d->beginState_ = LanguageModel::beginState(); 61 | d->setWordToState(d->beginState_, nullptr); 62 | d->nullState_ = LanguageModel::nullState(); 63 | d->setWordToState(d->nullState_, nullptr); 64 | } 65 | 66 | UserLanguageModel::~UserLanguageModel() {} 67 | 68 | HistoryBigram &UserLanguageModel::history() { 69 | FCITX_D(); 70 | return d->history_; 71 | } 72 | 73 | const HistoryBigram &UserLanguageModel::history() const { 74 | FCITX_D(); 75 | return d->history_; 76 | } 77 | /* Loads into a temporary HistoryBigram that inherits the current unknown penalty; the live history is replaced only after history.load(in) has returned. */ 78 | void UserLanguageModel::load(std::istream &in) { 79 | FCITX_D(); 80 | HistoryBigram history; 81 | history.setUnknownPenalty(d->history_.unknownPenalty()); 82 | history.load(in); 83 | d->history_ = std::move(history); 84 | } 85 | void UserLanguageModel::save(std::ostream &out) { 86 | FCITX_D(); 87 | d->history_.save(out); 88 | } 89 | /* Sets the history weight and refreshes the cached log10 terms. The range [0, 1] is enforced only by assert; at w == 0 or w == 1 one of the two log10 calls receives 0. */ 90 | void UserLanguageModel::setHistoryWeight(float w) { 91 | FCITX_D(); 92 | assert(w >= 0.0 && w <= 1.0); 93 | d->weight_ = w; 94 | d->wa_ = std::log10(1 - d->weight_); 95 | d->wb_ = std::log10(d->weight_); 96 | } 97 | 98 | const State &UserLanguageModel::beginState() const { 99 | FCITX_D(); 100 | return d->beginState_; 101 | } 102 | 103 | const State &UserLanguageModel::nullState() const { 104 | FCITX_D(); 105 |
return d->nullState_; 106 | } 107 | /* Natural logarithm of 10; divides the natural-log result of std::log1p below to convert it to base 10. */ 108 | static const float log_10 = std::log(10); 109 | /* log1p10exp(x) = log10(1 + 10^x), returning 0 when x is below MIN_FLOAT_LOG10. sum_log_prob(a, b) = log10(10^a + 10^b); it factors out the larger operand so the exponent passed on is never positive. Derivation: */ 110 | // log10(exp10(a) + exp10(b)) 111 | // = log10(exp10(b) * (1 + exp10(a - b))) 112 | // = b + log10(1 + exp10(a - b)) 113 | // = b + log1p(exp10(a - b)) / log(10) 114 | inline float log1p10exp(float x) { 115 | return x < MIN_FLOAT_LOG10 ? 0. : std::log1p(std::pow(10, x)) / log_10; 116 | } 117 | inline float sum_log_prob(float a, float b) { 118 | return a > b ? (a + log1p10exp(b - a)) : (b + log1p10exp(a - b)); 119 | } 120 | /* Scores `word` after `state`. The base score comes from LanguageModel::score (using nullState_ instead of `state` in unigram-only mode); the user score comes from the history, keyed on the previous word stored in `state`. `word` is recorded in `out`. The result is the larger of the base score and the log-sum of (score + wa_) and (userScore + wb_). */ 121 | float UserLanguageModel::score(const State &state, const WordNode &word, 122 | State &out) const { 123 | FCITX_D(); 124 | float score; 125 | if (d->useOnlyUnigram_) { 126 | score = LanguageModel::score(d->nullState_, word, out); 127 | } else { 128 | score = LanguageModel::score(state, word, out); 129 | } 130 | const auto *prev = d->wordFromState(state); 131 | float userScore = d->history_.score(prev, &word); 132 | d->setWordToState(out, &word); 133 | return std::max(score, sum_log_prob(score + d->wa_, userScore + d->wb_)); 134 | } 135 | /* A word is unknown only if idx equals unknown() and the history also reports it as unknown. */ 136 | bool UserLanguageModel::isUnknown(WordIndex idx, std::string_view view) const { 137 | FCITX_D(); 138 | return idx == unknown() && d->history_.isUnknown(view); 139 | } 140 | 141 | float UserLanguageModel::historyWeight() const { 142 | FCITX_D(); 143 | return d->weight_; 144 | } 145 | /* The flag is stored here and also forwarded to the history. */ 146 | void UserLanguageModel::setUseOnlyUnigram(bool useOnlyUnigram) { 147 | FCITX_D(); 148 | d->useOnlyUnigram_ = useOnlyUnigram; 149 | d->history_.setUseOnlyUnigram(useOnlyUnigram); 150 | } 151 | 152 | bool UserLanguageModel::useOnlyUnigram() const { 153 | FCITX_D(); 154 | return d->useOnlyUnigram_; 155 | } 156 | /* Returns false for fewer than two words or in unigram-only mode; true if any adjacent pair is a bigram known to the history; otherwise whether LanguageModel::maxNgramLength(words) exceeds 1. */ 157 | bool UserLanguageModel::containsNonUnigram( 158 | const std::vector &words) const { 159 | FCITX_D(); 160 | if (words.size() <= 1 || d->useOnlyUnigram_) { 161 | return false; 162 | } 163 | /* words.size() >= 2 here, so std::prev(words.end()) and std::next(iter) are valid. */ 164 | for (auto iter = words.begin(); iter != std::prev(words.end()); ++iter) { 165 | if (d->history_.containsBigram(*iter,
*(std::next(iter)))) { 166 | return true; 167 | } 168 | } 169 | 170 | return LanguageModel::maxNgramLength(words) > 1; 171 | } 172 | 173 | } // namespace libime 174 | -------------------------------------------------------------------------------- /src/libime/pinyin/pinyincontext.h: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: 2017-2017 CSSlayer 3 | * 4 | * SPDX-License-Identifier: LGPL-2.1-or-later 5 | */ 6 | #ifndef _FCITX_LIBIME_PINYIN_PINYINCONTEXT_H_ 7 | #define _FCITX_LIBIME_PINYIN_PINYINCONTEXT_H_ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | namespace libime { 24 | class PinyinIME; 25 | class PinyinContextPrivate; 26 | enum class PinyinPreeditMode; 27 | /* Pinyin input context built on InputBuffer; it overrides erase(), setCursor() and typeImpl(). Constructed from a PinyinIME pointer. */ 28 | class LIBIMEPINYIN_EXPORT PinyinContext : public InputBuffer { 29 | public: 30 | PinyinContext(PinyinIME *ime); 31 | virtual ~PinyinContext(); 32 | 33 | void setUseShuangpin(bool sp); 34 | bool useShuangpin() const; 35 | 36 | void erase(size_t from, size_t to) override; 37 | void setCursor(size_t pos) override; 38 | 39 | int maxSentenceLength() const; 40 | void setMaxSentenceLength(int length); 41 | 42 | const std::vector &candidates() const; 43 | 44 | /** 45 | * Return the set of candidates, useful for deduplication. 46 | * 47 | * @see PinyinContext::candidates 48 | * @since 1.0.18 49 | */ 50 | const std::unordered_set &candidateSet() const; 51 | 52 | const std::vector &candidatesToCursor() const; 53 | 54 | /** 55 | * Return the set of candidates up to the current cursor.
56 | * 57 | * @see PinyinContext::candidatesToCursor 58 | * @since 1.0.18 59 | */ 60 | const std::unordered_set &candidatesToCursorSet() const; 61 | void select(size_t idx); 62 | void selectCandidatesToCursor(size_t idx); 63 | void cancel(); 64 | bool cancelTill(size_t pos); 65 | 66 | /** 67 | * Create a custom selection. 68 | * 69 | * This allows the engine to make a custom selection that is not pinyin. 70 | * 71 | * @param inputLength the length of characters to match in the input 72 | * @param segment segment (NOTE(review): presumably the text selected for this part of the input - confirm) 73 | * @param encodedPinyin encoded pinyin for this segment; defaults to an empty string (NOTE(review): presumably empty means the segment has no pinyin - confirm) 74 | * @since 1.1.7 75 | */ 76 | void selectCustom(size_t inputLength, std::string_view segment, 77 | std::string_view encodedPinyin = ""); 78 | 79 | /// Whether the input is fully selected. 80 | bool selected() const; 81 | 82 | /// The sentence for this context, can be used as preedit: the selected sentence followed by the first candidate, if there is one. 83 | std::string sentence() const { 84 | const auto &c = candidates(); 85 | if (!c.empty()) { 86 | return selectedSentence() + c[0].toString(); 87 | } 88 | return selectedSentence(); 89 | } 90 | 91 | std::string preedit(PinyinPreeditMode mode) const; 92 | 93 | /// Mixed preedit (selected hanzi + pinyin). 94 | std::pair 95 | preeditWithCursor(PinyinPreeditMode mode) const; 96 | 97 | std::string preedit() const; 98 | 99 | /// Mixed preedit (selected hanzi + pinyin). 100 | std::pair preeditWithCursor() const; 101 | 102 | /// Selected hanzi. 103 | std::string selectedSentence() const; 104 | 105 | /// Selected pinyin length. 106 | size_t selectedLength() const; 107 | 108 | /// Selected hanzi segments. 109 | std::vector selectedWords() const; 110 | 111 | /// Selected hanzi with encoded pinyin. 112 | std::vector> 113 | selectedWordsWithPinyin() const; 114 | 115 | /// Get the full pinyin string of the selected part. 116 | std::string selectedFullPinyin() const; 117 | 118 | /// Get the full pinyin string of a certain candidate, given by index.
119 | std::string candidateFullPinyin(size_t i) const; 120 | 121 | /// Get the full pinyin string of a certain candidate. 122 | std::string candidateFullPinyin(const SentenceResult &candidate) const; 123 | 124 | /// Add the selected part to history if selected() == true. 125 | void learn(); 126 | 127 | /// Return the position of last pinyin. E.g. 你h|ao, return the offset 128 | /// before h. 129 | int pinyinBeforeCursor() const; 130 | 131 | /// Return the position of last pinyin. E.g. 你h|ao, return the offset after 132 | /// h. 133 | int pinyinAfterCursor() const; 134 | /* NOTE(review): presumably the PinyinIME pointer passed to the constructor - confirm. */ 135 | PinyinIME *ime() const; 136 | 137 | /// Opaque language model state. 138 | State state() const; 139 | 140 | /** 141 | * Set context words for better prediction. 142 | * @param contextWords The context words. 143 | * @since 1.1.13 144 | */ 145 | void setContextWords(const std::vector &contextWords); 146 | 147 | /** 148 | * Clear context words. 149 | * @since 1.1.13 150 | */ 151 | void clearContextWords(); 152 | 153 | /** 154 | * Append context words for better prediction. 155 | * @param contextWords The context words. 156 | * @since 1.1.13 157 | */ 158 | void appendContextWords(const std::vector &contextWords); 159 | 160 | /** 161 | * Get context words for better prediction. 162 | * @return current context words 163 | * @since 1.1.13 164 | */ 165 | std::vector contextWords() const; 166 | 167 | protected: 168 | bool typeImpl(const char *s, size_t length) override; 169 | 170 | private: 171 | void update(); 172 | bool learnWord(); 173 | std::unique_ptr d_ptr; 174 | FCITX_DECLARE_PRIVATE(PinyinContext); 175 | }; 176 | } // namespace libime 177 | 178 | #endif // _FCITX_LIBIME_PINYIN_PINYINCONTEXT_H_ 179 | --------------------------------------------------------------------------------