├── .github ├── codecov.yml ├── ISSUE_TEMPLATE │ ├── 通用咨询.md │ ├── bug-反馈.md │ └── 需求建议.md └── workflows │ └── main.yml ├── .gitattributes ├── .clang-format ├── examples ├── go │ ├── go.mod │ ├── run.sh │ ├── go.sum │ └── main.go ├── cpp │ ├── CMakeLists.txt │ └── main.cc ├── node │ ├── README.md │ ├── package.json │ ├── better-sqlite3.js │ ├── just-task.js │ └── node-sqlite3.js └── python3 │ ├── journal.txt │ └── db_connector.py ├── test ├── main.cpp ├── pinyin_test.cc ├── CMakeLists.txt.in ├── CMakeLists.txt └── tokenizer_test.cc ├── contrib ├── README.md ├── sqlite3 │ ├── .gitignore │ ├── sqlite3_config.h.in │ ├── FindReadline.cmake │ ├── LICENSE │ ├── README.md │ ├── CMakeLists.txt │ └── sqlite3ext.h └── CMakeRC.cmake ├── .gitignore ├── example-jieba.sql ├── LICENSE ├── src ├── simple_tokenizer.h ├── simple_highlight.h ├── CMakeLists.txt ├── entry.cc ├── pinyin.cc ├── simple_tokenizer.cc ├── pinyin.h └── simple_highlight.cc ├── CMakeLists.txt ├── README.md └── example.sql /.github/codecov.yml: -------------------------------------------------------------------------------- 1 | codecov: 2 | disable_default_path_fixes: false 3 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | *.h linguist-language=C++ 2 | 3 | contrib/** linguist-vendored 4 | -------------------------------------------------------------------------------- /.clang-format: -------------------------------------------------------------------------------- 1 | # Use the Google style in this project. 
2 | BasedOnStyle: Google 3 | 4 | ColumnLimit: 120 -------------------------------------------------------------------------------- /examples/go/go.mod: -------------------------------------------------------------------------------- 1 | module github.com/wangfenjin/simple/examples/go 2 | 3 | go 1.17 4 | 5 | require github.com/mattn/go-sqlite3 v1.14.12 6 | -------------------------------------------------------------------------------- /test/main.cpp: -------------------------------------------------------------------------------- 1 | #include "gtest/gtest.h" 2 | 3 | int main(int argc, char** argv) { 4 | ::testing::InitGoogleTest(&argc, argv); 5 | return RUN_ALL_TESTS(); 6 | } 7 | -------------------------------------------------------------------------------- /examples/go/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | wget -qO- https://github.com/wangfenjin/simple/releases/download/v0.1.0/libsimple-osx-x64.zip | tar xf - 4 | go build --tags fts5 -o gosimple 5 | ./gosimple 6 | -------------------------------------------------------------------------------- /examples/go/go.sum: -------------------------------------------------------------------------------- 1 | github.com/mattn/go-sqlite3 v1.14.12 h1:TJ1bhYJPV44phC+IMu1u2K/i5RriLTPe+yc68XDJ1Z0= 2 | github.com/mattn/go-sqlite3 v1.14.12/go.mod h1:NyWgC/yNuGj7Q9rpYnZvas74GogHl5/Z4A/KQRfk6bU= 3 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/通用咨询.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 通用咨询 3 | about: Describe this issue template's purpose here. 
4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | 提问之前先看一下:https://github.com/wangfenjin/simple/issues/87 11 | -------------------------------------------------------------------------------- /contrib/README.md: -------------------------------------------------------------------------------- 1 | ## 拼音文件 2 | 3 | wget https://raw.githubusercontent.com/mozillazg/pinyin-data/master/pinyin.txt -O pinyin.txt 4 | sed -i '' 's/\(.*\) #.*/\1/g' pinyin.txt 5 | 6 | ## Mobile build 7 | 8 | https://github.com/leetal/ios-cmake 9 | https://github.com/taka-no-me/android-cmake 10 | -------------------------------------------------------------------------------- /examples/cpp/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.2) 2 | project(simple_cpp_example) 3 | 4 | include_directories(${SQLITE3_HEADERS_DIR}) 5 | set(SOURCE_FILES main.cc) 6 | 7 | add_executable(simple_cpp_example ${SOURCE_FILES}) 8 | target_link_libraries(simple_cpp_example PUBLIC SQLite3) 9 | install(TARGETS simple_cpp_example DESTINATION bin) 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | a.out 2 | .ccls* 3 | *.so 4 | compile_commands.json 5 | libsimple.* 6 | build/ 7 | build-ios/ 8 | *.gch 9 | bin/ 10 | output/ 11 | output-no-jieba/ 12 | chat.db 13 | examples/node/node_modules/ 14 | examples/node/lib/ 15 | examples/go/gosimple 16 | examples/go/libsimple-osx-x64/ 17 | 18 | # CLion 19 | .idea/ 20 | cmake-build-debug/ 21 | 22 | 23 | .vscode/ -------------------------------------------------------------------------------- /examples/node/README.md: -------------------------------------------------------------------------------- 1 | # node example 2 | 3 | ``` 4 | # update dependency 5 | ncu -u 6 | # install dependency 7 | npm install 8 | # download lib from github 9 | npm run 
download 10 | # run example using downloaded lib 11 | npm run p 12 | # run example and set the ext_path and dict_path 13 | npm run p -- --ext_path=/path/to/libsimple/ --dict_path=/path/to/dict/ 14 | # remove build folder 15 | npm run clean 16 | ``` 17 | -------------------------------------------------------------------------------- /examples/node/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "scripts": { 3 | "clean": "rm -rf ./node_modules ./lib", 4 | "download": "just install", 5 | "p": "node node-sqlite3.js", 6 | "b": "node better-sqlite3.js" 7 | }, 8 | "dependencies": { 9 | "better-sqlite3": "^7.6.2", 10 | "download": "^8.0.0", 11 | "just": "^0.1.8", 12 | "sqlite3": "^5.1.2", 13 | "validator": ">=13.7.0" 14 | }, 15 | "devDependencies": { 16 | "just-scripts": "^2.0.5" 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /examples/python3/journal.txt: -------------------------------------------------------------------------------- 1 | '不想上学的举高手', '[emoji:255]\x1a\x0b\x00' 2 | 'Dianjixz', '/笑哭/笑哭' 3 | 'Drifter', 'rv64是啥' 4 | '哦仙人', '等等,现在有啥可以用的RV64?' 5 | 'L0/1/2_泽畔无材', '[QQ红包]' 6 | 'ㅤ', '/魔鬼笑/魔鬼笑/魔鬼笑' 7 | 'qq@Αρηδ', '64位ri scv' 8 | '名字不重要', 'Risc-v64' 9 | 'L0/1/2_泽畔无材', '可以跑linux的' 10 | 'QiqiStudio', '[图片]' 11 | '。。。。。', '难道是k510' 12 | '哦仙人', '最近有出这种板卡么?' 
13 | '\u202d\u202d\u202d', '完全没听过 你们太厉害了' 14 | '林夕木易', '[图片]' 15 | '林夕木易', '正在编译' 16 | 'qq@Αρηδ', 'riscv的单片机 性能很垃圾。高端的不知道性能如何。' 17 | -------------------------------------------------------------------------------- /test/pinyin_test.cc: -------------------------------------------------------------------------------- 1 | #include "pinyin.h" 2 | 3 | #include "gtest/gtest.h" 4 | 5 | using namespace simple_tokenizer; 6 | 7 | TEST(simple, pinyin_split) { 8 | PinYin* pinyin = new PinYin(); 9 | auto res = pinyin->split_pinyin("a"); 10 | ASSERT_EQ(res.size(), 1); 11 | res = pinyin->split_pinyin("ba"); 12 | ASSERT_EQ(res.size(), 2); 13 | res = pinyin->split_pinyin("zhangliangy"); 14 | ASSERT_EQ(res.size(), 4); 15 | for (auto r : res) std::cout << r << "\t"; 16 | std::cout << std::endl; 17 | } 18 | -------------------------------------------------------------------------------- /test/CMakeLists.txt.in: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.2) 2 | cmake_policy(SET CMP0057 NEW) 3 | 4 | project(googletest-download NONE) 5 | 6 | include(ExternalProject) 7 | ExternalProject_Add(googletest 8 | URL https://github.com/google/googletest/archive/release-1.10.0.zip 9 | SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/googletest-src" 10 | BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/googletest-build" 11 | CONFIGURE_COMMAND "" 12 | BUILD_COMMAND "" 13 | INSTALL_COMMAND "" 14 | TEST_COMMAND "" 15 | LOG_DOWNLOAD OFF 16 | ) 17 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug-反馈.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug 反馈 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Bug 描述** 11 | A clear and concise description of what the bug is. 
12 | 13 | **重现步骤** 14 | 有问题的完整的 SQL 15 | 16 | **与预期不符合的地方** 17 | A clear and concise description of what you expected to happen. 18 | 19 | **SQL 执行截图** 20 | If applicable, add screenshots to help explain your problem. 21 | 22 | **环境信息** 23 | - 操作系统: 24 | - 有问题的软件版本: 25 | 26 | **其他** 27 | Add any other context about the problem here. 28 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/需求建议.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 需求建议 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 
21 | -------------------------------------------------------------------------------- /examples/go/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "database/sql" 5 | "fmt" 6 | "log" 7 | 8 | "github.com/mattn/go-sqlite3" 9 | ) 10 | 11 | func main() { 12 | sql.Register("sqlite3_simple", 13 | &sqlite3.SQLiteDriver{ 14 | Extensions: []string{ 15 | "libsimple-osx-x64/libsimple", 16 | }, 17 | }) 18 | 19 | db, err := sql.Open("sqlite3_simple", ":memory:") 20 | if err != nil { 21 | log.Fatal("open error: ", err) 22 | } 23 | defer db.Close() 24 | 25 | // db.Exec("create virtual table repo using github(id, full_name, description, html_url)") 26 | 27 | rows, err := db.Query(`select simple_query('pinyin')`) 28 | if err != nil { 29 | log.Fatal("query error: ", err) 30 | } 31 | defer rows.Close() 32 | for rows.Next() { 33 | var query string 34 | rows.Scan(&query) 35 | fmt.Printf("%s\n", query) 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /contrib/sqlite3/.gitignore: -------------------------------------------------------------------------------- 1 | # ignore all subdirectories 2 | **/ 3 | 4 | # YouCompleteMe 5 | .ycm_extra_conf.* 6 | 7 | # Object files 8 | *.o 9 | *.ko 10 | *.obj 11 | *.elf 12 | *.user 13 | *.autosave 14 | *.~ 15 | *.bak 16 | *.log 17 | *.swp 18 | 19 | # Precompiled Headers 20 | *.gch 21 | *.pch 22 | 23 | # Libraries 24 | *.lib 25 | *.a 26 | *.la 27 | *.lo 28 | 29 | # Shared objects (inc. 
Windows DLLs) 30 | *.dll 31 | *.so 32 | *.so.* 33 | *.dylib 34 | 35 | # Executables 36 | *.exe 37 | *.out 38 | *.app 39 | *.i*86 40 | *.x86_64 41 | *.hex 42 | 43 | # build systems 44 | Makefile 45 | *.make 46 | # Visual studio 47 | *.htm 48 | *.html 49 | *.ncb 50 | *.idb 51 | *.suo 52 | *.pdb 53 | *.vcxproj.filters 54 | *.vcproj 55 | *.vcxproj 56 | *.sln 57 | *.rc 58 | 59 | 60 | # Debug files 61 | *.dSYM/ 62 | 63 | # OSX shit 64 | .DS_Store 65 | .directory 66 | -------------------------------------------------------------------------------- /example-jieba.sql: -------------------------------------------------------------------------------- 1 | select ''; 2 | select ''; 3 | select '--------------------------------------------------------------------------------'; 4 | select '使用jieba分词:'; 5 | -- will match 6 | select ' ', simple_highlight(t1, 0, '[', ']') from t1 where x match simple_query('国中woai'); 7 | select ' ', simple_highlight(t1, 0, '[', ']') from t1 where x match jieba_query('中国woai'); 8 | -- will not match, in jieba_query, the order matters 9 | select ' !!!! should not match', simple_highlight(t1, 0, '[', ']') from t1 where x match simple_query('国中woai', 0); 10 | select ' !!!! should not match', simple_highlight(t1, 0, '[', ']') from t1 where x match jieba_query('国中woai'); 11 | select ' !!!! should not match', simple_highlight(t1, 0, '[', ']') from t1 where x match jieba_query('中国woai', 0); 12 | select ' !!!! 
should not match', simple_highlight(t1, 0, '[', ']') from t1 where x match jieba_query('aiwo', 0); 13 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Wang Fenjin 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /contrib/sqlite3/sqlite3_config.h.in: -------------------------------------------------------------------------------- 1 | /* the compile-time options used to build SQLite3 library 2 | * 3 | * https://github.com/azadkuh/sqlite-amalgamation 4 | */ 5 | 6 | #ifndef SQLITE3_CONFIG_H 7 | #define SQLITE3_CONFIG_H 8 | 9 | 10 | #cmakedefine SQLITE_ENABLE_DBSTAT_VTAB 11 | #cmakedefine SQLITE_ENABLE_FTS3 12 | #cmakedefine SQLITE_ENABLE_FTS4 13 | #cmakedefine SQLITE_ENABLE_FTS5 14 | #cmakedefine SQLITE_ENABLE_GEOPOLY 15 | #cmakedefine SQLITE_ENABLE_ICU 16 | #cmakedefine SQLITE_ENABLE_JSON1 17 | #cmakedefine SQLITE_ENABLE_RBU 18 | #cmakedefine SQLITE_ENABLE_RTREE 19 | 20 | #cmakedefine SQLITE_RECOMMENDED_OPTIONS 21 | #if defined(SQLITE_RECOMMENDED_OPTIONS) /* a macro's value follows whitespace; "NAME=VALUE" is command-line (-D) syntax only */ 22 | # define SQLITE_DQS 0 23 | # define SQLITE_DEFAULT_MEMSTATUS 0 24 | # define SQLITE_DEFAULT_WAL_SYNCHRONOUS 1 25 | # define SQLITE_LIKE_DOESNT_MATCH_BLOBS 26 | # define SQLITE_MAX_EXPR_DEPTH 0 27 | # define SQLITE_OMIT_DECLTYPE 28 | # define SQLITE_OMIT_DEPRECATED 29 | # define SQLITE_OMIT_PROGRESS_CALLBACK 30 | # define SQLITE_OMIT_SHARED_CACHE 31 | # define SQLITE_USE_ALLOCA 32 | #endif /* SQLITE_RECOMMENDED_OPTIONS */ 33 | 34 | 35 | #endif /* SQLITE3_CONFIG_H */ 36 | -------------------------------------------------------------------------------- /examples/node/better-sqlite3.js: -------------------------------------------------------------------------------- 1 | var path = require("path"); 2 | const process = require( 'process' ); 3 | const argv = key => { 4 | // Return true if the key exists and a value is defined 5 | if ( process.argv.includes( `--${ key }` ) ) return true; 6 | const value = process.argv.find( element => element.startsWith( `--${ key }=` ) ); 7 | // Return null if the key does not exist and a value is not defined 8 | if ( !value ) return null; 9 | return value.replace( `--${ key }=` , '' ); 10 | } 11 | 12 | var 
ext_path = path.resolve("./lib/"); 13 | if (argv('ext_path')) { 14 | ext_path = argv('ext_path'); 15 | } 16 | var dict_path = path.join(ext_path, "dict"); 17 | if (argv('dict_path')) { 18 | dict_path = argv('dict_path'); 19 | } 20 | console.log("extension path: " + ext_path + ", dict path: " + dict_path); 21 | 22 | const db = require('better-sqlite3')(':memory:', { verbose: console.log }); 23 | db.loadExtension(path.join(ext_path, 'libsimple')); 24 | // test simple_query 25 | const row = db.prepare('select simple_query(\'pinyin\') as query').get(); 26 | console.log(row.query); 27 | 28 | // set the jieba dict file path 29 | db.prepare("select jieba_dict(?)").run(dict_path); 30 | -------------------------------------------------------------------------------- /contrib/sqlite3/FindReadline.cmake: -------------------------------------------------------------------------------- 1 | # https://dailycommit.blogspot.com/2016/08/how-to-search-for-readline-library-with.html 2 | # Search for the path containing library's headers 3 | find_path(Readline_ROOT_DIR 4 | NAMES include/readline/readline.h 5 | ) 6 | 7 | # Search for include directory 8 | find_path(Readline_INCLUDE_DIR 9 | NAMES readline/readline.h 10 | HINTS ${Readline_ROOT_DIR}/include 11 | ) 12 | 13 | # Search for library 14 | find_library(Readline_LIBRARY 15 | NAMES readline 16 | HINTS ${Readline_ROOT_DIR}/lib 17 | ) 18 | 19 | # Conditionally set READLINE_FOUND value 20 | if(Readline_INCLUDE_DIR AND Readline_LIBRARY 21 | AND Ncurses_LIBRARY) 22 | set(READLINE_FOUND TRUE) 23 | else(Readline_INCLUDE_DIR AND Readline_LIBRARY 24 | AND Ncurses_LIBRARY) 25 | FIND_LIBRARY(Readline_LIBRARY NAMES readline) 26 | include(FindPackageHandleStandardArgs) 27 | FIND_PACKAGE_HANDLE_STANDARD_ARGS(Readline DEFAULT_MSG 28 | Readline_INCLUDE_DIR Readline_LIBRARY ) 29 | MARK_AS_ADVANCED(Readline_INCLUDE_DIR Readline_LIBRARY) 30 | endif(Readline_INCLUDE_DIR AND Readline_LIBRARY 31 | AND Ncurses_LIBRARY) 32 | 33 | # Hide these variables in 
cmake GUIs 34 | mark_as_advanced( 35 | Readline_ROOT_DIR 36 | Readline_INCLUDE_DIR 37 | Readline_LIBRARY 38 | ) -------------------------------------------------------------------------------- /contrib/sqlite3/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015, amir zamani 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | * Neither the name of sqlite-amalgamation nor the names of its 15 | contributors may be used to endorse or promote products derived from 16 | this software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | 29 | -------------------------------------------------------------------------------- /examples/node/just-task.js: -------------------------------------------------------------------------------- 1 | const { task, logger } = require("just-scripts"); 2 | const download = require("download"); 3 | const path = require("path"); 4 | task("install", () => { 5 | return new Promise((resolve, reject) => { 6 | const localPath = path.join(__dirname, "lib"); 7 | var platform = process.env.npm_config_target_platform || process.platform; 8 | var arch = process.env.npm_config_target_arch || process.arch; 9 | 10 | let downloadUrl = `https://github.com/wangfenjin/simple/releases/latest/download/libsimple-linux-ubuntu-18.04.zip`; 11 | if (platform === "darwin") { 12 | platform = "osx"; 13 | downloadUrl = `https://github.com/wangfenjin/simple/releases/latest/download/libsimple-osx-x64.zip`; 14 | } else if (platform === "win32") { 15 | platform = "windows"; 16 | if (arch === "x64") { 17 | downloadUrl = `https://github.com/wangfenjin/simple/releases/latest/download/libsimple-windows-x64.zip`; 18 | } else { 19 | downloadUrl = `https://github.com/wangfenjin/simple/releases/latest/download/libsimple-windows-x86.zip`; 20 | } 21 | } 22 | 23 | logger.info(`[install] Target platform: -${platform}-`); 24 | logger.info(`[install] Target arch: ${arch}`); 25 | logger.info(`[install] Download prebuilt binaries from ${downloadUrl}`); 26 | 27 | 28 | download(downloadUrl, localPath, { 29 | extract: true, 30 | strip: 1, 31 | }) 32 | .then(() => { 33 | resolve(); 34 | }) 35 | .catch((err) => { 36 | logger.warn( 37 | `[install] Failed to download package from: ${downloadUrl}, err: ${err}` 38 | ); 39 | reject(); 40 | }); 41 | }); 42 | }); 43 | -------------------------------------------------------------------------------- /src/simple_tokenizer.h: -------------------------------------------------------------------------------- 1 | #ifndef SIMPLE_TOKENIZER_H_ 2 | #define SIMPLE_TOKENIZER_H_ 3 | 4 | 
#include <string> // NOTE(review): the header name was missing in this copy; <string> is required by the std::string declarations below - confirm against upstream 5 | 6 | #ifdef USE_JIEBA 7 | #include "cppjieba/Jieba.hpp" 8 | #endif 9 | #include "pinyin.h" 10 | #include "sqlite3ext.h" 11 | 12 | typedef int (*xTokenFn)(void *, int, const char *, int, int, int); 13 | 14 | namespace simple_tokenizer { 15 | 16 | #ifdef USE_JIEBA 17 | extern std::string jieba_dict_path; 18 | #endif 19 | 20 | enum class TokenCategory { 21 | SPACE, 22 | ASCII_ALPHABETIC, 23 | DIGIT, 24 | OTHER, 25 | }; 26 | 27 | class SimpleTokenizer { 28 | private: 29 | static PinYin *get_pinyin(); 30 | bool enable_pinyin = true; 31 | 32 | public: 33 | SimpleTokenizer(const char **zaArg, int nArg); 34 | int tokenize(void *pCtx, int flags, const char *text, int textLen, xTokenFn xToken) const; 35 | static std::string tokenize_query(const char *text, int textLen, int flags = 1); 36 | #ifdef USE_JIEBA 37 | static std::string tokenize_jieba_query(const char *text, int textLen, int flags = 1); 38 | #endif 39 | 40 | private: 41 | static void append_result(std::string &result, std::string part, TokenCategory category, int offset, int flags); 42 | }; 43 | 44 | } // namespace simple_tokenizer 45 | 46 | extern "C" int fts5_simple_xCreate(void *sqlite3, const char **azArg, int nArg, Fts5Tokenizer **ppOut); 47 | extern "C" int fts5_simple_xTokenize(Fts5Tokenizer *tokenizer_ptr, void *pCtx, int flags, const char *pText, int nText, 48 | xTokenFn xToken); 49 | extern "C" void fts5_simple_xDelete(Fts5Tokenizer *tokenizer_ptr); 50 | 51 | extern "C" int sqlite3_simple_init(sqlite3 *db, char **pzErrMsg, const sqlite3_api_routines *pApi); 52 | 53 | #endif // SIMPLE_TOKENIZER_H_ 54 | -------------------------------------------------------------------------------- /src/simple_highlight.h: -------------------------------------------------------------------------------- 1 | #ifndef SIMPLE_HIGHLIGHT_H_ 2 | #define SIMPLE_HIGHLIGHT_H_ 3 | 4 | #include "sqlite3ext.h" 5 | 6 | extern "C" void simple_highlight(const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ 7 | 
Fts5Context *pFts, /* First arg to pass to pApi functions */ 8 | sqlite3_context *pCtx, /* Context for returning result/error */ 9 | int nVal, /* Number of values in apVal[] array */ 10 | sqlite3_value **apVal /* Array of trailing arguments */ 11 | ); 12 | 13 | extern "C" void simple_snippet(const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ 14 | Fts5Context *pFts, /* First arg to pass to pApi functions */ 15 | sqlite3_context *pCtx, /* Context for returning result/error */ 16 | int nVal, /* Number of values in apVal[] array */ 17 | sqlite3_value **apVal /* Array of trailing arguments */ 18 | ); 19 | 20 | extern "C" void simple_highlight_pos(const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ 21 | Fts5Context *pFts, /* First arg to pass to pApi functions */ 22 | sqlite3_context *pCtx, /* Context for returning result/error */ 23 | int nVal, /* Number of values in apVal[] array */ 24 | sqlite3_value **apVal /* Array of trailing arguments */ 25 | ); 26 | 27 | #endif // SIMPLE_HIGHLIGHT_H_ 28 | -------------------------------------------------------------------------------- /src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.2) 2 | project(simple CXX) 3 | 4 | if(SIMPLE_WITH_JIEBA) 5 | include(ExternalProject) 6 | ExternalProject_Add( 7 | cppjieba 8 | PREFIX ${CMAKE_BINARY_DIR}/cppjieba 9 | GIT_REPOSITORY https://github.com/yanyiwu/cppjieba.git 10 | CONFIGURE_COMMAND "" 11 | GIT_TAG 194c144d8b5ed1baf3190d07c5226e804454ab47 12 | BUILD_COMMAND cmake -E echo "Skipping build cppjieba." 13 | INSTALL_COMMAND cmake -E echo "Skipping install cppjieba." 
14 | LOG_DOWNLOAD ON 15 | ) 16 | ExternalProject_Get_Property(cppjieba source_dir) 17 | endif() 18 | 19 | set(SOURCE_FILES 20 | pinyin.h 21 | simple_highlight.h 22 | simple_tokenizer.h 23 | pinyin.cc 24 | simple_highlight.cc 25 | simple_tokenizer.cc 26 | entry.cc 27 | ) 28 | 29 | OPTION(BUILD_STATIC "Option to build static lib" OFF) 30 | if (IOS OR BUILD_STATIC) 31 | # iOS only supports static libraries. 32 | add_library(simple STATIC ${SOURCE_FILES}) 33 | else() 34 | add_library(simple SHARED ${SOURCE_FILES}) 35 | endif() 36 | 37 | if(SIMPLE_WITH_JIEBA) 38 | add_dependencies(simple cppjieba) 39 | include_directories(${SQLITE3_HEADERS_DIR} ${source_dir}/include ${source_dir}/deps) 40 | target_include_directories(simple INTERFACE ${SQLITE3_HEADERS_DIR} ${source_dir}/include ${source_dir}/deps) 41 | # for tests only: copy the cppjieba dict folder to ../test/dict relative to the directory of the built library 42 | add_custom_command(TARGET simple PRE_BUILD 43 | COMMAND ${CMAKE_COMMAND} -E copy_directory 44 | ${source_dir}/dict/ $<TARGET_FILE_DIR:simple>/../test/dict/) 45 | INSTALL(DIRECTORY ${source_dir}/dict/ DESTINATION bin/dict FILES_MATCHING PATTERN "*.utf8") 46 | else() 47 | include_directories(${SQLITE3_HEADERS_DIR}) 48 | target_include_directories(simple INTERFACE ${SQLITE3_HEADERS_DIR}) 49 | endif() 50 | 51 | target_link_libraries(simple PUBLIC coverage_config PRIVATE PINYIN_TEXT SQLite3) 52 | 53 | install(TARGETS simple DESTINATION bin) 54 | -------------------------------------------------------------------------------- /test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.2) 2 | project(simple_tests) 3 | 4 | # https://github.com/maps-gpu/MAPS/issues/7#issuecomment-418200278 5 | cmake_policy(SET CMP0057 NEW) 6 | 7 | # https://github.com/google/googletest/issues/2791 8 | set (CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/src) 9 | 10 | # Download and unpack googletest at configure time 11 | configure_file(CMakeLists.txt.in googletest-download/CMakeLists.txt) 12 | execute_process(COMMAND 
${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" . 13 | RESULT_VARIABLE result 14 | WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/googletest-download ) 15 | if(result) 16 | message(FATAL_ERROR "CMake step for googletest failed: ${result}") 17 | endif() 18 | execute_process(COMMAND ${CMAKE_COMMAND} --build . 19 | RESULT_VARIABLE result 20 | WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/googletest-download ) 21 | if(result) 22 | message(FATAL_ERROR "Build step for googletest failed: ${result}") 23 | endif() 24 | 25 | # Prevent overriding the parent project's compiler/linker 26 | # settings on Windows 27 | set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) 28 | 29 | # Add googletest directly to our build. This defines 30 | # the gtest and gtest_main targets. 31 | add_subdirectory(${CMAKE_CURRENT_BINARY_DIR}/googletest-src 32 | ${CMAKE_CURRENT_BINARY_DIR}/googletest-build 33 | EXCLUDE_FROM_ALL) 34 | 35 | # The gtest/gtest_main targets carry header search path 36 | # dependencies automatically when using CMake 2.8.11 or 37 | # later. Otherwise we have to add them here ourselves. 
38 | if (CMAKE_VERSION VERSION_LESS 2.8.11) 39 | include_directories("${gtest_SOURCE_DIR}/include") 40 | endif() 41 | 42 | include_directories(${SIMPLE_HEADERS_DIR}) 43 | include_directories(${SQLITE3_HEADERS_DIR}) 44 | set(SOURCE_FILES main.cpp pinyin_test.cc tokenizer_test.cc) 45 | 46 | include(GoogleTest) 47 | add_executable(simple_tests ${SOURCE_FILES}) 48 | target_link_libraries(simple_tests PUBLIC coverage_config PRIVATE gtest_main simple) 49 | target_compile_options(simple_tests PUBLIC -Wall -pedantic) 50 | # install(TARGETS simple_tests DESTINATION bin) 51 | gtest_add_tests(TARGET simple_tests ${SOURCE_FILES}) 52 | -------------------------------------------------------------------------------- /examples/node/node-sqlite3.js: -------------------------------------------------------------------------------- 1 | var path = require("path"); 2 | var sqlite3 = require('sqlite3').verbose(); 3 | var db = new sqlite3.Database(':memory:'); 4 | 5 | const process = require( 'process' ); 6 | 7 | const argv = key => { 8 | // Return true if the key exists and a value is defined 9 | if ( process.argv.includes( `--${ key }` ) ) return true; 10 | const value = process.argv.find( element => element.startsWith( `--${ key }=` ) ); 11 | // Return null if the key does not exist and a value is not defined 12 | if ( !value ) return null; 13 | return value.replace( `--${ key }=` , '' ); 14 | } 15 | 16 | db.serialize(function() { 17 | var ext_path = path.resolve("./lib/"); 18 | if (argv('ext_path')) { 19 | ext_path = argv('ext_path'); 20 | } 21 | var dict_path = "./lib/dict"; 22 | if (argv('dict_path')) { 23 | dict_path = argv('dict_path'); 24 | } 25 | console.log("extension path: " + ext_path + ", dict path: " + dict_path); 26 | // load extension 27 | var platform = process.env.npm_config_target_platform || process.platform 28 | if (platform === 'win32') { 29 | db.loadExtension(path.join(ext_path, "simple")); 30 | } else { 31 | db.loadExtension(path.join(ext_path, "libsimple")); 32 
| } 33 | // set the jieba dict file path 34 | db.run("select jieba_dict(?)", dict_path); 35 | // create table 36 | db.run("CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize = 'simple')"); 37 | // insert some data 38 | db.run("insert into t1(x) values ('周杰伦 Jay Chou:我已分不清,你是友情还是错过的爱情'), ('周杰伦 Jay Chou:最美的不是下雨天,是曾与你躲过雨的屋檐'), ('I love China! 我爱中国!我是中华人民共和国公民!'), ('@English &special _characters.\"''bacon-&and''-eggs%')"); 39 | 40 | // with match 周杰伦 41 | db.each("select rowid as id, simple_highlight(t1, 0, '[', ']') as info from t1 where x match simple_query('zjl')", function(err, row) { 42 | console.log(row.id + ": " + row.info); 43 | }); 44 | // will match 中国 and 中华人民共和国 45 | db.each("select rowid as id, simple_highlight(t1, 0, '[', ']') as info from t1 where x match simple_query('中国')", function(err, row) { 46 | console.log(row.id + ": " + row.info); 47 | }); 48 | // will match 中国 but not 中华人民共和国 49 | db.each("select rowid as id, simple_highlight(t1, 0, '[', ']') as info from t1 where x match jieba_query('中国')", function(err, row) { 50 | console.log(row.id + ": " + row.info); 51 | }); 52 | }); 53 | 54 | db.close(); 55 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.19) 2 | 3 | set(CMAKE_CXX_STANDARD 14) 4 | set(CMAKE_CXX_STANDARD_REQUIRED ON) # fail at configure time instead of silently decaying to an older standard 5 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14") 6 | if (APPLE) 7 | set(CMAKE_OSX_ARCHITECTURES "x86_64;arm64" CACHE STRING "") 8 | set(CMAKE_OSX_DEPLOYMENT_TARGET "10.11" CACHE STRING "Minimum OS X deployment version") 9 | endif() 10 | 11 | if (IOS) 12 | list(APPEND CMAKE_ARGS 13 | "-DPLATFORM=${PLATFORM}" 14 | ) 15 | endif() 16 | 17 | # put project after CMAKE_OSX_DEPLOYMENT_TARGET 18 | project(simple-tokenizer) 19 | 20 | if (MSVC) 21 | # https://stackoverflow.com/a/65128497/1203241 22 | set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS TRUE) 23 | set(BUILD_SHARED_LIBS TRUE) 
24 | add_compile_options("$<$<CXX_COMPILER_ID:MSVC>:/utf-8>") 25 | endif() 26 | 27 | set(SQLITE3_HEADERS_DIR ${PROJECT_SOURCE_DIR}/contrib/sqlite3) 28 | set(SIMPLE_HEADERS_DIR ${PROJECT_SOURCE_DIR}/src) 29 | 30 | # setup sqlite 31 | option(BUILD_SQLITE3 "Enable build SQLite3" ON) 32 | if(BUILD_SQLITE3) 33 | set(SQLITE_ROOT ${PROJECT_SOURCE_DIR}/build/sqlite) 34 | set(SQLITE_INSTALL_DIR ${SQLITE_ROOT}) 35 | set(SQLITE_INCLUDE_DIR ${SQLITE_INSTALL_DIR}/include) 36 | set(SQLITE_LIB_DIR ${SQLITE_INSTALL_DIR}/lib) 37 | if (NOT IOS) 38 | option(SQLITE_ENABLE_FTS5 "enables full text searches version 5" ON) 39 | option(BUILD_SHELL "build SQLite3 shell application" ON) 40 | add_subdirectory(contrib/sqlite3) 41 | endif() 42 | endif() 43 | 44 | include(contrib/CMakeRC.cmake) 45 | cmrc_add_resource_library(PINYIN_TEXT TYPE OBJECT NAMESPACE pinyin_text contrib/pinyin.txt) 46 | # https://github.com/vector-of-bool/cmrc/issues/17#issuecomment-659501280 47 | set_property(TARGET PINYIN_TEXT PROPERTY POSITION_INDEPENDENT_CODE ON) 48 | 49 | # Code Coverage Configuration 50 | if(NOT TARGET coverage_config) 51 | add_library(coverage_config INTERFACE) 52 | endif() 53 | option(CODE_COVERAGE "Enable coverage reporting" OFF) 54 | # if(CODE_COVERAGE AND CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang") 55 | if(CODE_COVERAGE) 56 | # Add required flags (GCC & LLVM/Clang) 57 | target_compile_options(coverage_config INTERFACE 58 | -O0 # no optimization 59 | -g # generate debug info 60 | --coverage # sets all required flags 61 | ) 62 | if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.13) 63 | target_link_options(coverage_config INTERFACE --coverage) 64 | else() 65 | target_link_libraries(coverage_config INTERFACE --coverage) 66 | endif() 67 | endif() 68 | # endif(CODE_COVERAGE AND CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang") 69 | 70 | # https://stackoverflow.com/a/15212881/1203241 71 | option(SIMPLE_WITH_JIEBA "Option to build with cppjieba" ON) 72 | if(SIMPLE_WITH_JIEBA) 73 | add_compile_definitions(USE_JIEBA=1) 74 | endif()
75 | 76 | add_subdirectory(src) 77 | 78 | OPTION(BUILD_TEST_EXAMPLE "Option to build tests and examples" ON) 79 | if (NOT IOS AND BUILD_TEST_EXAMPLE) 80 | add_subdirectory(examples/cpp) 81 | enable_testing() 82 | add_subdirectory(test) 83 | endif() 84 | 85 | -------------------------------------------------------------------------------- /contrib/sqlite3/README.md: -------------------------------------------------------------------------------- 1 | # sqlite-amalgamation 2 | - [Release History](https://www.sqlite.org/changes.html) 3 | - [Chronology](https://www.sqlite.org/chronology.html) 4 | 5 | This repository mirrors the [SQLite](http://www.sqlite.org/download.html) 6 | amalgamation, which is the recommended method of building SQLite into larger 7 | projects. 8 | It also supports `cmake` for building, installing and exporting. 9 | 10 | SQLite includes more than 100 files in `*.c` / `*.h`, but 11 | > The [amalgamation](http://www.sqlite.org/amalgamation.html) contains 12 | > everything you need to integrate SQLite into a larger project. Just copy the 13 | > amalgamation into your source directory and compile it along with the other C 14 | > code files in your project. 15 | > ([A more detailed discussion](http://www.sqlite.org/howtocompile.html) of the 16 | > compilation process is available.) You may also want to make use of 17 | > the "sqlite3.h" header file that defines the programming API for SQLite. The 18 | > sqlite3.h header file is available separately. The sqlite3.h file is also 19 | > contained within the amalgamation, in the first few thousand lines. So if you 20 | > have a copy of sqlite3.c but cannot seem to locate sqlite3.h, you can always 21 | > regenerate the sqlite3.h by copying and pasting from the amalgamation. 22 | 23 | ![SQLite3](http://www.sqlite.org/images/sqlite370_banner.gif) 24 | 25 | 26 | ## build / install 27 | A static lib (`libsqlite3`) and the sqlite3 shell will be generated by the build 28 | system. 
29 | 30 | ```bash 31 | $> mkdir .build 32 | $> cd .build 33 | $> cmake /path/to/this/repo # or cmake .. -G Ninja 34 | $> ccmake . # for build options or cmake-gui . 35 | $> make -j 2 # or ninja 36 | 37 | $> make install 38 | ``` 39 | 40 | ## usage 41 | to integrate this library into your project simply add these lines to your project 42 | `cmake`: 43 | ```cmake 44 | find_package(SQLite3 REQUIRED CONFIG) 45 | target_link_libraries(${PROJECT_NAME} SQLite::SQLite3) 46 | ``` 47 | 48 | the include directory and link library will be automatically added to your target. 49 | If you need to switch your project to use "standard" SQLite remove CONFIG option 50 | in `find_package` function call. 51 | 52 | ## SQLite3 build options 53 | `SQLite3` comes with plenty of 54 | [compile options](https://www.sqlite.org/compile.html) 55 | 56 | following cmake build options control some of those compile options: 57 | 58 | | options | default | 59 | | :-- | :-- | 60 | | `SQLITE_ENABLE_JSON1` | on | 61 | | `SQLITE_ENABLE_DBSTAT_VTAB` | off | 62 | | `SQLITE_ENABLE_FTS3` | off | 63 | | `SQLITE_ENABLE_FTS4` | off | 64 | | `SQLITE_ENABLE_FTS5` | off | 65 | | `SQLITE_ENABLE_GEOPOLY` | off | 66 | | `SQLITE_ENABLE_ICU` | off | 67 | | `SQLITE_ENABLE_RBU` | off | 68 | | `SQLITE_ENABLE_RTREE` | off | 69 | 70 | these **recommended** compile options are also passed to the compiler by 71 | `SQLITE_RECOMMENDED_OPTIONS` (on by default): 72 | 73 | | options | 74 | | :-- | 75 | | SQLITE_DQS = 0 | 76 | | SQLITE_DEFAULT_MEMSTATUS = 0 | 77 | | SQLITE_DEFAULT_WAL_SYNCHRONOUS = 1 | 78 | | SQLITE_MAX_EXPR_DEPTH = 0 | 79 | | SQLITE_LIKE_DOESNT_MATCH_BLOBS | 80 | | SQLITE_OMIT_DECLTYPE | 81 | | SQLITE_OMIT_DEPRECATED | 82 | | SQLITE_OMIT_PROGRESS_CALLBACK | 83 | | SQLITE_OMIT_SHARED_CACHE | 84 | | SQLITE_USE_ALLOCA | 85 | 86 | all compile-time options will go into `sqlite3_config.h`, you may 87 | use this file to check these options when building your application. 
88 | 89 | the SQLite3 shell (executable) is disabled by default, to build it just 90 | activate the `BUILD_SHELL` option. 91 | 92 | 93 | -------------------------------------------------------------------------------- /examples/python3/db_connector.py: -------------------------------------------------------------------------------- 1 | # encoding=utf-8 2 | import sqlite3 3 | from sqlite3.dbapi2 import Cursor 4 | 5 | # need to run with python3 6 | # example modified from https://github.com/wangfenjin/simple/issues/56 7 | 8 | # https://www.sqlite.org/fts5.html#external_content_tables 9 | TABLE_SQL = "CREATE TABLE IF NOT EXISTS chat (_id INTEGER PRIMARY KEY AUTOINCREMENT,sender_nickname TEXT,data BLOB)" 10 | SEARCH_TABLE_SQL = "CREATE VIRTUAL TABLE IF NOT EXISTS chat_fts USING fts5(sender_nickname,data, content=chat, content_rowid=_id, tokenize='simple');" 11 | # In order to use this command to delete a row, the text value 'delete' must be inserted into the special column with the same name as the table. The rowid of the row to delete is inserted into the rowid column. 12 | TRIGGER_SQL = """ 13 | CREATE TRIGGER IF NOT EXISTS chat_fts_i AFTER INSERT ON chat BEGIN 14 | INSERT INTO chat_fts(rowid, sender_nickname, data) VALUES (new._id, new.sender_nickname, new.data); 15 | END; 16 | CREATE TRIGGER IF NOT EXISTS chat_fts_d AFTER DELETE ON chat BEGIN 17 | INSERT INTO chat_fts(chat_fts, rowid, sender_nickname, data) VALUES('delete', old._id, old.sender_nickname, old.data); 18 | END; 19 | CREATE TRIGGER IF NOT EXISTS chat_fts_u AFTER UPDATE ON chat BEGIN 20 | INSERT INTO chat_fts(chat_fts, rowid, sender_nickname, data) VALUES('delete', old._id, old.sender_nickname, old.data); 21 | INSERT INTO chat_fts(rowid, sender_nickname, data) VALUES (new._id, new.sender_nickname, new.data); 22 | END; 23 | """ 24 | INSERT_SQL = """ 25 | INSERT INTO chat(sender_nickname, data) VALUES(?, ?); 26 | """ 27 | SEARCH_SQL = "SELECT rowid,sender_nickname,data FROM chat_fts where chat_fts match ? 
ORDER BY rank;" 28 | 29 | class ftsDB: 30 | # NOTE: you need to set the ext_path 31 | def __init__(self, path, ext_path) -> None: 32 | # create database file if not exist 33 | self.db = sqlite3.connect(path) 34 | # create table if table not exist 35 | self.init_db(ext_path) 36 | 37 | def __del__(self): 38 | if hasattr(self, 'db') and self.db: 39 | self.db.commit() 40 | self.db.close() 41 | del self.db 42 | 43 | def init_db(self, ext_path): 44 | self.db.enable_load_extension(True) 45 | self.db.load_extension(ext_path) 46 | cursor = self.db.cursor() 47 | cursor.execute(TABLE_SQL) 48 | cursor.execute(SEARCH_TABLE_SQL) 49 | cursor.executescript(TRIGGER_SQL) 50 | self.commit() 51 | self.cursor = cursor 52 | 53 | def search(self, query: str): 54 | return self.cursor.execute(SEARCH_SQL, query) 55 | 56 | def insert(self, *data): 57 | # insert a line of data 58 | self.cursor.execute(INSERT_SQL, data) 59 | 60 | def bulk_insert(self, datas): 61 | # insert bulk data 62 | self.cursor.executemany(INSERT_SQL, datas) 63 | 64 | def commit(self): 65 | self.db.commit() 66 | 67 | def process_journal(self, filename, offset=0): 68 | with open(filename, 'r', encoding='utf-8') as f: 69 | if offset >= 0: 70 | f.seek(offset) 71 | while True: 72 | try: 73 | line = f.readline() 74 | if not line: 75 | break 76 | line = eval(line) 77 | self.insert(*line) 78 | except Exception as e: 79 | print(repr(line)) 80 | print(f"========pos: {offset}======") 81 | raise e 82 | finally: 83 | self.commit() 84 | offset = f.tell() 85 | 86 | if __name__ == '__main__': 87 | import os, sys 88 | 89 | assert(len(sys.argv) == 2) 90 | # sys.argv[1] should be the extension's path, `/path/to/libsimple` 91 | # without file extension such as so, dylib or dll 92 | db = ftsDB(':memory:', sys.argv[1]) 93 | db.process_journal(os.path.dirname(os.path.abspath(__file__)) + "/journal.txt") 94 | matched_row_count = 0 95 | for r in db.search('不'): 96 | matched_row_count += 1 97 | print(r) 98 | assert(matched_row_count == 3) 99 | 
db.__del__() 100 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Downloads](https://img.shields.io/github/downloads/wangfenjin/simple/total)](https://img.shields.io/github/downloads/wangfenjin/simple/total) 2 | [![build](https://github.com/wangfenjin/simple/workflows/CI/badge.svg)](https://github.com/wangfenjin/simple/actions?query=workflow%3ACI) 3 | [![codecov](https://codecov.io/gh/wangfenjin/simple/branch/master/graph/badge.svg?token=8SHLFZ3RB4)](https://codecov.io/gh/wangfenjin/simple) 4 | [![CodeFactor](https://www.codefactor.io/repository/github/wangfenjin/simple/badge)](https://www.codefactor.io/repository/github/wangfenjin/simple) 5 | [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://github.com/wangfenjin/simple/blob/master/LICENSE) 6 | 7 | # Simple tokenizer 8 | 9 | simple 是一个支持中文和拼音的 [sqlite3 fts5](https://www.sqlite.org/fts5.html) 拓展。它完整提供了 [微信移动端的全文检索多音字问题解决方案](https://cloud.tencent.com/developer/article/1198371) 一文中的方案四,非常简单和高效地支持中文及拼音的搜索。 10 | 11 | 实现相关介绍:https://www.wangfenjin.com/posts/simple-tokenizer/ 12 | 13 | 在此基础上,我们还支持通过 [cppjieba](https://github.com/yanyiwu/cppjieba) 实现更精准的词组匹配,介绍文章见 https://www.wangfenjin.com/posts/simple-jieba-tokenizer/ 14 | 15 | ## 用法 16 | 17 | ### 代码使用 18 | 19 | 20 | * 下载已经编译好的插件:https://github.com/wangfenjin/simple/releases 参考 examples 目录,目前已经有 c++, python, go 和 node-sqlite3 的例子。 21 | * iOS可以参考: 22 | - [#73](https://github.com/wangfenjin/simple/pull/73) 23 | - [@hxicoder](https://github.com/hxicoder) 提供的 [demo](https://github.com/hxicoder/DBDemo) 24 | - [@pipi32167](https://github.com/pipi32167)提供的[demo](https://github.com/pipi32167/SQLiteSwiftDemo) 25 | * 在 Rust 中使用的例子 https://github.com/wangfenjin/simple/issues/89 https://github.com/fundon/tiny-docs-se 26 | * Java 例子 https://github.com/wangfenjin/sqlite-java-connect 27 | * C# 例子 
https://github.com/dudylan/SqliteCheck/ 28 | 29 | ### 命令行使用 30 | 31 | 首先需要确认你用到的 sqlite 版本支持 fts5 拓展,确认方法是: 32 | ```sql 33 | select fts5(?1); 34 | ``` 35 | 然后就可以使用了,具体的例子可以参考 [example.sql](./example.sql) 和 [cpp](https://github.com/wangfenjin/simple/blob/master/examples/cpp/main.cc) 36 | 37 | ``` 38 | $ ./sqlite3 39 | SQLite version 3.32.3 2020-06-18 14:00:33 40 | Enter ".help" for usage hints. 41 | Connected to a transient in-memory database. 42 | Use ".open FILENAME" to reopen on a persistent database. 43 | sqlite> .load libsimple 44 | sqlite> CREATE VIRTUAL TABLE t1 USING fts5(text, tokenize = 'simple'); 45 | sqlite> INSERT INTO t1 VALUES ('中华人民共和国国歌'); 46 | sqlite> select simple_highlight(t1, 0, '[', ']') as text from t1 where text match simple_query('中华国歌'); 47 | [中华]人民共和[国国歌] 48 | sqlite> select simple_highlight(t1, 0, '[', ']') as text from t1 where text match jieba_query('中华国歌'); 49 | [中华]人民共和国[国歌] 50 | sqlite> select simple_highlight(t1, 0, '[', ']') as text from t1 where text match simple_query('中华人民共和国'); 51 | [中华人民共和国国]歌 52 | sqlite> select simple_highlight(t1, 0, '[', ']') as text from t1 where text match jieba_query('中华人民共和国'); 53 | [中华人民共和国]国歌 54 | ``` 55 | 56 | ## 功能 57 | 58 | 1. simple tokenizer 支持中文和拼音的分词,并且可通过开关控制是否需要支持拼音 59 | 2. simple_query() 函数实现自动组装 match query 的功能,用户不用学习 fts5 query 的语法 60 | 3. simple_highlight() 实现连续高亮 match 的词汇,与 sqlite 自带的 highlight 类似,但是 simple_highlight 实现了连续 match 的词汇分到同一组的逻辑,理论上用户更需要这样 61 | 4. simple_highlight_pos() 实现返回 match 的词汇位置,用户可以自行决定怎么使用 62 | 5. simple_snippet() 实现截取 match 片段的功能,与 sqlite 自带的 snippet 功能类似,同样是增强连续 match 的词汇分到同一组的逻辑 63 | 6. jieba_query() 实现jieba分词的效果,在索引不变的情况下,可以实现更精准的匹配。可以通过 `-DSIMPLE_WITH_JIEBA=OFF ` 关掉结巴分词的功能 [#35](https://github.com/wangfenjin/simple/pull/35) 64 | 7. 
jieba_dict() 指定 dict 的目录,只需要调用一次,需要在调用 jieba_query() 之前指定。 65 | 66 | ## 开发 67 | 68 | ### 编译相关 69 | 70 | 使用支持 c++14 以上的编译器编译,直接在根目录 ./build-and-run 就会编译所有需要的文件并运行测试。编译输出见 output 目录 71 | 72 | 也可以手动 cmake: 73 | ```shell 74 | mkdir build; cd build 75 | cmake .. 76 | make -j 12 77 | make install 78 | ``` 79 | 80 | 支持 iOS 编译: 81 | ``` 82 | ./build-ios.sh 83 | ``` 84 | 85 | ### 代码 86 | - `src/entry` 入口文件,注册 sqlite tokenizer 和函数 87 | - `src/simple_tokenizer` 分词器实现 88 | - `src/simple_highlight` 高亮函数,基于内置的高亮函数改的,让命中的相邻单词连续高亮 89 | - `src/pinyin` 中文转拼音以及拼音拆 query 的实现 90 | 91 | ## TODO 92 | 93 | - [x] 添加 CI/CD 94 | - [x] 添加使用的例子,参见 [cpp](https://github.com/wangfenjin/simple/blob/master/examples/cpp/main.cc) [python3](https://github.com/wangfenjin/simple/blob/master/examples/python3/db_connector.py) 95 | - [x] 部分参数可配,比如拼音文件的路径(已经把文件打包到 so 中) 96 | - [x] 减少依赖,减小 so 的大小 97 | - [x] 给出性能数据:加载扩展时间2ms内;第一次使用拼音功能需要加载拼音文件,大概 500ms;第一次使用结巴分词功能需要加载结巴分词文件,大概 4s。 98 | 99 | ## Star History 100 | 101 | [![Star History Chart](https://api.star-history.com/svg?repos=wangfenjin/simple&type=Date)](https://star-history.com/#wangfenjin/simple&Date) 102 | 103 | -------------------------------------------------------------------------------- /test/tokenizer_test.cc: -------------------------------------------------------------------------------- 1 | #include "gtest/gtest.h" 2 | #include "simple_tokenizer.h" 3 | 4 | using namespace simple_tokenizer; 5 | 6 | int printFn(void* pCtx, int flags, const char* index, int len, int start, int end) { 7 | std::cout << "\t" << index << " " << len << " " << start << " " << end << "\n"; 8 | return 0; 9 | } 10 | 11 | TEST(simple, tokenizer_with_pinyin) { 12 | SimpleTokenizer* t = new SimpleTokenizer(nullptr, 0); 13 | std::vector arr; 14 | std::vector query; 15 | arr.push_back("english@\"''"); 16 | query.push_back(R"VAGON(( e+n+g+l+i+s+h* OR eng+li+sh* OR english* ) AND "@" AND """" AND "'" AND "'")VAGON"); 17 | arr.push_back("zhou杰伦"); 18 | query.push_back(R"VAGON(( 
z+h+o+u* OR zhou* ) AND "杰" AND "伦")VAGON"); 19 | arr.push_back("杰伦 zhou 123"); 20 | query.push_back(R"VAGON("杰" AND "伦" AND ( z+h+o+u* OR zhou* ) AND "123"*)VAGON"); 21 | for (int i = 0; i < arr.size(); i++) { 22 | std::string s = arr[i]; 23 | std::cout << s << " as doc:\n"; 24 | t->tokenize(nullptr, FTS5_TOKENIZE_DOCUMENT, s.c_str(), s.length(), printFn); 25 | std::cout << s << " as query:\n"; 26 | t->tokenize(nullptr, FTS5_TOKENIZE_QUERY, s.c_str(), s.length(), printFn); 27 | std::cout << s << " as aux:\n"; 28 | std::string result = SimpleTokenizer::tokenize_query(s.c_str(), s.length()); 29 | std::cout << result << "\n"; 30 | ASSERT_EQ(result, query[i]); 31 | } 32 | } 33 | 34 | TEST(simple, tokenizer_disable_pinyin) { 35 | const char* p = "0"; 36 | SimpleTokenizer* t = new SimpleTokenizer(&p, 1); 37 | std::vector arr; 38 | std::vector query; 39 | arr.push_back("english@\"''"); 40 | query.push_back(R"VAGON(english* AND "@" AND """" AND "'" AND "'")VAGON"); 41 | arr.push_back("zhou杰伦"); 42 | query.push_back(R"VAGON(zhou* AND "杰" AND "伦")VAGON"); 43 | arr.push_back("杰伦123"); 44 | query.push_back(R"VAGON("杰" AND "伦" AND "123"*)VAGON"); 45 | for (int i = 0; i < arr.size(); i++) { 46 | std::string s = arr[i]; 47 | std::cout << s << " as doc:\n"; 48 | t->tokenize(nullptr, FTS5_TOKENIZE_DOCUMENT, s.c_str(), s.length(), printFn); 49 | std::cout << s << " as query:\n"; 50 | t->tokenize(nullptr, FTS5_TOKENIZE_QUERY, s.c_str(), s.length(), printFn); 51 | std::cout << s << " as aux:\n"; 52 | std::string result = SimpleTokenizer::tokenize_query(s.c_str(), s.length(), 0); 53 | std::cout << result << "\n"; 54 | ASSERT_EQ(result, query[i]); 55 | } 56 | } 57 | 58 | #ifdef USE_JIEBA 59 | TEST(simple, jieba_tokenizer_with_pinyin) { 60 | SimpleTokenizer* t = new SimpleTokenizer(nullptr, 0); 61 | std::vector arr; 62 | std::vector query; 63 | arr.push_back("english@\"''"); 64 | query.push_back(R"VAGON(( e+n+g+l+i+s+h* OR eng+li+sh* OR english* ) AND "@" AND """" AND "'" AND 
"'")VAGON"); 65 | arr.push_back("zhou杰伦"); 66 | query.push_back(R"VAGON(( z+h+o+u* OR zhou* ) AND "杰伦")VAGON"); 67 | arr.push_back("杰伦 zhou 123"); 68 | query.push_back(R"VAGON("杰伦" AND ( z+h+o+u* OR zhou* ) AND "123"*)VAGON"); 69 | for (int i = 0; i < arr.size(); i++) { 70 | std::string s = arr[i]; 71 | std::cout << s << " as doc:\n"; 72 | t->tokenize(nullptr, FTS5_TOKENIZE_DOCUMENT, s.c_str(), s.length(), printFn); 73 | std::cout << s << " as query:\n"; 74 | t->tokenize(nullptr, FTS5_TOKENIZE_QUERY, s.c_str(), s.length(), printFn); 75 | std::cout << s << " as aux:\n"; 76 | std::string result = SimpleTokenizer::tokenize_jieba_query(s.c_str(), s.length()); 77 | std::cout << result << "\n"; 78 | ASSERT_EQ(result, query[i]); 79 | } 80 | } 81 | 82 | TEST(simple, jieba_tokenizer_disable_pinyin) { 83 | const char* p = "0"; 84 | SimpleTokenizer* t = new SimpleTokenizer(&p, 1); 85 | std::vector arr; 86 | std::vector query; 87 | arr.push_back("english@\"''"); 88 | query.push_back(R"VAGON(english* AND "@" AND """" AND "'" AND "'")VAGON"); 89 | arr.push_back("zhou杰伦"); 90 | query.push_back(R"VAGON(zhou* AND "杰伦")VAGON"); 91 | arr.push_back("杰伦123"); 92 | query.push_back(R"VAGON("杰伦" AND "123"*)VAGON"); 93 | for (int i = 0; i < arr.size(); i++) { 94 | std::string s = arr[i]; 95 | std::cout << s << " as doc:\n"; 96 | t->tokenize(nullptr, FTS5_TOKENIZE_DOCUMENT, s.c_str(), s.length(), printFn); 97 | std::cout << s << " as query:\n"; 98 | t->tokenize(nullptr, FTS5_TOKENIZE_QUERY, s.c_str(), s.length(), printFn); 99 | std::cout << s << " as aux:\n"; 100 | std::string result = SimpleTokenizer::tokenize_jieba_query(s.c_str(), s.length(), 0); 101 | std::cout << result << "\n"; 102 | ASSERT_EQ(result, query[i]); 103 | } 104 | } 105 | #endif 106 | -------------------------------------------------------------------------------- /src/entry.cc: -------------------------------------------------------------------------------- 1 | #include "simple_highlight.h" 2 | #include 
"simple_tokenizer.h" 3 | SQLITE_EXTENSION_INIT1 4 | 5 | #include 6 | #include 7 | 8 | int fts5_simple_xCreate(void *sqlite3, const char **azArg, int nArg, Fts5Tokenizer **ppOut) { 9 | (void)sqlite3; 10 | auto *p = new simple_tokenizer::SimpleTokenizer(azArg, nArg); 11 | *ppOut = reinterpret_cast(p); 12 | return SQLITE_OK; 13 | } 14 | 15 | int fts5_simple_xTokenize(Fts5Tokenizer *tokenizer_ptr, void *pCtx, int flags, const char *pText, int nText, 16 | xTokenFn xToken) { 17 | auto *p = (simple_tokenizer::SimpleTokenizer *)tokenizer_ptr; 18 | return p->tokenize(pCtx, flags, pText, nText, xToken); 19 | } 20 | 21 | void fts5_simple_xDelete(Fts5Tokenizer *p) { 22 | auto *pST = (simple_tokenizer::SimpleTokenizer *)p; 23 | delete (pST); 24 | } 25 | 26 | /* 27 | ** Return a pointer to the fts5_api pointer for database connection db. 28 | ** If an error occurs, return NULL and leave an error in the database 29 | ** handle (accessible using sqlite3_errcode()/errmsg()). 30 | */ 31 | static int fts5_api_from_db(sqlite3 *db, fts5_api **ppApi) { 32 | sqlite3_stmt *pStmt = 0; 33 | int rc; 34 | 35 | *ppApi = 0; 36 | rc = sqlite3_prepare(db, "SELECT fts5(?1)", -1, &pStmt, 0); 37 | if (rc == SQLITE_OK) { 38 | sqlite3_bind_pointer(pStmt, 1, reinterpret_cast(ppApi), "fts5_api_ptr", 0); 39 | (void)sqlite3_step(pStmt); 40 | rc = sqlite3_finalize(pStmt); 41 | } 42 | 43 | return rc; 44 | } 45 | 46 | #ifdef USE_JIEBA 47 | static void jieba_dict(sqlite3_context *pCtx, int nVal, sqlite3_value **apVal) { 48 | if (nVal >= 1) { 49 | const char *text = (const char *)sqlite3_value_text(apVal[0]); 50 | if (text) { 51 | std::string tmp(text); 52 | char sep = '/'; 53 | #ifdef _WIN32 54 | sep = '\\'; 55 | #endif 56 | if (tmp.back() != sep) { // Need to add a 57 | tmp += sep; // path separator 58 | } 59 | simple_tokenizer::jieba_dict_path = tmp; 60 | sqlite3_result_text(pCtx, tmp.c_str(), -1, SQLITE_TRANSIENT); 61 | return; 62 | } 63 | } 64 | sqlite3_result_null(pCtx); 65 | } 66 | 67 | static void 
jieba_query(sqlite3_context *pCtx, int nVal, sqlite3_value **apVal) { 68 | if (nVal >= 1) { 69 | const char *text = (const char *)sqlite3_value_text(apVal[0]); 70 | if (text) { 71 | int flags = 1; 72 | if (nVal >= 2) { 73 | flags = atoi((const char *)sqlite3_value_text(apVal[1])); 74 | } 75 | std::string result = simple_tokenizer::SimpleTokenizer::tokenize_jieba_query(text, (int)std::strlen(text), flags); 76 | sqlite3_result_text(pCtx, result.c_str(), -1, SQLITE_TRANSIENT); 77 | return; 78 | } 79 | } 80 | sqlite3_result_null(pCtx); 81 | } 82 | #endif 83 | 84 | static void simple_query(sqlite3_context *pCtx, int nVal, sqlite3_value **apVal) { 85 | if (nVal >= 1) { 86 | const char *text = (const char *)sqlite3_value_text(apVal[0]); 87 | if (text) { 88 | int flags = 1; 89 | if (nVal >= 2) { 90 | flags = atoi((const char *)sqlite3_value_text(apVal[1])); 91 | } 92 | std::string result = simple_tokenizer::SimpleTokenizer::tokenize_query(text, (int)std::strlen(text), flags); 93 | sqlite3_result_text(pCtx, result.c_str(), -1, SQLITE_TRANSIENT); 94 | return; 95 | } 96 | } 97 | sqlite3_result_null(pCtx); 98 | } 99 | 100 | int sqlite3_simple_init(sqlite3 *db, char **pzErrMsg, const sqlite3_api_routines *pApi) { 101 | (void)pzErrMsg; 102 | int rc = SQLITE_OK; 103 | SQLITE_EXTENSION_INIT2(pApi) 104 | 105 | rc = sqlite3_create_function(db, "simple_query", -1, SQLITE_UTF8 | SQLITE_DETERMINISTIC, NULL, &simple_query, NULL, 106 | NULL); 107 | #ifdef USE_JIEBA 108 | rc = sqlite3_create_function(db, "jieba_query", -1, SQLITE_UTF8 | SQLITE_DETERMINISTIC, NULL, &jieba_query, NULL, 109 | NULL); 110 | rc = sqlite3_create_function(db, "jieba_dict", 1, SQLITE_UTF8 | SQLITE_DETERMINISTIC, NULL, &jieba_dict, NULL, NULL); 111 | #endif 112 | 113 | // fts5_tokenizer tokenizer = {fts5AsciiCreate, fts5AsciiDelete, fts5AsciiTokenize }; 114 | fts5_tokenizer tokenizer = {fts5_simple_xCreate, fts5_simple_xDelete, fts5_simple_xTokenize}; 115 | fts5_api *fts5api; 116 | rc = fts5_api_from_db(db, 
&fts5api); 117 | if (rc != SQLITE_OK) return rc; 118 | if (fts5api == 0 || fts5api->iVersion < 2) { 119 | return SQLITE_ERROR; 120 | } 121 | rc = fts5api->xCreateTokenizer(fts5api, "simple", reinterpret_cast(fts5api), &tokenizer, NULL); 122 | rc = 123 | fts5api->xCreateFunction(fts5api, "simple_highlight", reinterpret_cast(fts5api), &simple_highlight, NULL); 124 | rc = fts5api->xCreateFunction(fts5api, "simple_highlight_pos", reinterpret_cast(fts5api), 125 | &simple_highlight_pos, NULL); 126 | rc = fts5api->xCreateFunction(fts5api, "simple_snippet", reinterpret_cast(fts5api), &simple_snippet, NULL); 127 | return rc; 128 | } 129 | -------------------------------------------------------------------------------- /src/pinyin.cc: -------------------------------------------------------------------------------- 1 | #include "pinyin.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | CMRC_DECLARE(pinyin_text); 12 | 13 | namespace simple_tokenizer { 14 | 15 | PinYin::PinYin() { pinyin = build_pinyin_map(); } 16 | 17 | std::set PinYin::to_plain(const std::string &input) { 18 | std::set s; 19 | std::string value; 20 | for (size_t i = 0, len = 0; i != input.length(); i += len) { 21 | auto byte = input[i]; 22 | if (byte == ',') { 23 | s.insert(value); 24 | s.insert(value.substr(0, 1)); 25 | value.clear(); 26 | len = 1; 27 | continue; 28 | } 29 | len = get_str_len((unsigned char)byte); 30 | if (len == 1) { 31 | // Skip invisible byte 32 | // Fix the issue in Windows https://github.com/wangfenjin/simple/pull/143 33 | if (std::isspace(byte) || std::iscntrl(byte)) { 34 | continue; 35 | } 36 | value.push_back(byte); 37 | continue; 38 | } 39 | auto it = tone_to_plain.find(input.substr(i, len)); 40 | if (it != tone_to_plain.end()) { 41 | value.push_back(it->second); 42 | } else { 43 | value.push_back(byte); 44 | } 45 | } 46 | s.insert(value); 47 | s.insert(value.substr(0, 1)); 48 | return s; 49 | } 50 | 51 | // clang-format off 
52 | std::map > PinYin::build_pinyin_map() { 53 | std::map > map; 54 | // clang-format on 55 | auto fs = cmrc::pinyin_text::get_filesystem(); 56 | auto pinyin_data = fs.open("contrib/pinyin.txt"); 57 | std::istringstream pinyin_file(std::string(pinyin_data.begin(), pinyin_data.end())); 58 | std::string line; 59 | char delimiter = ' '; 60 | std::string cp, py; 61 | while (std::getline(pinyin_file, line)) { 62 | if (line.length() == 0 || line[0] == '#') continue; 63 | std::stringstream tokenStream(line); 64 | std::getline(tokenStream, cp, delimiter); 65 | std::getline(tokenStream, py, delimiter); 66 | int codepoint = static_cast(std::stoul(cp.substr(2, cp.length() - 3), 0, 16l)); 67 | std::set s = to_plain(py); 68 | std::vector m(s.size()); 69 | std::copy(s.begin(), s.end(), m.begin()); 70 | map[codepoint] = m; 71 | } 72 | return map; 73 | } 74 | 75 | // Get UTF8 character encoding length(via first byte) 76 | int PinYin::get_str_len(unsigned char byte) { 77 | if (byte >= 0xF0) 78 | return 4; 79 | else if (byte >= 0xE0) 80 | return 3; 81 | else if (byte >= 0xC0) 82 | return 2; 83 | return 1; 84 | } 85 | 86 | // get the first valid utf8 string's code point 87 | int PinYin::codepoint(const std::string &u) { 88 | size_t l = u.length(); 89 | if (l < 1) return -1; 90 | size_t len = get_str_len((unsigned char)u[0]); 91 | if (l < len) return -1; 92 | switch (len) { 93 | case 1: 94 | return (unsigned char)u[0]; 95 | case 2: 96 | return ((unsigned char)u[0] - 192) * 64 + ((unsigned char)u[1] - 128); 97 | case 3: // most Chinese char in here 98 | return ((unsigned char)u[0] - 224) * 4096 + ((unsigned char)u[1] - 128) * 64 + ((unsigned char)u[2] - 128); 99 | case 4: 100 | return ((unsigned char)u[0] - 240) * 262144 + ((unsigned char)u[1] - 128) * 4096 + 101 | ((unsigned char)u[2] - 128) * 64 + ((unsigned char)u[3] - 128); 102 | default: 103 | throw std::runtime_error("should never happen"); 104 | } 105 | } 106 | 107 | const std::vector &PinYin::get_pinyin(const std::string 
&chinese) { return pinyin[codepoint(chinese)]; } 108 | 109 | std::vector PinYin::_split_pinyin(const std::string &input, int begin, int end) { 110 | if (begin >= end) { 111 | return empty_vector; 112 | } 113 | if (begin == end - 1) { 114 | return {input.substr(begin, end - begin)}; 115 | } 116 | std::vector result; 117 | std::string full = input.substr(begin, end - begin); 118 | if (pinyin_prefix.find(full) != pinyin_prefix.end() || pinyin_valid.find(full) != pinyin_valid.end()) { 119 | result.push_back(full); 120 | } 121 | int start = begin + 1; 122 | while (start < end) { 123 | std::string first = input.substr(begin, start - begin); 124 | if (pinyin_valid.find(first) == pinyin_valid.end()) { 125 | ++start; 126 | continue; 127 | } 128 | std::vector tmp = _split_pinyin(input, start, end); 129 | for (const auto &s : tmp) { 130 | result.push_back(first + "+" + s); 131 | } 132 | ++start; 133 | } 134 | return result; 135 | } 136 | 137 | std::set PinYin::split_pinyin(const std::string &input) { 138 | int slen = (int)input.size(); 139 | const int max_length = 20; 140 | if (slen > max_length || slen <= 1) { 141 | return {input}; 142 | } 143 | 144 | std::string spacedInput; 145 | for (auto c : input) { 146 | spacedInput.push_back('+'); 147 | spacedInput.push_back(c); 148 | } 149 | spacedInput = spacedInput.substr(1, spacedInput.size()); 150 | 151 | if (slen > 2) { 152 | std::vector tmp = _split_pinyin(input, 0, slen); 153 | std::set s(tmp.begin(), tmp.end()); 154 | s.insert(spacedInput); 155 | s.insert(input); 156 | return s; 157 | } 158 | return {input, spacedInput}; 159 | } 160 | 161 | } // namespace simple_tokenizer 162 | -------------------------------------------------------------------------------- /src/simple_tokenizer.cc: -------------------------------------------------------------------------------- 1 | #include "simple_tokenizer.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | namespace 
simple_tokenizer { 13 | SimpleTokenizer::SimpleTokenizer(const char **azArg, int nArg) { 14 | if (nArg >= 1) { 15 | enable_pinyin = atoi(azArg[0]) != 0; 16 | } 17 | } 18 | 19 | PinYin *SimpleTokenizer::get_pinyin() { 20 | static auto *py = new PinYin(); 21 | return py; 22 | } 23 | 24 | static TokenCategory from_char(char c) {  // classify by first byte; casts keep <cctype> calls defined for bytes >= 0x80 25 | if (std::isdigit(static_cast<unsigned char>(c))) { 26 | return TokenCategory::DIGIT; 27 | } 28 | if (std::isspace(static_cast<unsigned char>(c)) || std::iscntrl(static_cast<unsigned char>(c))) { 29 | return TokenCategory::SPACE; 30 | } 31 | if (std::isalpha(static_cast<unsigned char>(c))) { 32 | return TokenCategory::ASCII_ALPHABETIC; 33 | } 34 | return TokenCategory::OTHER; 35 | } 36 | 37 | std::string SimpleTokenizer::tokenize_query(const char *text, int textLen, int flags) { 38 | int start = 0; 39 | int index = 0; 40 | std::string tmp; 41 | std::string result; 42 | while (index < textLen) { 43 | TokenCategory category = from_char(text[index]); 44 | switch (category) { 45 | case TokenCategory::OTHER: 46 | index = std::min(textLen, index + PinYin::get_str_len(text[index]));  // never step past the buffer on a truncated UTF-8 sequence 47 | break; 48 | default: 49 | while (++index < textLen && from_char(text[index]) == category) { 50 | } 51 | break; 52 | } 53 | tmp.clear(); 54 | std::copy(text + start, text + index, std::back_inserter(tmp)); 55 | append_result(result, tmp, category, start, flags); 56 | start = index; 57 | } 58 | return result; 59 | } 60 | 61 | #ifdef USE_JIEBA 62 | std::string jieba_dict_path = "./dict/"; 63 | std::string SimpleTokenizer::tokenize_jieba_query(const char *text, int textLen, int flags) { 64 | (void)textLen; 65 | static cppjieba::Jieba jieba(jieba_dict_path + "jieba.dict.utf8", jieba_dict_path + "hmm_model.utf8", 66 | jieba_dict_path + "user.dict.utf8", jieba_dict_path + "idf.utf8", 67 | jieba_dict_path + "stop_words.utf8"); 68 | std::string tmp; 69 | std::string result; 70 | std::vector words; 71 | jieba.Cut(text, words); 72 | for (auto word : words) { 73 | TokenCategory category = from_char(text[word.offset]); 74 | append_result(result, word.word, category, word.offset, flags); 75 | } 76 | return
result; 77 | } 78 | #endif 79 | 80 | void SimpleTokenizer::append_result(std::string &result, std::string part, TokenCategory category, int offset, 81 | int flags) { 82 | if (category != TokenCategory::SPACE) { 83 | std::string tmp = std::move(part); 84 | if (category == TokenCategory::ASCII_ALPHABETIC) { 85 | std::transform(tmp.begin(), tmp.end(), tmp.begin(), [](unsigned char c) { return std::tolower(c); }); 86 | } 87 | 88 | if (flags != 0 && category == TokenCategory::ASCII_ALPHABETIC && tmp.size() > 1) { 89 | if (offset == 0) { 90 | result.append("( "); 91 | } else { 92 | result.append(" AND ( "); 93 | } 94 | std::set pys = SimpleTokenizer::get_pinyin()->split_pinyin(tmp); 95 | bool addOr = false; 96 | for (const std::string &s : pys) { 97 | if (addOr) { 98 | result.append(" OR "); 99 | } 100 | result.append(s); 101 | result.append("*"); 102 | addOr = true; 103 | } 104 | result.append(" )"); 105 | } else { 106 | if (offset > 0) { 107 | result.append(" AND "); 108 | } 109 | if (tmp == "\"") { 110 | tmp += tmp; 111 | } 112 | if (category != TokenCategory::ASCII_ALPHABETIC) { 113 | result.append('"' + tmp + '"'); 114 | } else { 115 | result.append(tmp); 116 | } 117 | if (category != TokenCategory::OTHER) { 118 | result.append("*"); 119 | } 120 | } 121 | } 122 | } 123 | 124 | // https://cloud.tencent.com/developer/article/1198371 125 | int SimpleTokenizer::tokenize(void *pCtx, int flags, const char *text, int textLen, xTokenFn xToken) const { 126 | int rc = SQLITE_OK; 127 | int start = 0; 128 | int index = 0; 129 | std::string result; 130 | while (index < textLen) { 131 | TokenCategory category = from_char(text[index]); 132 | switch (category) { 133 | case TokenCategory::OTHER: 134 | index = std::min(textLen, index + PinYin::get_str_len(text[index]));  // never step past the buffer on a truncated UTF-8 sequence 135 | break; 136 | default: 137 | while (++index < textLen && from_char(text[index]) == category) { 138 | } 139 | break; 140 | } 141 | if (category != TokenCategory::SPACE) { 142 | result.clear(); 143 | std::copy(text + start, text + index,
std::back_inserter(result)); 144 | if (category == TokenCategory::ASCII_ALPHABETIC) { 145 | std::transform(result.begin(), result.end(), result.begin(), [](unsigned char c) { return std::tolower(c); }); 146 | } 147 | 148 | rc = xToken(pCtx, 0, result.c_str(), (int)result.length(), start, index); 149 | if (enable_pinyin && category == TokenCategory::OTHER && (flags & FTS5_TOKENIZE_DOCUMENT)) { 150 | const std::vector &pys = SimpleTokenizer::get_pinyin()->get_pinyin(result); 151 | for (const std::string &s : pys) { 152 | rc = xToken(pCtx, FTS5_TOKEN_COLOCATED, s.c_str(), (int)s.length(), start, index); 153 | } 154 | } 155 | } 156 | start = index; 157 | } 158 | return rc; 159 | } 160 | } // namespace simple_tokenizer 161 | -------------------------------------------------------------------------------- /examples/cpp/main.cc: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #ifdef _WIN32 8 | #include 9 | #define GetCurrentDir _getcwd 10 | #else 11 | #include 12 | #define GetCurrentDir getcwd 13 | #endif 14 | 15 | using namespace std; 16 | using Clock = std::chrono::system_clock; 17 | using ms = std::chrono::duration; 18 | 19 | // https://www.tutorialspoint.com/find-out-the-current-working-directory-in-c-cplusplus 20 | string get_current_dir() { 21 | char buff[FILENAME_MAX]; // create string buffer to hold path 22 | GetCurrentDir(buff, FILENAME_MAX); 23 | string current_working_dir(buff); 24 | return current_working_dir; 25 | } 26 | 27 | // Create a callback function 28 | int callback(void *NotUsed, int argc, char **argv, char **azColName) { 29 | // int argc: holds the number of results 30 | // (array) azColName: holds each column returned 31 | // (array) argv: holds each value 32 | for (int i = 0; i < argc; i++) { 33 | // Show column name, value, and newline 34 | cout << azColName[i] << ": " << argv[i] << endl; 35 | } 36 | if (argc > 0) { 37 | cout << endl; 38 | } 39 | // 
Return successful 40 | return 0; 41 | } 42 | 43 | void handle_rc(sqlite3 *db, int rc) { 44 | if (rc != SQLITE_OK) { 45 | cout << "sqlite3 rc: " << rc << ", error: " << sqlite3_errmsg(db) << endl; 46 | exit(rc); 47 | } 48 | } 49 | 50 | int main() { 51 | // Pointer to SQLite connection 52 | sqlite3 *db; 53 | // Save any error messages 54 | char *zErrMsg = 0; 55 | 56 | // Save the connection result 57 | int rc = sqlite3_open(":memory:", &db); 58 | handle_rc(db, rc); 59 | 60 | auto before = Clock::now(); 61 | // load simple 62 | rc = sqlite3_enable_load_extension(db, 1); 63 | handle_rc(db, rc); 64 | rc = sqlite3_load_extension(db, "libsimple", NULL, NULL); 65 | handle_rc(db, rc); 66 | ms load_extension = Clock::now() - before; 67 | std::cout << "It took " << load_extension.count() << "ms to load extension" << std::endl; 68 | 69 | // warm-up 70 | before = Clock::now(); 71 | string sql = "select simple_query('pinyin')"; 72 | rc = sqlite3_exec(db, sql.c_str(), callback, 0, &zErrMsg); 73 | handle_rc(db, rc); 74 | ms pinyin = Clock::now() - before; 75 | std::cout << "It took " << pinyin.count() << "ms to init pinyin" << std::endl; 76 | before = Clock::now(); 77 | sql = "select jieba_query('结巴')"; 78 | rc = sqlite3_exec(db, sql.c_str(), callback, 0, &zErrMsg); 79 | handle_rc(db, rc); 80 | ms warm_up = Clock::now() - before; 81 | std::cout << "It took " << warm_up.count() << "ms to init jieba" << std::endl; 82 | 83 | before = Clock::now(); 84 | // create fts table 85 | sql = "CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize = 'simple')"; 86 | rc = sqlite3_exec(db, sql.c_str(), callback, 0, &zErrMsg); 87 | handle_rc(db, rc); 88 | 89 | // insert some data 90 | sql = R"V0G0N( 91 | insert into t1(x) values ('周杰伦 Jay Chou:最美的不是下雨天,是曾与你躲过雨的屋檐'), 92 | ('I love China! 
我爱中国!'), 93 | ('@English &special _characters."''bacon-&and''-eggs%') 94 | )V0G0N"; 95 | rc = sqlite3_exec(db, sql.c_str(), callback, 0, &zErrMsg); 96 | handle_rc(db, rc); 97 | 98 | // case 1: match pinyin 99 | sql = "select simple_highlight(t1, 0, '[', ']') as matched_pinyin from t1 where x match simple_query('zhoujiel')"; 100 | rc = sqlite3_exec(db, sql.c_str(), callback, 0, &zErrMsg); 101 | handle_rc(db, rc); 102 | // case 2: match special chars 103 | sql = 104 | "select simple_highlight(t1, 0, '[', ']') as matched_no_single_quote_special_chars from t1 where x match " 105 | "simple_query('@\"._-&%')"; 106 | rc = sqlite3_exec(db, sql.c_str(), callback, 0, &zErrMsg); 107 | handle_rc(db, rc); 108 | // case 3: single quote, will match! 109 | sql = 110 | "select simple_highlight(t1, 0, '[', ']') as matched_simple_query_special_chars from t1 where x match " 111 | "simple_query('@\"._''-&%')"; 112 | rc = sqlite3_exec(db, sql.c_str(), callback, 0, &zErrMsg); 113 | handle_rc(db, rc); 114 | #ifdef USE_JIEBA 115 | // set dict path manually 116 | string dict_path = get_current_dir() + "/dict"; 117 | sql = "select jieba_dict('" + dict_path + "')"; 118 | rc = sqlite3_exec(db, sql.c_str(), callback, 0, &zErrMsg); 119 | handle_rc(db, rc); 120 | // case 4: jieba, no match 121 | sql = "select simple_highlight(t1, 0, '[', ']') as no_matched_jieba from t1 where x match jieba_query('国中')"; 122 | rc = sqlite3_exec(db, sql.c_str(), callback, 0, &zErrMsg); 123 | handle_rc(db, rc); 124 | // case 5: jieba, match 125 | sql = "select simple_highlight(t1, 0, '[', ']') as matched_jieba from t1 where x match jieba_query('中国')"; 126 | rc = sqlite3_exec(db, sql.c_str(), callback, 0, &zErrMsg); 127 | handle_rc(db, rc); 128 | #endif 129 | // case 6: use highlight_pos 130 | sql = 131 | "select simple_highlight_pos(t1, 0) as matched_simple_query_special_chars from t1 where x match " 132 | "simple_query('shi')"; 133 | rc = sqlite3_exec(db, sql.c_str(), callback, 0, &zErrMsg); 134 | handle_rc(db, 
rc); 135 | sql = 136 | "select simple_highlight_pos(t1, 0) as matched_simple_query_special_chars from t1 where x match " 137 | "simple_query('special')"; 138 | rc = sqlite3_exec(db, sql.c_str(), callback, 0, &zErrMsg); 139 | handle_rc(db, rc); 140 | 141 | ms last_query = Clock::now() - before; 142 | std::cout << "It took " << last_query.count() << "ms for all query" << std::endl; 143 | 144 | // Close the connection 145 | sqlite3_close(db); 146 | 147 | return (0); 148 | } 149 |
--------------------------------------------------------------------------------
/contrib/sqlite3/CMakeLists.txt:
--------------------------------------------------------------------------------
# Builds the vendored SQLite3 amalgamation as a static library (target
# SQLite3, output name sqlite3) and, optionally, the sqlite3 shell.
cmake_minimum_required(VERSION 3.8)
project(SQLite3
    VERSION   3.31.1
    LANGUAGES C
)

include(GNUInstallDirs)

#------------------------------------------------------------------------------
# build options and optional modules:
# Each option below that is ON becomes a compile definition of the same name
# on the SQLite3 target.
option(SQLITE_ENABLE_DBSTAT_VTAB "enables dbstat virtual table" OFF)
option(SQLITE_ENABLE_FTS3 "enables full text searches version 3" OFF)
option(SQLITE_ENABLE_FTS4 "enables full text searches version 3 & 4" OFF)
option(SQLITE_ENABLE_FTS5 "enables full text searches version 5" OFF)
option(SQLITE_ENABLE_GEOPOLY "enables Geopoly extention" OFF)
option(SQLITE_ENABLE_ICU "enables international components for unicode" OFF)
option(SQLITE_ENABLE_JSON1 "enables JSON SQL functins" ON)
option(SQLITE_ENABLE_RBU "enables resumable bulk update extension" OFF)
option(SQLITE_ENABLE_RTREE "enables R*TRee index extension" OFF)
option(SQLITE_RECOMMENDED_OPTIONS "compile by SQLite3 recommended options" ON)

# Default to a Release build, but only for single-config generators:
# CMAKE_BUILD_TYPE is meaningless for Visual Studio / Xcode / Ninja
# Multi-Config. (On CMake 3.8 the property is unknown and reads as empty,
# which falls back to the previous unconditional behaviour.)
get_property(sqlite3_is_multi_config GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
if(NOT sqlite3_is_multi_config AND NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Release or Debug?" FORCE)
endif()

#------------------------------------------------------------------------------

# SQLite3 as static library:
add_library(${PROJECT_NAME} STATIC sqlite3.c)
set_target_properties(${PROJECT_NAME} PROPERTIES
    OUTPUT_NAME   sqlite3
    PUBLIC_HEADER "sqlite3.h;sqlite3ext.h"
    DEBUG_POSTFIX d
)
target_include_directories(${PROJECT_NAME} PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>)
target_compile_definitions(${PROJECT_NAME} PUBLIC # inject user's options
    $<BUILD_INTERFACE:
        $<$<BOOL:${SQLITE_ENABLE_DBSTAT_VTAB}>:SQLITE_ENABLE_DBSTAT_VTAB>
        $<$<BOOL:${SQLITE_ENABLE_FTS3}>:SQLITE_ENABLE_FTS3>
        $<$<BOOL:${SQLITE_ENABLE_FTS4}>:SQLITE_ENABLE_FTS4>
        $<$<BOOL:${SQLITE_ENABLE_FTS5}>:SQLITE_ENABLE_FTS5>
        $<$<BOOL:${SQLITE_ENABLE_GEOPOLY}>:SQLITE_ENABLE_GEOPOLY>
        $<$<BOOL:${SQLITE_ENABLE_ICU}>:SQLITE_ENABLE_ICU>
        $<$<BOOL:${SQLITE_ENABLE_JSON1}>:SQLITE_ENABLE_JSON1>
        $<$<BOOL:${SQLITE_ENABLE_RBU}>:SQLITE_ENABLE_RBU>
        $<$<BOOL:${SQLITE_ENABLE_RTREE}>:SQLITE_ENABLE_RTREE>
        $<$<BOOL:${SQLITE_RECOMMENDED_OPTIONS}>:
            SQLITE_DQS=0
            SQLITE_DEFAULT_MEMSTATUS=0
            SQLITE_DEFAULT_WAL_SYNCHRONOUS=1
            SQLITE_LIKE_DOESNT_MATCH_BLOBS
            SQLITE_MAX_EXPR_DEPTH=0
            SQLITE_OMIT_DECLTYPE
            SQLITE_OMIT_DEPRECATED
            SQLITE_OMIT_PROGRESS_CALLBACK
            SQLITE_OMIT_SHARED_CACHE
            SQLITE_USE_ALLOCA
        >
    >
)

# platform/compiler specific settings
if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
    find_package(Threads REQUIRED)
    target_link_libraries(${PROJECT_NAME} INTERFACE Threads::Threads ${CMAKE_DL_LIBS})
elseif(WIN32 AND CMAKE_SIZEOF_VOID_P LESS 8) # this is a 32bit windows
    option(BUILD_WITH_XPSDK "build for old 32bit (WinXP/2003) targets" OFF)
    if(BUILD_WITH_XPSDK)
        # NOTE(review): the definition list inside this expression was lost in
        # the copy under review; the values below are reconstructed (WinXP x64 /
        # Server 2003 API level) and must be confirmed against the upstream file.
        target_compile_definitions(${PROJECT_NAME} PUBLIC
            $<BUILD_INTERFACE:
                -DSQLITE_OS_WINRT=0 -D_WIN32_WINNT=0x0502 -DWINVER=0x0502
            >
        )
    endif()
endif()

#------------------------------------------------------------------------------
# NOTE(review): the generated header goes to the top-level binary dir
# (CMAKE_BINARY_DIR), not this directory's; kept as-is because the location
# is also used by the install rule below.
configure_file(sqlite3_config.h.in ${CMAKE_BINARY_DIR}/sqlite3_config.h)

install(TARGETS ${PROJECT_NAME} EXPORT ${PROJECT_NAME}Config
    ARCHIVE       DESTINATION ${CMAKE_INSTALL_LIBDIR}
    LIBRARY       DESTINATION ${CMAKE_INSTALL_LIBDIR}
    PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
)
install(EXPORT ${PROJECT_NAME}Config
    NAMESPACE   SQLite::
    DESTINATION cmake
)
install(FILES
    ${CMAKE_BINARY_DIR}/sqlite3_config.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
)

#------------------------------------------------------------------------------
# SQLite3 shell application:
option(BUILD_SHELL "build SQLite3 shell application" OFF)
if(BUILD_SHELL)
    add_executable(shell_app shell.c)
    set_target_properties(shell_app PROPERTIES OUTPUT_NAME sqlite3)

    # FindReadline.cmake lives next to this file.
    list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}")
    find_package(Readline)
    if(READLINE_FOUND)
        # Scoped to the shell only; assigning CMAKE_C_FLAGS here would discard
        # every C flag the user or toolchain had set for this directory.
        target_compile_definitions(shell_app PRIVATE HAVE_READLINE)
        target_link_libraries(shell_app PRIVATE ${PROJECT_NAME} ${Readline_LIBRARY})
    endif()

    if(UNIX)
        target_link_libraries(shell_app PRIVATE ${PROJECT_NAME} m)
    elseif(MSVC)
        target_link_libraries(shell_app PRIVATE ${PROJECT_NAME})
        option(BUILD_SHELL_STATIC "build shell by static c/c++ runtime" ON)
        # Switches the C runtime by rewriting the cached per-config flags.
        # This relies on the flags containing /MD or /MT, i.e. on policy
        # CMP0091 not being NEW, which holds for the minimum version above.
        foreach(flag CMAKE_C_FLAGS_RELEASE CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_DEBUG)
            if(BUILD_SHELL_STATIC)
                string(REGEX REPLACE "/MD" "/MT" ${flag} "${${flag}}")
            else()
                string(REGEX REPLACE "/MT" "/MD" ${flag} "${${flag}}")
            endif()
            set(${flag} "${${flag}}" CACHE STRING "msvc flags" FORCE)
        endforeach()
    else()
        target_link_libraries(shell_app PRIVATE ${PROJECT_NAME} m)
    endif()
    install(TARGETS shell_app
        RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
    )
endif()
--------------------------------------------------------------------------------
/example.sql:
--------------------------------------------------------------------------------
1 | -- Examples to use simple tokenizer 2 | 3 | -- load so file 4 | .load libsimple 5 | 6 | select '启用拼音分词:'; 7 | -- set tokenize to simple 8 | CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize = 'simple'); 9 | 10 | -- add some values into the table 11 |
insert into t1(x) values ('周杰伦 Jay Chou:最美的不是下雨天,是曾与你躲过雨的屋檐'), 12 | ('I love China! 我爱中国!'), 13 | ('@English &special _characters."''bacon-&and''-eggs%'); 14 | select '所有数据:'; 15 | select ' ', * from t1; 16 | 17 | select '特殊字符:'; 18 | select ' ', simple_highlight(t1, 0, '[', ']') from t1 where x match simple_query('@"''._-&%'); 19 | select ' ', simple_highlight(t1, 0, '[', ']') from t1 where x match '"''"'; 20 | 21 | select '搜索 杰伦:'; 22 | select ' ', simple_highlight(t1, 0, '[', ']') from t1 where x match simple_query('杰伦'); 23 | select ' ', simple_snippet(t1, 0, '[', ']', '...', 1) from t1 where x match simple_query('杰伦'); 24 | select ' ', simple_snippet(t1, 0, '[', ']', '...', 2) from t1 where x match simple_query('杰伦'); 25 | select ' ', simple_snippet(t1, 0, '[', ']', '...', 3) from t1 where x match simple_query('杰伦'); 26 | select ' ', simple_snippet(t1, 0, '[', ']', '...', 4) from t1 where x match simple_query('杰伦'); 27 | select ' ', simple_snippet(t1, 0, '[', ']', '...', 5) from t1 where x match simple_query('杰伦'); 28 | select ' ', simple_snippet(t1, 0, '[', ']', '...', 10) from t1 where x match simple_query('杰伦'); 29 | select ' ', simple_snippet(t1, 0, '[', ']', '...', 20) from t1 where x match simple_query('杰伦'); 30 | select ' ', simple_snippet(t1, 0, '[', ']', '...', 100) from t1 where x match simple_query('杰伦'); 31 | select ' ', simple_highlight_pos(t1, 0) from t1 where x match simple_query('杰伦'); 32 | 33 | select '搜索 雨天:'; 34 | select ' ', simple_snippet(t1, 0, '[', ']', '...', 10) from t1 where x match simple_query('雨天'); 35 | 36 | select '搜索 zhoujiel:'; 37 | select ' ', simple_highlight(t1, 0, '[', ']') from t1 where x match simple_query('zhoujiel'); 38 | select ' ', simple_highlight_pos(t1, 0) from t1 where x match simple_query('zhoujiel'); 39 | 40 | select '搜索 zhoujie:'; 41 | select ' ', simple_highlight(t1, 0, '[', ']') from t1 where x match simple_query('zhoujie'); 42 | -- will not match 43 | select ' !!!!! 
should not match', simple_highlight_pos(t1, 0) from t1 where x match simple_query('jiezhou'); 44 | 45 | 46 | select '搜索 zjl:'; 47 | select ' ', simple_highlight(t1, 0, '[', ']') from t1 where x match simple_query('zjl'); 48 | select ' ', simple_highlight_pos(t1, 0) from t1 where x match simple_query('zjl'); 49 | 50 | select '搜索 ZHOUJi:'; 51 | select ' ', simple_highlight(t1, 0, '[', ']') from t1 where x match simple_query('ZHOUJi'); 52 | select ' ', simple_highlight_pos(t1, 0) from t1 where x match simple_query('ZHOUJi'); 53 | 54 | select '搜索 love zg:'; 55 | select ' ', simple_highlight(t1, 0, '[', ']') from t1 where x match simple_query('love zg'); 56 | select ' ', simple_highlight_pos(t1, 0) from t1 where x match simple_query('love zg'); 57 | 58 | select ''; 59 | select ''; 60 | select '--------------------------------------------------------------------------------'; 61 | select '禁用拼音分词:'; 62 | -- set tokenize to simple, 0 means disable pinyin 63 | CREATE VIRTUAL TABLE t2 USING fts5(x, tokenize = 'simple 0'); 64 | 65 | -- add some values into the table 66 | insert into t2(x) values ('周杰伦 Jay Chou:最美的不是下雨天,是曾与你躲过雨的屋檐'), 67 | ('I love China! 
我爱中国!') ; 68 | select '所有数据:'; 69 | select ' ', * from t2; 70 | 71 | select '搜索 杰伦:'; 72 | -- in simple_query, we accept a second params, '0' means disable pinyin split 73 | select ' ', simple_highlight(t2, 0, '[', ']') from t2 where x match simple_query('杰伦', '0'); 74 | select ' ', simple_snippet(t2, 0, '[', ']', '...', 1) from t2 where x match simple_query('杰伦', '0'); 75 | select ' ', simple_snippet(t2, 0, '[', ']', '...', 2) from t2 where x match simple_query('杰伦', '0'); 76 | select ' ', simple_snippet(t2, 0, '[', ']', '...', 3) from t2 where x match simple_query('杰伦', '0'); 77 | select ' ', simple_snippet(t2, 0, '[', ']', '...', 4) from t2 where x match simple_query('杰伦', '0'); 78 | select ' ', simple_snippet(t2, 0, '[', ']', '...', 5) from t2 where x match simple_query('杰伦', '0'); 79 | select ' ', simple_snippet(t2, 0, '[', ']', '...', 10) from t2 where x match simple_query('杰伦', '0'); 80 | select ' ', simple_snippet(t2, 0, '[', ']', '...', 20) from t2 where x match simple_query('杰伦', '0'); 81 | select ' ', simple_snippet(t2, 0, '[', ']', '...', 100) from t2 where x match simple_query('杰伦', '0'); 82 | select ' ', simple_highlight_pos(t2, 0) from t2 where x match simple_query('杰伦', '0'); 83 | 84 | select '搜索 雨天:'; 85 | select ' ', simple_snippet(t2, 0, '[', ']', '...', 10) from t2 where x match simple_query('雨天', '0'); 86 | 87 | select '搜索 zhoujiel:'; 88 | select ' ', simple_highlight(t2, 0, '[', ']') from t2 where x match simple_query('zhoujiel', '0'); 89 | select ' ', simple_highlight_pos(t2, 0) from t2 where x match simple_query('zhoujiel', '0'); 90 | select ' !!!!! 
should not match', simple_highlight_pos(t1, 0) from t1 where x match simple_query('jiezhou', '0'); 91 | 92 | select '搜索 zjl:'; 93 | select ' ', simple_highlight(t2, 0, '[', ']') from t2 where x match simple_query('zjl', '0'); 94 | select ' ', simple_highlight_pos(t2, 0) from t2 where x match simple_query('zjl', '0'); 95 | 96 | select '搜索 ZHOUJi:'; 97 | select ' ', simple_highlight(t2, 0, '[', ']') from t2 where x match simple_query('ZHOUJi', '0'); 98 | select ' ', simple_highlight_pos(t2, 0) from t2 where x match simple_query('ZHOUJi', '0'); 99 | 100 | select '搜索 love zg:'; 101 | select ' ', simple_highlight(t2, 0, '[', ']') from t2 where x match simple_query('love zg', '0'); 102 | select ' ', simple_highlight_pos(t2, 0) from t2 where x match simple_query('love zg', '0'); 103 | -------------------------------------------------------------------------------- /src/pinyin.h: -------------------------------------------------------------------------------- 1 | #ifndef PINYIN_H_ 2 | #define PINYIN_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | namespace simple_tokenizer { 10 | 11 | class PinYin { 12 | private: 13 | std::map > pinyin; 14 | const std::vector empty_vector; 15 | 16 | // clang-format off 17 | const std::map tone_to_plain = { 18 | {"ā", 'a'}, {"á", 'a'}, {"ǎ", 'a'}, {"à", 'a'}, 19 | {"ē", 'e'}, {"é", 'e'}, {"ě", 'e'}, {"è", 'e'}, 20 | {"ế", 'e'}, {"ề", 'e'}, {"ê", 'e'}, 21 | {"ō", 'o'}, {"ó", 'o'}, {"ǒ", 'o'}, {"ò", 'o'}, 22 | {"ī", 'i'}, {"í", 'i'}, {"ǐ", 'i'}, {"ì", 'i'}, 23 | {"ū", 'u'}, {"ú", 'u'}, {"ǔ", 'u'}, {"ù", 'u'}, 24 | {"ǘ", 'u'}, {"ǚ", 'u'}, {"ǜ", 'u'}, {"ü", 'u'}, 25 | {"ń", 'n'}, {"ň", 'n'}, {"ǹ", 'n'}, 26 | {"ḿ", 'm'}, 27 | }; 28 | // 不是合法拼音,但是是前缀,只能出现在结尾。 29 | const std::set pinyin_prefix = { 30 | "be","bia", 31 | "ch","cho","chon","chua","co","con","cua", 32 | "din","don","do","dua", 33 | "fe", 34 | "go","gon", 35 | "ho","hon", 36 | "len","lon","lua", 37 | "mia", 38 | "nia","no","non","nua", 39 | "pe","pia", 40 | 
"qio","qion","qua", 41 | "ra","ro","ron","rua", 42 | "sh","sho","so","son","sua", 43 | "ten","tia","tin","to","ton","tua", 44 | "we", 45 | "xio","xion","xua", 46 | "yon","yua", 47 | "zh","zho","zhon","zo","zon","zua", 48 | }; 49 | 50 | // 合法拼音 51 | const std::set pinyin_valid = { 52 | "a", "ai", "an", "ang", "ao", 53 | "ba", "bai", "ban", "bang", "bao", 54 | "bei", "ben", "beng", "bi", "bian", "biao", "bie", "bin", "bing", "bo", "bu", 55 | "ca", "cai", "can", "cang", "cao", "ce", "cen", "ceng", "cha", "chai", 56 | "chan", "chang", "chao", "che", "chen", "cheng", "chi", "chong", "chou", "chu", 57 | "chuai", "chuan", "chuang", "chui", "chun", "chuo", "ci", "cong", "cou", "cu", 58 | "cuan", "cui", "cun", "cuo", 59 | "da", "dai", "dan", "dang", "dao", "de", "dei", 60 | "den", "deng", "di", "dia", "dian", "diao", "die", "ding", "diu", 61 | "dong", "dou", "du", "duan", "dui", "dun", "duo", 62 | "e", "ei", "en", "eng", "er", 63 | "fa", "fan", "fang", "fei", "fen", "feng", "fo", "fou", "fu", 64 | "ga", "gai", "gan", "gang", "gao", "ge", "gei", "gen", "geng", 65 | "gong", "gou", "gu", "gua", "guai", "guan", "guang", "gui", "gun", "guo", 66 | "ha", "hai", "han", "hang", "hao", "he", 67 | "hei", "hen", "heng", "hong", "hou", "hu", 68 | "hua", "huai", "huan", "huang", "hui", "hun", "huo", 69 | // "i"=>[], 70 | "ji", "jia", "jian", "jiang", "qiao", "jiao", "jie", "jin", "jing", "jiong", 71 | "jiu", "ju", "juan", "jue", "jun","jv", 72 | "ka", "kai", "kan", "kang", "kao", "ke", "kei", "ken", "keng", "kong", "kou", "ku", "kua", "kuai", 73 | "kuan", "kuang", "kui", "kun", "kuo", 74 | "la", "lai", "lan", "lang", "lao", 75 | "le", "lei", "leng", "li", "lia", "lian", "liang", "liao", "lie", "lin", 76 | "ling", "liu", "long", "lo", "lou", "lu", "luan", "lue", "lun", "luo","lv", 77 | "ma", "mai", "man", "mang", "mao", "me", "mei", "men", "meng", "mi", "mian", 78 | "miao", "mie", "min", "ming", "miu", "mo", "mou", "mu", 79 | "na", "nai", "nan", "nang", "nao", "ne", "nei", "nen", "neng", 
"ni", "nian", "niang", 80 | "niao", "nie", "nin", "ning", "niu", "nong", "nou", "nu", "nuan", "nue", "nun", "nuo", "nv", 81 | "o", "ou", 82 | "pa", "pai", "pan", "pang", "pao", "pei", "pen", 83 | "peng", "pi", "pian", "piao", "pie", "pin", "ping", "po", "pou", "pu", 84 | "qi", "qia", "qian", "qiang", "qie", "qin", "qing", "qiong", "qiu", "qu", 85 | "quan", "que", "qun","qv", 86 | "ran", "rang", "rao", "re", "ren", "reng", "ri", 87 | "rong", "rou", "ru", "ruan", "rui", "run", "ruo", 88 | "sa", "sai", "san", 89 | "sang", "sao", "se", "sen", "seng", "sha", "shai", "shan", "shang", "shao", 90 | "she", "shei", "shen", "sheng", "shi", "shou", "shu", "shua", "shuai", "shuan", "shuang", 91 | "shui", "shun", "shuo", "si", "song", "sou", "su", "suan", "sui", "sun", "suo", 92 | "ta", "tai", "tan", "tang", "tao", "te", "tei", "teng", "ti", "tian", 93 | "tiao", "tie", "ting", "tong", "tou", "tu", "tuan", "tui", "tun", "tuo", 94 | // "u"=>[], 95 | // "v"=>[], 96 | "wa", "wai", "wan", "wang", "wei", "wen", "weng", "wo", "wu", 97 | "xi", "xia", "xian", "xiang", "xiao", "xie", "xin", "xing", "xiong", "xiu", "xu", 98 | "xuan", "xue", "xun","xv", 99 | "ya", "yan", "yang","yao", "ye", "yi", "yin", "ying", 100 | "yo", "yong", "you", "yu", "yuan", "yue", "yun", 101 | "za", "zai", "zan", 102 | "zang", "zao", "ze", "zei", "zen", "zeng", "zha", "zhai", "zhan", "zhang", 103 | "zhao", "zhe", "zhen", "zheng", "zhi", "zhong", "zhou", "zhu", "zhua", "zhuai", 104 | "zhuan", "zhuang", "zhui", "zhun", "zhuo", "zi", "zong", "zou", "zu", "zuan", 105 | "zui", "zun", "zuo", 106 | }; 107 | // clang-format on 108 | std::set to_plain(const std::string &input); 109 | std::map > build_pinyin_map(); 110 | static int codepoint(const std::string &u); 111 | std::vector _split_pinyin(const std::string &input, int begin, int end); 112 | 113 | public: 114 | const std::vector &get_pinyin(const std::string &chinese); 115 | static int get_str_len(unsigned char byte); 116 | std::set split_pinyin(const std::string 
&input); 117 | PinYin(); 118 | }; 119 | 120 | } // namespace simple_tokenizer 121 | 122 | #endif // PINYIN_H_ 123 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | # Controls when the action will run. Triggers the workflow on push or pull request 4 | # events but only for the master branch 5 | on: 6 | push: 7 | branches: [ master ] 8 | tags: [ '*' ] 9 | pull_request: 10 | branches: [ master ] 11 | 12 | # A workflow run is made up of one or more jobs that can run sequentially or in parallel 13 | jobs: 14 | Windows: 15 | runs-on: windows-latest 16 | needs: Linux 17 | strategy: 18 | fail-fast: true 19 | matrix: 20 | include: 21 | - arch: x86 22 | - arch: x64 23 | 24 | steps: 25 | - name: "Set Build Type" 26 | id: build_type 27 | run: | 28 | if ("${{ github.ref }}".StartsWith("refs/tags/")) { 29 | echo "BUILD_TYPE::Release"; 30 | echo "::set-output name=BUILD_TYPE::Release"; 31 | } else { 32 | echo "BUILD_TYPE::Debug"; 33 | echo "::set-output name=BUILD_TYPE::Debug"; 34 | } 35 | 36 | if ("${{ matrix.arch }}" -eq "x64") { 37 | echo "Arch:x64"; 38 | echo "::set-output name=BUILD_ARCH::x64"; 39 | } else { 40 | echo "Arch:Win32"; 41 | echo "::set-output name=BUILD_ARCH::Win32"; 42 | } 43 | 44 | # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it 45 | - uses: actions/checkout@v2 46 | 47 | # https://github.com/OpenTTD/OpenTTD/blob/master/.github/workflows/ci-build.yml 48 | # "restore-cache" which is done by "run-vcpkg" uses Windows tar. 49 | # A git clone on windows marks a few files as read-only; when Windows tar 50 | # tries to extract the cache over this folder, it fails, despite the files 51 | # being identical. This failure shows up as an warning in the logs. We 52 | # avoid this by simply removing the read-only mark from the git folder. 53 | # In other words: this is a hack! 
54 | # See: https://github.com/lukka/run-vcpkg/issues/61 55 | # - name: Remove read-only flag from vcpkg git folder 56 | # shell: powershell 57 | # run: | 58 | # attrib -r "c:\vcpkg\.git\*.*" /s 59 | # - name: Prepare vcpkg (with cache) 60 | # uses: lukka/run-vcpkg@v6 61 | # with: 62 | # vcpkgDirectory: 'c:/vcpkg' 63 | # doNotUpdateVcpkg: true 64 | # vcpkgArguments: 'liblzma libpng lzo zlib' 65 | # vcpkgTriplet: '${{ matrix.arch }}-windows-static' 66 | 67 | # - name: Install MSVC problem matcher 68 | # uses: ammaraskar/msvc-problem-matcher@master 69 | 70 | - name: 'Run CMake with VS16' 71 | uses: lukka/run-cmake@v3 72 | with: 73 | cmakeListsOrSettingsJson: CMakeListsTxtAdvanced 74 | cmakeListsTxtPath: '${{ github.workspace }}/CMakeLists.txt' 75 | buildDirectory: "${{ github.workspace }}/../../_temp/windows" 76 | useVcpkgToolchainFile: true 77 | cmakeBuildType: ${{ steps.build_type.outputs.BUILD_TYPE }} 78 | buildWithCMake: true 79 | cmakeGenerator: VS16Win64 80 | cmakeAppendedArgs: -A ${{steps.build_type.outputs.BUILD_ARCH }} 81 | buildWithCMakeArgs: --config ${{ steps.build_type.outputs.BUILD_TYPE }} 82 | 83 | - name: "Check file existence" 84 | uses: andstor/file-existence-action@v1 85 | with: 86 | allow_failure: true 87 | files: "${{ github.workspace }}/../../_temp/windows/src/${{ steps.build_type.outputs.BUILD_TYPE }}/simple.dll, ${{ github.workspace }}/../../_temp/windows/cppjieba/src/cppjieba/dict/jieba.dict.utf8" 88 | 89 | # can't build 90 | # npm run 91 | # - uses: actions/setup-node@v2 92 | # with: 93 | # node-version: '16' 94 | # - name: run node example 95 | # working-directory: ./examples/node/ 96 | # run: | 97 | # npm install 98 | # npm run p -- --ext_path="${{ github.workspace }}/../../_temp/windows/src/${{ steps.build_type.outputs.BUILD_TYPE }}/" --dict_path="${{ github.workspace }}/../../_temp/windows/cppjieba/src/cppjieba/dict/" 99 | 100 | # - name: 'Run CTest' 101 | # run: ctest -C ${{ env.BUILD_TYPE }} 102 | # working-directory: "${{ 
github.workspace }}/../../_temp/windows" 103 | 104 | # python run 105 | # - uses: actions/setup-python@v4 106 | # with: 107 | # python-version: '3.x' # Version range or exact version of a Python version to use, using SemVer's version range syntax 108 | # - name: run python example 109 | # working-directory: ./examples/python3/ 110 | # run: python db_connector.py "${{ github.workspace }}/../../_temp/windows/src/${{ steps.build_type.outputs.BUILD_TYPE }}/simple" 111 | 112 | - name: Package 113 | if: startsWith(github.ref, 'refs/tags/') 114 | run: | 115 | mkdir libsimple-windows-${{ matrix.arch }} 116 | Copy-Item -Path src/${{ steps.build_type.outputs.BUILD_TYPE }}/simple.dll,cppjieba/src/cppjieba/dict/ -Destination libsimple-windows-${{ matrix.arch }}/ -Recurse 117 | Compress-Archive -Path libsimple-windows-${{ matrix.arch }} -DestinationPath libsimple-windows-${{ matrix.arch }}.zip 118 | working-directory: "${{ github.workspace }}/../../_temp/windows/" 119 | - name: Release 120 | if: startsWith(github.ref, 'refs/tags/') 121 | uses: softprops/action-gh-release@v1 122 | with: 123 | draft: true 124 | files: "D:/a/_temp/windows/libsimple-windows-${{ matrix.arch }}.zip" 125 | # files: "${{ github.workspace }}/../../_temp/windows/libsimple-windows-${{ matrix.arch }}.zip" 126 | env: 127 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 128 | 129 | 130 | # copy from https://github.com/Cyan4973/xxHash/blob/dev/.github/workflows/ci.yml 131 | # Linux ARM64 132 | # All tests are using QEMU and gcc cross compiler. 133 | 134 | qemu-consistency: 135 | name: QEMU ${{ matrix.name }} 136 | needs: Linux 137 | runs-on: ${{ matrix.os }} 138 | strategy: 139 | fail-fast: true # 'false' means Don't stop matrix workflows even if some matrix failed. 
140 | matrix: 141 | include: [ 142 | { name: 'ARM64', xcc_pkg: gcc-aarch64-linux-gnu, xcc: aarch64-linux-gnu-gcc, xemu_pkg: qemu-system-arm, xemu: qemu-aarch64-static, os: ubuntu-latest, }, 143 | 144 | { name: 'ARM64, gcc-9', xcc_pkg: gcc-9-aarch64-linux-gnu, xcc: aarch64-linux-gnu-gcc-9, xemu_pkg: qemu-system-arm, xemu: qemu-aarch64-static, os: ubuntu-20.04, }, 145 | ] 146 | env: # Set environment variables 147 | CC: ${{ matrix.xcc }} 148 | XEMU: ${{ matrix.xemu }} 149 | # LDFLAGS: -static 150 | steps: 151 | - uses: actions/checkout@v2 # https://github.com/actions/checkout 152 | - name: apt update & install (1) 153 | run: | 154 | sudo apt-get update 155 | sudo apt-get install gcc-multilib g++-multilib qemu-utils qemu-user-static 156 | - name: Environment info (1) 157 | run: | 158 | echo && apt-cache search "^gcc-" | grep "linux" | sort 159 | - name: apt update & install (2) 160 | run: | 161 | sudo apt-get install ${{ matrix.xcc_pkg }} ${{ matrix.xemu_pkg }} 162 | - name: Environment info (2) 163 | run: | 164 | echo && which $CC 165 | echo && $CC --version 166 | echo && $CC -v # Show built-in specs 167 | echo && which $XEMU 168 | echo && $XEMU --version 169 | - name: ARM64 170 | uses: lukka/run-cmake@v3 171 | with: 172 | cmakeListsOrSettingsJson: CMakeListsTxtAdvanced 173 | cmakeListsTxtPath: '${{ github.workspace }}/CMakeLists.txt' 174 | cmakeBuildType: 'Release' 175 | buildDirectory: "${{ github.workspace }}/build" 176 | buildWithCMake: true 177 | buildWithCMakeArgs: --config Release 178 | # cmakeAppendedArgs: " -DCODE_COVERAGE=OFF -DBUILD_SHELL=OFF -DBUILD_TEST_EXAMPLE=OFF -DBUILD_STATIC" 179 | cmakeAppendedArgs: " -DCODE_COVERAGE=OFF -DBUILD_SHELL=OFF -DBUILD_TEST_EXAMPLE=OFF" 180 | - name: "Check file existence" 181 | uses: andstor/file-existence-action@v1 182 | with: 183 | allow_failure: true 184 | # files: "${{ github.workspace }}/build/src/libsimple.a, ${{ github.workspace }}/build/src/libPINYIN_TEXT.a, ${{ github.workspace 
}}/build/test/dict/jieba.dict.utf8" 185 | files: "${{ github.workspace }}/build/src/libsimple.so, ${{ github.workspace }}/build/test/dict/jieba.dict.utf8" 186 | - name: Package 187 | if: startsWith(github.ref, 'refs/tags/') 188 | run: | 189 | mkdir libsimple-${{ matrix.xcc }} 190 | # cp -r src/libsimple.a src/libPINYIN_TEXT.a test/dict/ libsimple-linux-${{ matrix.xcc }}/ 191 | cp -r src/libsimple.so test/dict/ libsimple-${{ matrix.xcc }}/ 192 | zip -r libsimple-${{ matrix.xcc }}.zip libsimple-${{ matrix.xcc }} 193 | working-directory: "${{ github.workspace }}/build" 194 | - name: Release 195 | if: startsWith(github.ref, 'refs/tags/') 196 | uses: softprops/action-gh-release@v1 197 | with: 198 | draft: true 199 | files: ${{ github.workspace }}/build/libsimple-${{ matrix.xcc }}.zip 200 | env: 201 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 202 | 203 | Linux: 204 | runs-on: ${{ matrix.os }} 205 | strategy: 206 | fail-fast: true 207 | matrix: 208 | include: 209 | - os: ubuntu-20.04 210 | - os: ubuntu-latest 211 | steps: 212 | - name: "Release Build Type" 213 | if: startsWith(github.ref, 'refs/tags/') 214 | run: echo "BUILD_TYPE=Release" >> $GITHUB_ENV 215 | - name: "Debug Build Type" 216 | if: startsWith(github.ref, 'refs/tags/') != true 217 | run: echo "BUILD_TYPE=Debug" >> $GITHUB_ENV 218 | 219 | - uses: actions/checkout@v2 220 | 221 | - name: Update apt-get 222 | run: sudo apt-get update 223 | 224 | - name: Install lcov 225 | if: startsWith(github.ref, 'refs/tags/') != true 226 | run: sudo apt-get install lcov 227 | 228 | - name: Install codecov 229 | if: startsWith(github.ref, 'refs/tags/') != true 230 | shell: bash 231 | run: sudo pip install codecov 232 | 233 | - name: 'Run CMake' 234 | uses: lukka/run-cmake@v3 235 | if: startsWith(github.ref, 'refs/tags/') != true 236 | with: 237 | cmakeListsOrSettingsJson: CMakeListsTxtAdvanced 238 | cmakeListsTxtPath: '${{ github.workspace }}/CMakeLists.txt' 239 | cmakeBuildType: ${{ env.BUILD_TYPE }} 240 | buildDirectory: "${{ 
github.workspace }}/build" 241 | buildWithCMake: true 242 | buildWithCMakeArgs: --config ${{ env.BUILD_TYPE }} 243 | cmakeAppendedArgs: " -DCODE_COVERAGE=ON " 244 | 245 | - name: 'Run CMake without coverage' 246 | if: startsWith(github.ref, 'refs/tags/') 247 | uses: lukka/run-cmake@v3 248 | with: 249 | cmakeListsOrSettingsJson: CMakeListsTxtAdvanced 250 | cmakeListsTxtPath: '${{ github.workspace }}/CMakeLists.txt' 251 | cmakeBuildType: ${{ env.BUILD_TYPE }} 252 | buildDirectory: "${{ github.workspace }}/build" 253 | buildWithCMake: true 254 | buildWithCMakeArgs: --config ${{ env.BUILD_TYPE }} 255 | cmakeAppendedArgs: " -DCODE_COVERAGE=OFF -DBUILD_SHELL=OFF -DBUILD_TEST_EXAMPLE=OFF" 256 | 257 | - name: 'Run CTest' 258 | if: ${{ startsWith(github.ref, 'refs/tags/') != true && matrix.os == 'ubuntu-latest' }} 259 | run: ctest -V -C ${{ env.BUILD_TYPE }} 260 | working-directory: "${{ github.workspace }}/build" 261 | 262 | # CODE COVERAGE 263 | - name: Code coverage - Capture coverage info 264 | if: ${{ startsWith(github.ref, 'refs/tags/') != true && matrix.os == 'ubuntu-latest' }} 265 | run: lcov --directory . 
--capture --output-file coverage.info 266 | - name: Code coverage - Filter out system, external, and unit test source files 267 | if: ${{ startsWith(github.ref, 'refs/tags/') != true && matrix.os == 'ubuntu-latest' }} 268 | run: lcov --remove coverage.info --output-file coverage_filter.info '/Library/*' '/usr/*' '*/test/*' '*/cmrc/*' '*/entry.cc' '*/simple_highlight.*' 269 | - name: Code coverage - Output coverage data for debugging 270 | if: ${{ startsWith(github.ref, 'refs/tags/') != true && matrix.os == 'ubuntu-latest' }} 271 | run: lcov --list coverage_filter.info 272 | - name: Code coverage - Upload to CodeCov 273 | if: ${{ startsWith(github.ref, 'refs/tags/') != true && matrix.os == 'ubuntu-latest' }} 274 | run: bash <(curl -s https://codecov.io/bash) -f coverage_filter.info || echo "Codecov did not collect coverage reports" 275 | 276 | - name: "Check file existence" 277 | uses: andstor/file-existence-action@v1 278 | with: 279 | allow_failure: true 280 | files: "${{ github.workspace }}/build/src/libsimple.so, ${{ github.workspace }}/build/test/dict/jieba.dict.utf8" 281 | 282 | # npm run 283 | - uses: actions/setup-node@v2 284 | if: startsWith(github.ref, 'refs/tags/') != true 285 | with: 286 | node-version: '16' 287 | - name: run node example 288 | if: startsWith(github.ref, 'refs/tags/') != true 289 | working-directory: ./examples/node/ 290 | run: | 291 | npm install 292 | npm run p -- --ext_path="${{ github.workspace }}/build/src/" --dict_path="${{ github.workspace }}/build/test/dict/" 293 | npm run b -- --ext_path="${{ github.workspace }}/build/src/" --dict_path="${{ github.workspace }}/build/test/dict/" 294 | 295 | # python run 296 | - uses: actions/setup-python@v4 297 | if: startsWith(github.ref, 'refs/tags/') != true 298 | with: 299 | python-version: '3.10' # Version range or exact version of a Python version to use, using SemVer's version range syntax 300 | - name: run python example 301 | if: startsWith(github.ref, 'refs/tags/') != true 302 | 
working-directory: ./examples/python3/ 303 | run: python db_connector.py "${{ github.workspace }}/build/src/libsimple" 304 | 305 | # create release 306 | - name: "Build Changelog" 307 | id: build_changelog 308 | if: ${{ startsWith(github.ref, 'refs/tags/') && matrix.os == 'ubuntu-latest' }} 309 | uses: mikepenz/release-changelog-builder-action@v1 310 | env: 311 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 312 | - name: Package 313 | if: startsWith(github.ref, 'refs/tags/') 314 | run: | 315 | mkdir libsimple-linux-${{ matrix.os }} 316 | cp -r src/libsimple.so test/dict/ libsimple-linux-${{ matrix.os }}/ 317 | zip -r libsimple-linux-${{ matrix.os }}.zip libsimple-linux-${{ matrix.os }} 318 | working-directory: "${{ github.workspace }}/build" 319 | - name: Release without changelog 320 | if: ${{ startsWith(github.ref, 'refs/tags/') && matrix.os != 'ubuntu-latest' }} 321 | uses: softprops/action-gh-release@v1 322 | with: 323 | draft: true 324 | files: ${{ github.workspace }}/build/libsimple-linux-${{ matrix.os }}.zip 325 | env: 326 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 327 | - name: Release 328 | if: ${{ startsWith(github.ref, 'refs/tags/') && matrix.os == 'ubuntu-latest' }} 329 | uses: softprops/action-gh-release@v1 330 | with: 331 | draft: true 332 | files: ${{ github.workspace }}/build/libsimple-linux-${{ matrix.os }}.zip 333 | body: ${{steps.build_changelog.outputs.changelog}} 334 | env: 335 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 336 | 337 | MacOS: 338 | runs-on: macos-latest 339 | needs: Linux 340 | steps: 341 | - name: "Release Build Type" 342 | if: startsWith(github.ref, 'refs/tags/') 343 | run: echo "BUILD_TYPE=Release" >> $GITHUB_ENV 344 | - name: "Debug Build Type" 345 | if: startsWith(github.ref, 'refs/tags/') != true 346 | run: echo "BUILD_TYPE=Debug" >> $GITHUB_ENV 347 | 348 | - uses: actions/checkout@v2 349 | - name: 'Run CMake' 350 | uses: lukka/run-cmake@v3 351 | with: 352 | cmakeListsOrSettingsJson: CMakeListsTxtAdvanced 353 | 
cmakeListsTxtPath: '${{ github.workspace }}/CMakeLists.txt' 354 | cmakeBuildType: ${{ env.BUILD_TYPE }} 355 | buildDirectory: "${{ github.workspace }}/../../_temp/macos" 356 | buildWithCMake: true 357 | cmakeAppendedArgs: -DCMAKE_INSTALL_PREFIX="${{ github.workspace }}/../../_temp/macos/install" 358 | buildWithCMakeArgs: --config ${{ env.BUILD_TYPE }} 359 | 360 | - name: 'Run CTest' 361 | run: ctest -C ${{ env.BUILD_TYPE }} 362 | working-directory: "${{ github.workspace }}/../../_temp/macos" 363 | 364 | - name: "Check file existence" 365 | uses: andstor/file-existence-action@v1 366 | with: 367 | allow_failure: true 368 | files: "${{ github.workspace }}/../../_temp/macos/src/libsimple.dylib, ${{ github.workspace }}/../../_temp/macos/test/dict/jieba.dict.utf8" 369 | # npm run 370 | - uses: actions/setup-node@v2 371 | with: 372 | node-version: '16' 373 | - name: run node example 374 | working-directory: ./examples/node/ 375 | run: | 376 | npm install 377 | npm run p -- --ext_path="${{ github.workspace }}/../../_temp/macos/src/" --dict_path="${{ github.workspace }}/../../_temp/macos/test/dict/" 378 | # don't run this as it's toooo slow 379 | # npm run b -- --ext_path="${{ github.workspace }}/../../_temp/macos/src/" --dict_path="${{ github.workspace }}/../../_temp/macos/test/dict/" 380 | 381 | # python run 382 | - uses: actions/setup-python@v4 383 | with: 384 | python-version: '3.10' # Version range or exact version of a Python version to use, using SemVer's version range syntax 385 | - name: run python example 386 | working-directory: ./examples/python3/ 387 | run: python db_connector.py "${{ github.workspace }}/../../_temp/macos/src/libsimple" 388 | 389 | - name: Package 390 | if: startsWith(github.ref, 'refs/tags/') 391 | run: | 392 | mkdir libsimple-osx-x64 393 | sudo xattr -r -d com.apple.quarantine src/libsimple.dylib 394 | cp -r src/libsimple.dylib test/dict libsimple-osx-x64/ 395 | zip -r libsimple-osx-x64.zip libsimple-osx-x64 396 | working-directory: "${{ 
github.workspace }}/../../_temp/macos" 397 | - name: Release 398 | if: startsWith(github.ref, 'refs/tags/') 399 | uses: softprops/action-gh-release@v1 400 | with: 401 | draft: true 402 | files: ${{ github.workspace }}/../../_temp/macos/libsimple-osx-x64.zip 403 | env: 404 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 405 | 406 | - name: build-iOS 407 | run: ./build-ios.sh 408 | -------------------------------------------------------------------------------- /contrib/CMakeRC.cmake: -------------------------------------------------------------------------------- 1 | # This block is executed when generating an intermediate resource file, not when 2 | # running in CMake configure mode 3 | if(_CMRC_GENERATE_MODE) 4 | # Read in the digits 5 | file(READ "${INPUT_FILE}" bytes HEX) 6 | # Format each pair into a character literal. Heuristics seem to favor doing 7 | # the conversion in groups of five for fastest conversion 8 | string(REGEX REPLACE "(..)(..)(..)(..)(..)" "'\\\\x\\1','\\\\x\\2','\\\\x\\3','\\\\x\\4','\\\\x\\5'," chars "${bytes}") 9 | # Since we did this in groups, we have some leftovers to clean up 10 | string(LENGTH "${bytes}" n_bytes2) 11 | math(EXPR n_bytes "${n_bytes2} / 2") 12 | math(EXPR remainder "${n_bytes} % 5") # <-- '5' is the grouping count from above 13 | set(cleanup_re "$") 14 | set(cleanup_sub ) 15 | while(remainder) 16 | set(cleanup_re "(..)${cleanup_re}") 17 | set(cleanup_sub "'\\\\x\\${remainder}',${cleanup_sub}") 18 | math(EXPR remainder "${remainder} - 1") 19 | endwhile() 20 | if(NOT cleanup_re STREQUAL "$") 21 | string(REGEX REPLACE "${cleanup_re}" "${cleanup_sub}" chars "${chars}") 22 | endif() 23 | string(CONFIGURE [[ 24 | namespace { const char file_array[] = { @chars@ 0 }; } 25 | namespace cmrc { namespace @NAMESPACE@ { namespace res_chars { 26 | extern const char* const @SYMBOL@_begin = file_array; 27 | extern const char* const @SYMBOL@_end = file_array + @n_bytes@; 28 | }}} 29 | ]] code) 30 | file(WRITE "${OUTPUT_FILE}" "${code}") 31 | 
# Exit from the script. Nothing else needs to be processed 32 | return() 33 | endif() 34 | 35 | set(_version 2.0.0) 36 | 37 | cmake_minimum_required(VERSION 3.3) 38 | include(CMakeParseArguments) 39 | 40 | if(COMMAND cmrc_add_resource_library) 41 | if(NOT DEFINED _CMRC_VERSION OR NOT (_version STREQUAL _CMRC_VERSION)) 42 | message(WARNING "More than one CMakeRC version has been included in this project.") 43 | endif() 44 | # CMakeRC has already been included! Don't do anything 45 | return() 46 | endif() 47 | 48 | set(_CMRC_VERSION "${_version}" CACHE INTERNAL "CMakeRC version. Used for checking for conflicts") 49 | 50 | set(_CMRC_SCRIPT "${CMAKE_CURRENT_LIST_FILE}" CACHE INTERNAL "Path to CMakeRC script") 51 | 52 | function(_cmrc_normalize_path var) 53 | set(path "${${var}}") 54 | file(TO_CMAKE_PATH "${path}" path) 55 | while(path MATCHES "//") 56 | string(REPLACE "//" "/" path "${path}") 57 | endwhile() 58 | string(REGEX REPLACE "/+$" "" path "${path}") 59 | set("${var}" "${path}" PARENT_SCOPE) 60 | endfunction() 61 | 62 | get_filename_component(_inc_dir "${CMAKE_BINARY_DIR}/_cmrc/include" ABSOLUTE) 63 | set(CMRC_INCLUDE_DIR "${_inc_dir}" CACHE INTERNAL "Directory for CMakeRC include files") 64 | # Let's generate the primary include file 65 | file(MAKE_DIRECTORY "${CMRC_INCLUDE_DIR}/cmrc") 66 | set(hpp_content [==[ 67 | #ifndef CMRC_CMRC_HPP_INCLUDED 68 | #define CMRC_CMRC_HPP_INCLUDED 69 | 70 | #include 71 | #include 72 | #include 73 | #include 74 | #include 75 | #include 76 | #include 77 | #include 78 | #include 79 | 80 | #if !(defined(__EXCEPTIONS) || defined(__cpp_exceptions) || defined(_CPPUNWIND) || defined(CMRC_NO_EXCEPTIONS)) 81 | #define CMRC_NO_EXCEPTIONS 1 82 | #endif 83 | 84 | namespace cmrc { namespace detail { struct dummy; } } 85 | 86 | #define CMRC_DECLARE(libid) \ 87 | namespace cmrc { namespace detail { \ 88 | struct dummy; \ 89 | static_assert(std::is_same::value, "CMRC_DECLARE() must only appear at the global namespace"); \ 90 | } } \ 91 | 
namespace cmrc { namespace libid { \ 92 | cmrc::embedded_filesystem get_filesystem(); \ 93 | } } static_assert(true, "") 94 | 95 | namespace cmrc { 96 | 97 | class file { 98 | const char* _begin = nullptr; 99 | const char* _end = nullptr; 100 | 101 | public: 102 | using iterator = const char*; 103 | using const_iterator = iterator; 104 | iterator begin() const noexcept { return _begin; } 105 | iterator cbegin() const noexcept { return _begin; } 106 | iterator end() const noexcept { return _end; } 107 | iterator cend() const noexcept { return _end; } 108 | std::size_t size() const { return static_cast(std::distance(begin(), end())); } 109 | 110 | file() = default; 111 | file(iterator beg, iterator end) noexcept : _begin(beg), _end(end) {} 112 | }; 113 | 114 | class directory_entry; 115 | 116 | namespace detail { 117 | 118 | class directory; 119 | class file_data; 120 | 121 | class file_or_directory { 122 | union _data_t { 123 | class file_data* file_data; 124 | class directory* directory; 125 | } _data; 126 | bool _is_file = true; 127 | 128 | public: 129 | explicit file_or_directory(file_data& f) { 130 | _data.file_data = &f; 131 | } 132 | explicit file_or_directory(directory& d) { 133 | _data.directory = &d; 134 | _is_file = false; 135 | } 136 | bool is_file() const noexcept { 137 | return _is_file; 138 | } 139 | bool is_directory() const noexcept { 140 | return !is_file(); 141 | } 142 | const directory& as_directory() const noexcept { 143 | assert(!is_file()); 144 | return *_data.directory; 145 | } 146 | const file_data& as_file() const noexcept { 147 | assert(is_file()); 148 | return *_data.file_data; 149 | } 150 | }; 151 | 152 | class file_data { 153 | public: 154 | const char* begin_ptr; 155 | const char* end_ptr; 156 | file_data(const file_data&) = delete; 157 | file_data(const char* b, const char* e) : begin_ptr(b), end_ptr(e) {} 158 | }; 159 | 160 | inline std::pair split_path(const std::string& path) { 161 | auto first_sep = path.find("/"); 162 | if 
(first_sep == path.npos) { 163 | return std::make_pair(path, ""); 164 | } else { 165 | return std::make_pair(path.substr(0, first_sep), path.substr(first_sep + 1)); 166 | } 167 | } 168 | 169 | struct created_subdirectory { 170 | class directory& directory; 171 | class file_or_directory& index_entry; 172 | }; 173 | 174 | class directory { 175 | std::list _files; 176 | std::list _dirs; 177 | std::map _index; 178 | 179 | using base_iterator = std::map::const_iterator; 180 | 181 | public: 182 | 183 | directory() = default; 184 | directory(const directory&) = delete; 185 | 186 | created_subdirectory add_subdir(std::string name) & { 187 | _dirs.emplace_back(); 188 | auto& back = _dirs.back(); 189 | auto& fod = _index.emplace(name, file_or_directory{back}).first->second; 190 | return created_subdirectory{back, fod}; 191 | } 192 | 193 | file_or_directory* add_file(std::string name, const char* begin, const char* end) & { 194 | assert(_index.find(name) == _index.end()); 195 | _files.emplace_back(begin, end); 196 | return &_index.emplace(name, file_or_directory{_files.back()}).first->second; 197 | } 198 | 199 | const file_or_directory* get(const std::string& path) const { 200 | auto pair = split_path(path); 201 | auto child = _index.find(pair.first); 202 | if (child == _index.end()) { 203 | return nullptr; 204 | } 205 | auto& entry = child->second; 206 | if (pair.second.empty()) { 207 | // We're at the end of the path 208 | return &entry; 209 | } 210 | 211 | if (entry.is_file()) { 212 | // We can't traverse into a file. Stop. 
213 | return nullptr; 214 | } 215 | // Keep going down 216 | return entry.as_directory().get(pair.second); 217 | } 218 | 219 | class iterator { 220 | base_iterator _base_iter; 221 | base_iterator _end_iter; 222 | public: 223 | using value_type = directory_entry; 224 | using difference_type = std::ptrdiff_t; 225 | using pointer = const value_type*; 226 | using reference = const value_type&; 227 | using iterator_category = std::input_iterator_tag; 228 | 229 | iterator() = default; 230 | explicit iterator(base_iterator iter, base_iterator end) : _base_iter(iter), _end_iter(end) {} 231 | 232 | iterator begin() const noexcept { 233 | return *this; 234 | } 235 | 236 | iterator end() const noexcept { 237 | return iterator(_end_iter, _end_iter); 238 | } 239 | 240 | inline value_type operator*() const noexcept; 241 | 242 | bool operator==(const iterator& rhs) const noexcept { 243 | return _base_iter == rhs._base_iter; 244 | } 245 | 246 | bool operator!=(const iterator& rhs) const noexcept { 247 | return !(*this == rhs); 248 | } 249 | 250 | iterator& operator++() noexcept { 251 | ++_base_iter; 252 | return *this; 253 | } 254 | 255 | iterator operator++(int) noexcept { 256 | auto cp = *this; 257 | ++_base_iter; 258 | return cp; 259 | } 260 | }; 261 | 262 | using const_iterator = iterator; 263 | 264 | iterator begin() const noexcept { 265 | return iterator(_index.begin(), _index.end()); 266 | } 267 | 268 | iterator end() const noexcept { 269 | return iterator(); 270 | } 271 | }; 272 | 273 | inline std::string normalize_path(std::string path) { 274 | while (path.find("/") == 0) { 275 | path.erase(path.begin()); 276 | } 277 | while (!path.empty() && (path.rfind("/") == path.size() - 1)) { 278 | path.pop_back(); 279 | } 280 | auto off = path.npos; 281 | while ((off = path.find("//")) != path.npos) { 282 | path.erase(path.begin() + static_cast(off)); 283 | } 284 | return path; 285 | } 286 | 287 | using index_type = std::map; 288 | 289 | } // detail 290 | 291 | class 
directory_entry { 292 | std::string _fname; 293 | const detail::file_or_directory* _item; 294 | 295 | public: 296 | directory_entry() = delete; 297 | explicit directory_entry(std::string filename, const detail::file_or_directory& item) 298 | : _fname(filename) 299 | , _item(&item) 300 | {} 301 | 302 | const std::string& filename() const & { 303 | return _fname; 304 | } 305 | std::string filename() const && { 306 | return std::move(_fname); 307 | } 308 | 309 | bool is_file() const { 310 | return _item->is_file(); 311 | } 312 | 313 | bool is_directory() const { 314 | return _item->is_directory(); 315 | } 316 | }; 317 | 318 | directory_entry detail::directory::iterator::operator*() const noexcept { 319 | assert(begin() != end()); 320 | return directory_entry(_base_iter->first, _base_iter->second); 321 | } 322 | 323 | using directory_iterator = detail::directory::iterator; 324 | 325 | class embedded_filesystem { 326 | // Never-null: 327 | const cmrc::detail::index_type* _index; 328 | const detail::file_or_directory* _get(std::string path) const { 329 | path = detail::normalize_path(path); 330 | auto found = _index->find(path); 331 | if (found == _index->end()) { 332 | return nullptr; 333 | } else { 334 | return found->second; 335 | } 336 | } 337 | 338 | public: 339 | explicit embedded_filesystem(const detail::index_type& index) 340 | : _index(&index) 341 | {} 342 | 343 | file open(const std::string& path) const { 344 | auto entry_ptr = _get(path); 345 | if (!entry_ptr || !entry_ptr->is_file()) { 346 | #ifdef CMRC_NO_EXCEPTIONS 347 | fprintf(stderr, "Error no such file or directory: %s\n", path.c_str()); 348 | abort(); 349 | #else 350 | throw std::system_error(make_error_code(std::errc::no_such_file_or_directory), path); 351 | #endif 352 | } 353 | auto& dat = entry_ptr->as_file(); 354 | return file{dat.begin_ptr, dat.end_ptr}; 355 | } 356 | 357 | bool is_file(const std::string& path) const noexcept { 358 | auto entry_ptr = _get(path); 359 | return entry_ptr && 
entry_ptr->is_file(); 360 | } 361 | 362 | bool is_directory(const std::string& path) const noexcept { 363 | auto entry_ptr = _get(path); 364 | return entry_ptr && entry_ptr->is_directory(); 365 | } 366 | 367 | bool exists(const std::string& path) const noexcept { 368 | return !!_get(path); 369 | } 370 | 371 | directory_iterator iterate_directory(const std::string& path) const { 372 | auto entry_ptr = _get(path); 373 | if (!entry_ptr) { 374 | #ifdef CMRC_NO_EXCEPTIONS 375 | fprintf(stderr, "Error no such file or directory: %s\n", path.c_str()); 376 | abort(); 377 | #else 378 | throw std::system_error(make_error_code(std::errc::no_such_file_or_directory), path); 379 | #endif 380 | } 381 | if (!entry_ptr->is_directory()) { 382 | #ifdef CMRC_NO_EXCEPTIONS 383 | fprintf(stderr, "Error not a directory: %s\n", path.c_str()); 384 | abort(); 385 | #else 386 | throw std::system_error(make_error_code(std::errc::not_a_directory), path); 387 | #endif 388 | } 389 | return entry_ptr->as_directory().begin(); 390 | } 391 | }; 392 | 393 | } 394 | 395 | #endif // CMRC_CMRC_HPP_INCLUDED 396 | ]==]) 397 | 398 | set(cmrc_hpp "${CMRC_INCLUDE_DIR}/cmrc/cmrc.hpp" CACHE INTERNAL "") 399 | set(_generate 1) 400 | if(EXISTS "${cmrc_hpp}") 401 | file(READ "${cmrc_hpp}" _current) 402 | if(_current STREQUAL hpp_content) 403 | set(_generate 0) 404 | endif() 405 | endif() 406 | file(GENERATE OUTPUT "${cmrc_hpp}" CONTENT "${hpp_content}" CONDITION ${_generate}) 407 | 408 | add_library(cmrc-base INTERFACE) 409 | target_include_directories(cmrc-base INTERFACE $) 410 | # Signal a basic C++11 feature to require C++11. 
target_compile_features(cmrc-base INTERFACE cxx_nullptr)
set_property(TARGET cmrc-base PROPERTY INTERFACE_CXX_EXTENSIONS OFF)
add_library(cmrc::base ALIAS cmrc-base)

# cmrc_add_resource_library(<name>
#                           [ALIAS <alias>]
#                           [NAMESPACE <ns>]
#                           [TYPE <STATIC|OBJECT>]
#                           [<args forwarded to cmrc_add_resources>...])
#
# Creates the library target <name> that embeds resource files.
#   ALIAS      optional alias target created for <name>.
#   NAMESPACE  C++ namespace (inside `cmrc`) for the generated code; defaults
#              to <name>, which must then itself be a valid identifier.
#   TYPE       STATIC (default) or OBJECT.
# Every remaining argument is passed on to cmrc_add_resources().
function(cmrc_add_resource_library name)
    set(args ALIAS NAMESPACE TYPE)
    cmake_parse_arguments(ARG "" "${args}" "" "${ARGN}")
    # Generate the identifier for the resource library's namespace.
    # The pattern is anchored: without ^...$ any string containing a single
    # letter or underscore would match and the check could never fail.
    # `::`-separated nested namespaces are still accepted.
    set(ns_re "^[a-zA-Z_][a-zA-Z0-9_]*(::[a-zA-Z_][a-zA-Z0-9_]*)*$")
    if(NOT DEFINED ARG_NAMESPACE)
        # Check that the library name is also a valid namespace
        if(NOT name MATCHES "${ns_re}")
            message(SEND_ERROR "Library name is not a valid namespace. Specify the NAMESPACE argument")
        endif()
        set(ARG_NAMESPACE "${name}")
    else()
        if(NOT ARG_NAMESPACE MATCHES "${ns_re}")
            message(SEND_ERROR "NAMESPACE for ${name} is not a valid C++ namespace identifier (${ARG_NAMESPACE})")
        endif()
    endif()
    # `libname` is substituted into the template below as @libname@.
    set(libname "${name}")
    # Check that type is either "STATIC" or "OBJECT", or default to "STATIC" if
    # not set
    if(NOT DEFINED ARG_TYPE)
        set(ARG_TYPE STATIC)
    elseif(NOT "${ARG_TYPE}" MATCHES "^(STATIC|OBJECT)$")
        message(SEND_ERROR "${ARG_TYPE} is not a valid TYPE (STATIC and OBJECT are acceptable)")
        set(ARG_TYPE STATIC)
    endif()
    # Generate a library with the compiled in character arrays.
    # The $<JOIN:...> generator expressions are expanded by file(GENERATE)
    # and splice in the declarations/registrations that cmrc_add_resources()
    # and _cmrc_register_dirs() append to the target's CMRC_* properties.
    string(CONFIGURE [=[
        #include <cmrc/cmrc.hpp>
        #include <map>
        #include <utility>

        namespace cmrc {
        namespace @ARG_NAMESPACE@ {

        namespace res_chars {
        // These are the files which are available in this resource library
        $<JOIN:$<TARGET_PROPERTY:@libname@,CMRC_EXTERN_DECLS>,
        >
        }

        namespace {

        const cmrc::detail::index_type&
        get_root_index() {
            static cmrc::detail::directory root_directory_;
            static cmrc::detail::file_or_directory root_directory_fod{root_directory_};
            static cmrc::detail::index_type root_index;
            root_index.emplace("", &root_directory_fod);
            struct dir_inl {
                class cmrc::detail::directory& directory;
            };
            dir_inl root_directory_dir{root_directory_};
            (void)root_directory_dir;
            $<JOIN:$<TARGET_PROPERTY:@libname@,CMRC_MAKE_DIRS>,
            >
            $<JOIN:$<TARGET_PROPERTY:@libname@,CMRC_MAKE_FILES>,
            >
            return root_index;
        }

        }

        cmrc::embedded_filesystem get_filesystem() {
            static auto& index = get_root_index();
            return cmrc::embedded_filesystem{index};
        }

        } // @ARG_NAMESPACE@
        } // cmrc
    ]=] cpp_content @ONLY)
    get_filename_component(libdir "${CMAKE_CURRENT_BINARY_DIR}/__cmrc_${name}" ABSOLUTE)
    get_filename_component(lib_tmp_cpp "${libdir}/lib_.cpp" ABSOLUTE)
    # Strip the template's own 8-space indentation from every line.
    string(REPLACE "\n        " "\n" cpp_content "${cpp_content}")
    file(GENERATE OUTPUT "${lib_tmp_cpp}" CONTENT "${cpp_content}")
    get_filename_component(libcpp "${libdir}/lib.cpp" ABSOLUTE)
    # copy_if_different keeps lib.cpp's timestamp stable when the generated
    # content has not changed.
    add_custom_command(OUTPUT "${libcpp}"
        DEPENDS "${lib_tmp_cpp}" "${cmrc_hpp}"
        COMMAND "${CMAKE_COMMAND}" -E copy_if_different "${lib_tmp_cpp}" "${libcpp}"
        COMMENT "Generating ${name} resource loader"
        VERBATIM
        )
    # Generate the actual static library. Each source file is just a single file
    # with a character array compiled in containing the contents of the
    # corresponding resource file.
    add_library(${name} ${ARG_TYPE} ${libcpp})
    set_property(TARGET ${name} PROPERTY CMRC_LIBDIR "${libdir}")
    set_property(TARGET ${name} PROPERTY CMRC_NAMESPACE "${ARG_NAMESPACE}")
    target_link_libraries(${name} PUBLIC cmrc::base)
    set_property(TARGET ${name} PROPERTY CMRC_IS_RESOURCE_LIBRARY TRUE)
    if(ARG_ALIAS)
        add_library("${ARG_ALIAS}" ALIAS ${name})
    endif()
    cmrc_add_resources(${name} ${ARG_UNPARSED_ARGUMENTS})
endfunction()

# _cmrc_register_dirs(<name> <dirpath>)
#
# Internal helper. Appends to target <name>'s CMRC_MAKE_DIRS property the C++
# statements that create the embedded directory <dirpath>, registering its
# parent directories first. Directories already listed in the target's
# _CMRC_REGISTERED_DIRS property are skipped; an empty <dirpath> is a no-op.
function(_cmrc_register_dirs name dirpath)
    if(dirpath STREQUAL "")
        return()
    endif()
    # Skip this dir if we have already registered it
    get_target_property(registered "${name}" _CMRC_REGISTERED_DIRS)
    if(dirpath IN_LIST registered)
        return()
    endif()
    # Register the parent directory first
    get_filename_component(parent "${dirpath}" DIRECTORY)
    if(NOT parent STREQUAL "")
        _cmrc_register_dirs("${name}" "${parent}")
    endif()
    # Now generate the registration
    set_property(TARGET "${name}" APPEND PROPERTY _CMRC_REGISTERED_DIRS "${dirpath}")
    _cm_encode_fpath(sym "${dirpath}")
    if(parent STREQUAL "")
        set(parent_sym root_directory)
    else()
        _cm_encode_fpath(parent_sym "${parent}")
    endif()
    get_filename_component(leaf "${dirpath}" NAME)
    set_property(
        TARGET "${name}"
        APPEND PROPERTY CMRC_MAKE_DIRS
        "static auto ${sym}_dir = ${parent_sym}_dir.directory.add_subdir(\"${leaf}\")\;"
        "root_index.emplace(\"${dirpath}\", &${sym}_dir.index_entry)\;"
        )
endfunction()

# cmrc_add_resources(<name> [WHENCE <dir>] [PREFIX <prefix>] <file>...)
#
# Adds resource files to the existing resource library <name>.
#   WHENCE  directory the embedded paths are computed relative to
#           (default: CMAKE_CURRENT_SOURCE_DIR). Files outside it are rejected.
#   PREFIX  path prepended to every embedded path.
# For each file an intermediate .cpp is generated and added to the target, and
# the matching declarations/registrations are appended to the target's
# CMRC_EXTERN_DECLS and CMRC_MAKE_FILES properties.
function(cmrc_add_resources name)
    # Test for existence first: get_target_property() raises its own error on
    # a non-existent target, which would pre-empt the message below.
    if(NOT TARGET ${name})
        message(SEND_ERROR "cmrc_add_resources called on target '${name}' which is not an existing resource library")
        return()
    endif()
    get_target_property(is_reslib ${name} CMRC_IS_RESOURCE_LIBRARY)
    if(NOT is_reslib)
        message(SEND_ERROR "cmrc_add_resources called on target '${name}' which is not an existing resource library")
        return()
    endif()

    set(options)
    set(args WHENCE PREFIX)
    set(list_args)
    cmake_parse_arguments(ARG "${options}" "${args}" "${list_args}" "${ARGN}")

    if(NOT ARG_WHENCE)
        set(ARG_WHENCE ${CMAKE_CURRENT_SOURCE_DIR})
    endif()
    _cmrc_normalize_path(ARG_WHENCE)
    get_filename_component(ARG_WHENCE "${ARG_WHENCE}" ABSOLUTE)

    # Normalize the prefix once (it does not depend on the input file) and
    # make sure a non-empty prefix ends with exactly one "/".
    if(DEFINED ARG_PREFIX)
        _cmrc_normalize_path(ARG_PREFIX)
    endif()
    if(ARG_PREFIX AND NOT ARG_PREFIX MATCHES "/$")
        set(ARG_PREFIX "${ARG_PREFIX}/")
    endif()

    # Generate the identifier for the resource library's namespace
    get_target_property(lib_ns "${name}" CMRC_NAMESPACE)

    get_target_property(libdir ${name} CMRC_LIBDIR)
    get_target_property(target_dir ${name} SOURCE_DIR)
    file(RELATIVE_PATH reldir "${target_dir}" "${CMAKE_CURRENT_SOURCE_DIR}")
    if(reldir MATCHES "^\\.\\.")
        message(SEND_ERROR "Cannot call cmrc_add_resources in a parent directory from the resource library target")
        return()
    endif()

    foreach(input IN LISTS ARG_UNPARSED_ARGUMENTS)
        _cmrc_normalize_path(input)
        get_filename_component(abs_in "${input}" ABSOLUTE)
        # Generate a filename based on the input filename that we can put in
        # the intermediate directory.
        file(RELATIVE_PATH relpath "${ARG_WHENCE}" "${abs_in}")
        if(relpath MATCHES "^\\.\\.")
            # For now we just error on files that exist outside of the source dir.
            message(SEND_ERROR "Cannot add file '${input}': File must be in a subdirectory of ${ARG_WHENCE}")
            continue()
        endif()
        get_filename_component(dirpath "${ARG_PREFIX}${relpath}" DIRECTORY)
        _cmrc_register_dirs("${name}" "${dirpath}")
        get_filename_component(abs_out "${libdir}/intermediate/${ARG_PREFIX}${relpath}.cpp" ABSOLUTE)
        # Generate a symbol name for the file's character array
        _cm_encode_fpath(sym "${relpath}")
        # Get the symbol name for the parent directory
        if(dirpath STREQUAL "")
            set(parent_sym root_directory)
        else()
            _cm_encode_fpath(parent_sym "${dirpath}")
        endif()
        # Generate the rule for the intermediate source file
        _cmrc_generate_intermediate_cpp(${lib_ns} ${sym} "${abs_out}" "${abs_in}")
        target_sources(${name} PRIVATE "${abs_out}")
        set_property(TARGET ${name} APPEND PROPERTY CMRC_EXTERN_DECLS
            "// Pointers to ${input}"
            "extern const char* const ${sym}_begin\;"
            "extern const char* const ${sym}_end\;"
            )
        get_filename_component(leaf "${relpath}" NAME)
        set_property(
            TARGET ${name}
            APPEND PROPERTY CMRC_MAKE_FILES
            "root_index.emplace("
            "    \"${ARG_PREFIX}${relpath}\","
            "    ${parent_sym}_dir.directory.add_file("
            "        \"${leaf}\","
            "        res_chars::${sym}_begin,"
            "        res_chars::${sym}_end"
            "    )"
            ")\;"
            )
    endforeach()
endfunction()

# _cmrc_generate_intermediate_cpp(<lib_ns> <symbol> <outfile> <infile>)
#
# Internal helper. Adds the build rule that produces <outfile> from <infile>
# by re-running this script (_CMRC_SCRIPT) with _CMRC_GENERATE_MODE=TRUE,
# passing <lib_ns> as NAMESPACE and <symbol> as SYMBOL.
function(_cmrc_generate_intermediate_cpp lib_ns symbol outfile infile)
    add_custom_command(
        # This is the file we will generate
        OUTPUT "${outfile}"
        # These are the primary files that affect the output
        DEPENDS "${infile}" "${_CMRC_SCRIPT}"
        COMMAND
            "${CMAKE_COMMAND}"
                -D_CMRC_GENERATE_MODE=TRUE
                -DNAMESPACE=${lib_ns}
                -DSYMBOL=${symbol}
                "-DINPUT_FILE=${infile}"
                "-DOUTPUT_FILE=${outfile}"
                -P "${_CMRC_SCRIPT}"
        COMMENT "Generating intermediate file for ${infile}"
        # Pass arguments through unchanged regardless of the native shell.
        VERBATIM
        )
endfunction()

# _cm_encode_fpath(<var> <fpath>)
#
# Internal helper. Sets <var> in the caller's scope to an identifier derived
# from <fpath>: "f_", the first four hex digits of the path's MD5, "_", and
# the path converted with string(MAKE_C_IDENTIFIER).
function(_cm_encode_fpath var fpath)
    string(MAKE_C_IDENTIFIER "${fpath}" ident)
    string(MD5 hash "${fpath}")
    string(SUBSTRING "${hash}" 0 4 hash)
    set(${var} f_${hash}_${ident} PARENT_SCOPE)
endfunction()

--------------------------------------------------------------------------------
/src/simple_highlight.cc:
--------------------------------------------------------------------------------
/*

** 2014 May 31
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
*/

#include "simple_highlight.h"

/* NOTE(review): the three header names below were missing from this copy of
** the file. <string.h> is needed for memset()/strlen(); the other two are
** presumed - confirm against the original source. */
#include <math.h> /* amalgamator: keep */
#include <stdio.h>
#include <string.h>
SQLITE_EXTENSION_INIT3

/* Mark a function parameter as unused, to suppress nuisance compiler
** warnings. */
#ifndef UNUSED_PARAM
#define UNUSED_PARAM(X) (void)(X)
#endif

#ifndef UNUSED_PARAM2
#define UNUSED_PARAM2(X, Y) (void)(X), (void)(Y)
#endif

/*
** Object used to iterate through all "coalesced phrase instances" in
** a single column of the current row. If the phrase instances in the
** column being considered do not overlap, this object simply iterates
** through them. Or, if they do overlap (share one or more tokens in
** common), each set of overlapping instances is treated as a single
** match. See documentation for the highlight() auxiliary function for
** details.
40 | ** 41 | ** Usage is: 42 | ** 43 | ** for(rc = fts5CInstIterNext(pApi, pFts, iCol, &iter); 44 | ** (rc==SQLITE_OK && 0==fts5CInstIterEof(&iter); 45 | ** rc = fts5CInstIterNext(&iter) 46 | ** ){ 47 | ** printf("instance starts at %d, ends at %d\n", iter.iStart, iter.iEnd); 48 | ** } 49 | ** 50 | */ 51 | typedef struct CInstIter CInstIter; 52 | struct CInstIter { 53 | const Fts5ExtensionApi *pApi; /* API offered by current FTS version */ 54 | Fts5Context *pFts; /* First arg to pass to pApi functions */ 55 | int iCol; /* Column to search */ 56 | int iInst; /* Next phrase instance index */ 57 | int nInst; /* Total number of phrase instances */ 58 | 59 | /* Output variables */ 60 | int iStart; /* First token in coalesced phrase instance */ 61 | int iEnd; /* Last token in coalesced phrase instance */ 62 | }; 63 | 64 | /* 65 | ** Advance the iterator to the next coalesced phrase instance. Return 66 | ** an SQLite error code if an error occurs, or SQLITE_OK otherwise. 67 | */ 68 | static int fts5CInstIterNext(CInstIter *pIter) { 69 | int rc = SQLITE_OK; 70 | pIter->iStart = -1; 71 | pIter->iEnd = -1; 72 | 73 | while (rc == SQLITE_OK && pIter->iInst < pIter->nInst) { 74 | int ip; 75 | int ic; 76 | int io; 77 | rc = pIter->pApi->xInst(pIter->pFts, pIter->iInst, &ip, &ic, &io); 78 | if (rc == SQLITE_OK) { 79 | if (ic == pIter->iCol) { 80 | int iEnd = io - 1 + pIter->pApi->xPhraseSize(pIter->pFts, ip); 81 | if (pIter->iStart < 0) { 82 | pIter->iStart = io; 83 | pIter->iEnd = iEnd; 84 | } else if (io <= pIter->iEnd + 1) { // NOTE: +1 is the only diff with buildin highlight function 85 | if (iEnd > pIter->iEnd) pIter->iEnd = iEnd; 86 | } else { 87 | break; 88 | } 89 | } 90 | pIter->iInst++; 91 | } 92 | } 93 | 94 | return rc; 95 | } 96 | 97 | /* 98 | ** Initialize the iterator object indicated by the final parameter to 99 | ** iterate through coalesced phrase instances in column iCol. 
100 | */ 101 | static int fts5CInstIterInit(const Fts5ExtensionApi *pApi, Fts5Context *pFts, int iCol, CInstIter *pIter) { 102 | int rc; 103 | 104 | memset(pIter, 0, sizeof(CInstIter)); 105 | pIter->pApi = pApi; 106 | pIter->pFts = pFts; 107 | pIter->iCol = iCol; 108 | rc = pApi->xInstCount(pFts, &pIter->nInst); 109 | 110 | if (rc == SQLITE_OK) { 111 | rc = fts5CInstIterNext(pIter); 112 | } 113 | 114 | return rc; 115 | } 116 | 117 | /************************************************************************* 118 | ** Start of highlight() implementation. 119 | */ 120 | typedef struct HighlightContext HighlightContext; 121 | struct HighlightContext { 122 | CInstIter iter; /* Coalesced Instance Iterator */ 123 | int iPos; /* Current token offset in zIn[] */ 124 | int iRangeStart; /* First token to include */ 125 | int iRangeEnd; /* If non-zero, last token to include */ 126 | const char *zOpen; /* Opening highlight */ 127 | const char *zClose; /* Closing highlight */ 128 | const char *zIn; /* Input text */ 129 | int nIn; /* Size of input text in bytes */ 130 | int iOff; /* Current offset within zIn[] */ 131 | char *zOut; /* Output value */ 132 | }; 133 | 134 | /* 135 | ** Append text to the HighlightContext output string - p->zOut. Argument 136 | ** z points to a buffer containing n bytes of text to append. If n is 137 | ** negative, everything up until the first '\0' is appended to the output. 138 | ** 139 | ** If *pRc is set to any value other than SQLITE_OK when this function is 140 | ** called, it is a no-op. If an error (i.e. an OOM condition) is encountered, 141 | ** *pRc is set to an error code before returning. 
142 | */ 143 | static void fts5HighlightAppend(int *pRc, HighlightContext *p, const char *z, int n) { 144 | if (*pRc == SQLITE_OK && z) { 145 | if (n < 0) n = (int)strlen(z); 146 | p->zOut = sqlite3_mprintf("%z%.*s", p->zOut, n, z); 147 | if (p->zOut == 0) *pRc = SQLITE_NOMEM; 148 | } 149 | } 150 | 151 | /* 152 | ** Tokenizer callback used by implementation of highlight() function. 153 | */ 154 | static int fts5HighlightCb(void *pContext, /* Pointer to HighlightContext object */ 155 | int tflags, /* Mask of FTS5_TOKEN_* flags */ 156 | const char *pToken, /* Buffer containing token */ 157 | int nToken, /* Size of token in bytes */ 158 | int iStartOff, /* Start offset of token */ 159 | int iEndOff /* End offset of token */ 160 | ) { 161 | HighlightContext *p = (HighlightContext *)pContext; 162 | int rc = SQLITE_OK; 163 | int iPos; 164 | 165 | if (tflags & FTS5_TOKEN_COLOCATED) return SQLITE_OK; 166 | iPos = p->iPos++; 167 | 168 | if (p->iRangeEnd > 0) { 169 | if (iPos < p->iRangeStart || iPos > p->iRangeEnd) return SQLITE_OK; 170 | if (p->iRangeStart && iPos == p->iRangeStart) p->iOff = iStartOff; 171 | } 172 | 173 | if (iPos == p->iter.iStart) { 174 | fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iStartOff - p->iOff); 175 | fts5HighlightAppend(&rc, p, p->zOpen, -1); 176 | p->iOff = iStartOff; 177 | } 178 | 179 | if (iPos == p->iter.iEnd) { 180 | if (p->iRangeEnd && p->iter.iStart < p->iRangeStart) { 181 | fts5HighlightAppend(&rc, p, p->zOpen, -1); 182 | } 183 | fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff); 184 | fts5HighlightAppend(&rc, p, p->zClose, -1); 185 | p->iOff = iEndOff; 186 | if (rc == SQLITE_OK) { 187 | rc = fts5CInstIterNext(&p->iter); 188 | } 189 | } 190 | 191 | if (p->iRangeEnd > 0 && iPos == p->iRangeEnd) { 192 | fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff); 193 | p->iOff = iEndOff; 194 | if (iPos >= p->iter.iStart && iPos < p->iter.iEnd) { 195 | fts5HighlightAppend(&rc, p, p->zClose, -1); 196 | } 197 | } 198 | 
199 | return rc; 200 | } 201 | 202 | /* 203 | ** Implementation of simple_highlight() function. 204 | */ 205 | void simple_highlight(const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ 206 | Fts5Context *pFts, /* First arg to pass to pApi functions */ 207 | sqlite3_context *pCtx, /* Context for returning result/error */ 208 | int nVal, /* Number of values in apVal[] array */ 209 | sqlite3_value **apVal /* Array of trailing arguments */ 210 | ) { 211 | HighlightContext ctx; 212 | int rc; 213 | int iCol; 214 | 215 | if (nVal != 3) { 216 | const char *zErr = "wrong number of arguments to function highlight()"; 217 | sqlite3_result_error(pCtx, zErr, -1); 218 | return; 219 | } 220 | 221 | iCol = sqlite3_value_int(apVal[0]); 222 | memset(&ctx, 0, sizeof(HighlightContext)); 223 | ctx.zOpen = (const char *)sqlite3_value_text(apVal[1]); 224 | ctx.zClose = (const char *)sqlite3_value_text(apVal[2]); 225 | rc = pApi->xColumnText(pFts, iCol, &ctx.zIn, &ctx.nIn); 226 | 227 | if (ctx.zIn) { 228 | if (rc == SQLITE_OK) { 229 | rc = fts5CInstIterInit(pApi, pFts, iCol, &ctx.iter); 230 | } 231 | 232 | if (rc == SQLITE_OK) { 233 | rc = pApi->xTokenize(pFts, ctx.zIn, ctx.nIn, (void *)&ctx, fts5HighlightCb); 234 | } 235 | fts5HighlightAppend(&rc, &ctx, &ctx.zIn[ctx.iOff], ctx.nIn - ctx.iOff); 236 | 237 | if (rc == SQLITE_OK) { 238 | sqlite3_result_text(pCtx, (const char *)ctx.zOut, -1, SQLITE_TRANSIENT); 239 | } 240 | sqlite3_free(ctx.zOut); 241 | } 242 | if (rc != SQLITE_OK) { 243 | sqlite3_result_error_code(pCtx, rc); 244 | } 245 | } 246 | /* 247 | ** End of highlight() implementation. 248 | **************************************************************************/ 249 | 250 | /************************************************************************* 251 | ** Start of highlight_pos() implementation. 
252 | */ 253 | typedef struct HighlightPosContext HighlightPosContext; 254 | struct HighlightPosContext { 255 | CInstIter iter; /* Coalesced Instance Iterator */ 256 | int iPos; /* Current token offset in zIn[] */ 257 | int iRangeStart; /* First token to include */ 258 | int iRangeEnd; /* If non-zero, last token to include */ 259 | const char *zIn; /* Input text */ 260 | int nIn; /* Size of input text in bytes */ 261 | int iOff; /* Current offset within zIn[] */ 262 | char *zOut; /* Output value */ 263 | }; 264 | 265 | /* 266 | ** Append text to the HighlightPosContext output string - p->zOut. Argument 267 | ** z points to a buffer containing n bytes of text to append. If n is 268 | ** negative, everything up until the first '\0' is appended to the output. 269 | ** 270 | ** If *pRc is set to any value other than SQLITE_OK when this function is 271 | ** called, it is a no-op. If an error (i.e. an OOM condition) is encountered, 272 | ** *pRc is set to an error code before returning. 273 | */ 274 | static void fts5HighlightPosAppend(int *pRc, HighlightPosContext *p, const char *z, int n) { 275 | if (*pRc == SQLITE_OK) { 276 | if (n < 0) n = (int)strlen(z); 277 | p->zOut = sqlite3_mprintf("%z%.*s", p->zOut, n, z); 278 | if (p->zOut == 0) *pRc = SQLITE_NOMEM; 279 | } 280 | } 281 | 282 | static void fts5HighlightPosAppendStart(int *pRc, HighlightPosContext *p, int start) { 283 | char str[64]; 284 | sprintf(str, "%d", start); 285 | fts5HighlightPosAppend(pRc, p, str, -1); 286 | fts5HighlightPosAppend(pRc, p, ",", -1); 287 | } 288 | 289 | static void fts5HighlightPosAppendEnd(int *pRc, HighlightPosContext *p, int end) { 290 | char str[64]; 291 | sprintf(str, "%d", end); 292 | fts5HighlightPosAppend(pRc, p, str, -1); 293 | fts5HighlightPosAppend(pRc, p, ";", -1); 294 | } 295 | 296 | /* 297 | ** Tokenizer callback used by implementation of highlight_pos() function. 
298 | */ 299 | static int fts5HighlightPosCb(void *pContext, /* Pointer to HighlightContext object */ 300 | int tflags, /* Mask of FTS5_TOKEN_* flags */ 301 | const char *pToken, /* Buffer containing token */ 302 | int nToken, /* Size of token in bytes */ 303 | int iStartOff, /* Start offset of token */ 304 | int iEndOff /* End offset of token */ 305 | ) { 306 | HighlightPosContext *p = (HighlightPosContext *)pContext; 307 | int rc = SQLITE_OK; 308 | int iPos; 309 | 310 | if (tflags & FTS5_TOKEN_COLOCATED) return SQLITE_OK; 311 | iPos = p->iPos++; 312 | 313 | if (p->iRangeEnd > 0) { 314 | if (iPos < p->iRangeStart || iPos > p->iRangeEnd) return SQLITE_OK; 315 | if (p->iRangeStart && iPos == p->iRangeStart) p->iOff = iStartOff; 316 | } 317 | 318 | if (iPos == p->iter.iStart) { 319 | fts5HighlightPosAppendStart(&rc, p, iStartOff); 320 | p->iOff = iStartOff; 321 | } 322 | 323 | if (iPos == p->iter.iEnd) { 324 | fts5HighlightPosAppendEnd(&rc, p, iEndOff); 325 | p->iOff = iEndOff; 326 | if (rc == SQLITE_OK) { 327 | rc = fts5CInstIterNext(&p->iter); 328 | } 329 | } 330 | 331 | if (p->iRangeEnd > 0 && iPos == p->iRangeEnd) { 332 | fts5HighlightPosAppendEnd(&rc, p, iEndOff); 333 | p->iOff = iEndOff; 334 | } 335 | 336 | return rc; 337 | } 338 | 339 | /* 340 | ** Implementation of simple_highlight_pos() function. 
341 | */ 342 | void simple_highlight_pos(const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ 343 | Fts5Context *pFts, /* First arg to pass to pApi functions */ 344 | sqlite3_context *pCtx, /* Context for returning result/error */ 345 | int nVal, /* Number of values in apVal[] array */ 346 | sqlite3_value **apVal /* Array of trailing arguments */ 347 | ) { 348 | HighlightPosContext ctx; 349 | int rc; 350 | int iCol; 351 | 352 | if (nVal != 1) { 353 | const char *zErr = "wrong number of arguments to function highlight_pos()"; 354 | sqlite3_result_error(pCtx, zErr, -1); 355 | return; 356 | } 357 | 358 | iCol = sqlite3_value_int(apVal[0]); 359 | memset(&ctx, 0, sizeof(HighlightPosContext)); 360 | rc = pApi->xColumnText(pFts, iCol, &ctx.zIn, &ctx.nIn); 361 | 362 | if (ctx.zIn) { 363 | if (rc == SQLITE_OK) { 364 | rc = fts5CInstIterInit(pApi, pFts, iCol, &ctx.iter); 365 | } 366 | 367 | if (rc == SQLITE_OK) { 368 | rc = pApi->xTokenize(pFts, ctx.zIn, ctx.nIn, (void *)&ctx, fts5HighlightPosCb); 369 | } 370 | 371 | if (rc == SQLITE_OK) { 372 | sqlite3_result_text(pCtx, (const char *)ctx.zOut, -1, SQLITE_TRANSIENT); 373 | } 374 | sqlite3_free(ctx.zOut); 375 | } 376 | if (rc != SQLITE_OK) { 377 | sqlite3_result_error_code(pCtx, rc); 378 | } 379 | } 380 | 381 | /* 382 | ** End of highlight_pos() implementation. 383 | **************************************************************************/ 384 | 385 | /*************************************************************************/ 386 | /* Start of simple_snippet() implementation. 387 | /* adapt from snippet 388 | /* 389 | ** Context object passed to the fts5SnippetFinderCb() function. 
390 | */ 391 | typedef struct Fts5SnippetFinder Fts5SnippetFinder; 392 | struct Fts5SnippetFinder { 393 | int iPos; /* Current token position */ 394 | int nFirstAlloc; /* Allocated size of aFirst[] */ 395 | int nFirst; /* Number of entries in aFirst[] */ 396 | int *aFirst; /* Array of first token in each sentence */ 397 | const char *zDoc; /* Document being tokenized */ 398 | }; 399 | 400 | /* 401 | ** Add an entry to the Fts5SnippetFinder.aFirst[] array. Grow the array if 402 | ** necessary. Return SQLITE_OK if successful, or SQLITE_NOMEM if an 403 | ** error occurs. 404 | */ 405 | static int fts5SnippetFinderAdd(Fts5SnippetFinder *p, int iAdd) { 406 | if (p->nFirstAlloc == p->nFirst) { 407 | int nNew = p->nFirstAlloc ? p->nFirstAlloc * 2 : 64; 408 | int *aNew; 409 | 410 | aNew = (int *)sqlite3_realloc64(p->aFirst, nNew * sizeof(int)); 411 | if (aNew == 0) return SQLITE_NOMEM; 412 | p->aFirst = aNew; 413 | p->nFirstAlloc = nNew; 414 | } 415 | p->aFirst[p->nFirst++] = iAdd; 416 | return SQLITE_OK; 417 | } 418 | 419 | /* 420 | ** This function is an xTokenize() callback used by the auxiliary simple_snippet() 421 | ** function. Its job is to identify tokens that are the first in a sentence. 422 | ** For each such token, an entry is added to the SFinder.aFirst[] array. 
423 | */ 424 | static int fts5SnippetFinderCb(void *pContext, /* Pointer to HighlightContext object */ 425 | int tflags, /* Mask of FTS5_TOKEN_* flags */ 426 | const char *pToken, /* Buffer containing token */ 427 | int nToken, /* Size of token in bytes */ 428 | int iStartOff, /* Start offset of token */ 429 | int iEndOff /* End offset of token */ 430 | ) { 431 | int rc = SQLITE_OK; 432 | 433 | UNUSED_PARAM2(pToken, nToken); 434 | UNUSED_PARAM(iEndOff); 435 | 436 | if ((tflags & FTS5_TOKEN_COLOCATED) == 0) { 437 | Fts5SnippetFinder *p = (Fts5SnippetFinder *)pContext; 438 | if (p->iPos > 0) { 439 | int i; 440 | char c = 0; 441 | for (i = iStartOff - 1; i >= 0; i--) { 442 | c = p->zDoc[i]; 443 | if (c != ' ' && c != '\t' && c != '\n' && c != '\r') break; 444 | } 445 | if (i != iStartOff - 1 && (c == '.' || c == ':')) { 446 | rc = fts5SnippetFinderAdd(p, p->iPos); 447 | } 448 | } else { 449 | rc = fts5SnippetFinderAdd(p, 0); 450 | } 451 | p->iPos++; 452 | } 453 | return rc; 454 | } 455 | 456 | static int fts5SnippetScore(const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ 457 | Fts5Context *pFts, /* First arg to pass to pApi functions */ 458 | int nDocsize, /* Size of column in tokens */ 459 | unsigned char *aSeen, /* Array with one element per query phrase */ 460 | int iCol, /* Column to score */ 461 | int iPos, /* Starting offset to score */ 462 | int nToken, /* Max tokens per snippet */ 463 | int *pnScore, /* OUT: Score */ 464 | int *piPos /* OUT: Adjusted offset */ 465 | ) { 466 | int rc; 467 | int i; 468 | int ip = 0; 469 | int ic = 0; 470 | int iOff = 0; 471 | int iFirst = -1; 472 | int nInst; 473 | int nScore = 0; 474 | int iLast = 0; 475 | sqlite3_int64 iEnd = (sqlite3_int64)iPos + nToken; 476 | 477 | rc = pApi->xInstCount(pFts, &nInst); 478 | for (i = 0; i < nInst && rc == SQLITE_OK; i++) { 479 | rc = pApi->xInst(pFts, i, &ip, &ic, &iOff); 480 | if (rc == SQLITE_OK && ic == iCol && iOff >= iPos && iOff < iEnd) { 481 | nScore += (aSeen[ip] ? 
1 : 1000); 482 | aSeen[ip] = 1; 483 | if (iFirst < 0) iFirst = iOff; 484 | iLast = iOff + pApi->xPhraseSize(pFts, ip); 485 | } 486 | } 487 | 488 | *pnScore = nScore; 489 | if (piPos) { 490 | sqlite3_int64 iAdj = iFirst - (nToken - (iLast - iFirst)) / 2; 491 | if ((iAdj + nToken) > nDocsize) iAdj = nDocsize - nToken; 492 | if (iAdj < 0) iAdj = 0; 493 | *piPos = (int)iAdj; 494 | } 495 | 496 | return rc; 497 | } 498 | 499 | /* 500 | ** Return the value in pVal interpreted as utf-8 text. Except, if pVal 501 | ** contains a NULL value, return a pointer to a static string zero 502 | ** bytes in length instead of a NULL pointer. 503 | */ 504 | static const char *fts5ValueToText(sqlite3_value *pVal) { 505 | const char *zRet = (const char *)sqlite3_value_text(pVal); 506 | return zRet ? zRet : ""; 507 | } 508 | 509 | /* 510 | ** Implementation of simple_snippet() function. 511 | */ 512 | void simple_snippet(const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ 513 | Fts5Context *pFts, /* First arg to pass to pApi functions */ 514 | sqlite3_context *pCtx, /* Context for returning result/error */ 515 | int nVal, /* Number of values in apVal[] array */ 516 | sqlite3_value **apVal /* Array of trailing arguments */ 517 | ) { 518 | HighlightContext ctx; 519 | int rc = SQLITE_OK; /* Return code */ 520 | int iCol; /* 1st argument to snippet() */ 521 | const char *zEllips; /* 4th argument to snippet() */ 522 | int nToken; /* 5th argument to snippet() */ 523 | int nInst = 0; /* Number of instance matches this row */ 524 | int i; /* Used to iterate through instances */ 525 | int nPhrase; /* Number of phrases in query */ 526 | unsigned char *aSeen; /* Array of "seen instance" flags */ 527 | int iBestCol; /* Column containing best snippet */ 528 | int iBestStart = 0; /* First token of best snippet */ 529 | int nBestScore = 0; /* Score of best snippet */ 530 | int nColSize = 0; /* Total size of iBestCol in tokens */ 531 | Fts5SnippetFinder sFinder; /* Used to find the 
beginnings of sentences */ 532 | int nCol; 533 | 534 | if (nVal != 5) { 535 | const char *zErr = "wrong number of arguments to function snippet()"; 536 | sqlite3_result_error(pCtx, zErr, -1); 537 | return; 538 | } 539 | 540 | nCol = pApi->xColumnCount(pFts); 541 | memset(&ctx, 0, sizeof(HighlightContext)); 542 | iCol = sqlite3_value_int(apVal[0]); 543 | ctx.zOpen = fts5ValueToText(apVal[1]); 544 | ctx.zClose = fts5ValueToText(apVal[2]); 545 | zEllips = fts5ValueToText(apVal[3]); 546 | nToken = sqlite3_value_int(apVal[4]); 547 | 548 | iBestCol = (iCol >= 0 ? iCol : 0); 549 | nPhrase = pApi->xPhraseCount(pFts); 550 | aSeen = (unsigned char *)sqlite3_malloc(nPhrase); 551 | if (aSeen == 0) { 552 | rc = SQLITE_NOMEM; 553 | } 554 | if (rc == SQLITE_OK) { 555 | rc = pApi->xInstCount(pFts, &nInst); 556 | } 557 | 558 | memset(&sFinder, 0, sizeof(Fts5SnippetFinder)); 559 | for (i = 0; i < nCol; i++) { 560 | if (iCol < 0 || iCol == i) { 561 | int nDoc; 562 | int nDocsize; 563 | int ii; 564 | sFinder.iPos = 0; 565 | sFinder.nFirst = 0; 566 | rc = pApi->xColumnText(pFts, i, &sFinder.zDoc, &nDoc); 567 | if (rc != SQLITE_OK) break; 568 | rc = pApi->xTokenize(pFts, sFinder.zDoc, nDoc, (void *)&sFinder, fts5SnippetFinderCb); 569 | if (rc != SQLITE_OK) break; 570 | rc = pApi->xColumnSize(pFts, i, &nDocsize); 571 | if (rc != SQLITE_OK) break; 572 | 573 | for (ii = 0; rc == SQLITE_OK && ii < nInst; ii++) { 574 | int ip, ic, io; 575 | int iAdj; 576 | int nScore; 577 | int jj; 578 | 579 | rc = pApi->xInst(pFts, ii, &ip, &ic, &io); 580 | if (ic != i) continue; 581 | if (io > nDocsize) rc = SQLITE_CORRUPT_VTAB; 582 | if (rc != SQLITE_OK) continue; 583 | memset(aSeen, 0, nPhrase); 584 | rc = fts5SnippetScore(pApi, pFts, nDocsize, aSeen, i, io, nToken, &nScore, &iAdj); 585 | if (rc == SQLITE_OK && nScore > nBestScore) { 586 | nBestScore = nScore; 587 | iBestCol = i; 588 | iBestStart = iAdj; 589 | nColSize = nDocsize; 590 | } 591 | 592 | if (rc == SQLITE_OK && sFinder.nFirst && nDocsize > 
nToken) { 593 | for (jj = 0; jj < (sFinder.nFirst - 1); jj++) { 594 | if (sFinder.aFirst[jj + 1] > io) break; 595 | } 596 | 597 | if (sFinder.aFirst[jj] < io) { 598 | memset(aSeen, 0, nPhrase); 599 | rc = fts5SnippetScore(pApi, pFts, nDocsize, aSeen, i, sFinder.aFirst[jj], nToken, &nScore, 0); 600 | 601 | nScore += (sFinder.aFirst[jj] == 0 ? 120 : 100); 602 | if (rc == SQLITE_OK && nScore > nBestScore) { 603 | nBestScore = nScore; 604 | iBestCol = i; 605 | iBestStart = sFinder.aFirst[jj]; 606 | nColSize = nDocsize; 607 | } 608 | } 609 | } 610 | } 611 | } 612 | } 613 | 614 | if (rc == SQLITE_OK) { 615 | rc = pApi->xColumnText(pFts, iBestCol, &ctx.zIn, &ctx.nIn); 616 | } 617 | if (rc == SQLITE_OK && nColSize == 0) { 618 | rc = pApi->xColumnSize(pFts, iBestCol, &nColSize); 619 | } 620 | if (ctx.zIn) { 621 | if (rc == SQLITE_OK) { 622 | rc = fts5CInstIterInit(pApi, pFts, iBestCol, &ctx.iter); 623 | } 624 | 625 | ctx.iRangeStart = iBestStart; 626 | ctx.iRangeEnd = iBestStart + nToken - 1; 627 | 628 | if (iBestStart > 0) { 629 | fts5HighlightAppend(&rc, &ctx, zEllips, -1); 630 | } 631 | 632 | /* Advance iterator ctx.iter so that it points to the first coalesced 633 | ** phrase instance at or following position iBestStart. 
*/ 634 | while (ctx.iter.iStart >= 0 && ctx.iter.iStart < iBestStart && rc == SQLITE_OK) { 635 | rc = fts5CInstIterNext(&ctx.iter); 636 | } 637 | 638 | if (rc == SQLITE_OK) { 639 | rc = pApi->xTokenize(pFts, ctx.zIn, ctx.nIn, (void *)&ctx, fts5HighlightCb); 640 | } 641 | if (ctx.iRangeEnd >= (nColSize - 1)) { 642 | fts5HighlightAppend(&rc, &ctx, &ctx.zIn[ctx.iOff], ctx.nIn - ctx.iOff); 643 | } else { 644 | fts5HighlightAppend(&rc, &ctx, zEllips, -1); 645 | } 646 | } 647 | if (rc == SQLITE_OK) { 648 | sqlite3_result_text(pCtx, (const char *)ctx.zOut, -1, SQLITE_TRANSIENT); 649 | } else { 650 | sqlite3_result_error_code(pCtx, rc); 651 | } 652 | sqlite3_free(ctx.zOut); 653 | sqlite3_free(aSeen); 654 | sqlite3_free(sFinder.aFirst); 655 | } 656 | 657 | /************************************************************************/ 658 | -------------------------------------------------------------------------------- /contrib/sqlite3/sqlite3ext.h: -------------------------------------------------------------------------------- 1 | /* 2 | ** 2006 June 7 3 | ** 4 | ** The author disclaims copyright to this source code. In place of 5 | ** a legal notice, here is a blessing: 6 | ** 7 | ** May you do good and not evil. 8 | ** May you find forgiveness for yourself and forgive others. 9 | ** May you share freely, never taking more than you give. 10 | ** 11 | ************************************************************************* 12 | ** This header file defines the SQLite interface for use by 13 | ** shared libraries that want to be imported as extensions into 14 | ** an SQLite instance. Shared libraries that intend to be loaded 15 | ** as extensions by SQLite should #include this file instead of 16 | ** sqlite3.h. 17 | */ 18 | #ifndef SQLITE3EXT_H 19 | #define SQLITE3EXT_H 20 | #include "sqlite3.h" 21 | 22 | /* 23 | ** The following structure holds pointers to all of the SQLite API 24 | ** routines. 
25 | ** 26 | ** WARNING: In order to maintain backwards compatibility, add new 27 | ** interfaces to the end of this structure only. If you insert new 28 | ** interfaces in the middle of this structure, then older different 29 | ** versions of SQLite will not be able to load each other's shared 30 | ** libraries! 31 | */ 32 | struct sqlite3_api_routines { 33 | void * (*aggregate_context)(sqlite3_context*,int nBytes); 34 | int (*aggregate_count)(sqlite3_context*); 35 | int (*bind_blob)(sqlite3_stmt*,int,const void*,int n,void(*)(void*)); 36 | int (*bind_double)(sqlite3_stmt*,int,double); 37 | int (*bind_int)(sqlite3_stmt*,int,int); 38 | int (*bind_int64)(sqlite3_stmt*,int,sqlite_int64); 39 | int (*bind_null)(sqlite3_stmt*,int); 40 | int (*bind_parameter_count)(sqlite3_stmt*); 41 | int (*bind_parameter_index)(sqlite3_stmt*,const char*zName); 42 | const char * (*bind_parameter_name)(sqlite3_stmt*,int); 43 | int (*bind_text)(sqlite3_stmt*,int,const char*,int n,void(*)(void*)); 44 | int (*bind_text16)(sqlite3_stmt*,int,const void*,int,void(*)(void*)); 45 | int (*bind_value)(sqlite3_stmt*,int,const sqlite3_value*); 46 | int (*busy_handler)(sqlite3*,int(*)(void*,int),void*); 47 | int (*busy_timeout)(sqlite3*,int ms); 48 | int (*changes)(sqlite3*); 49 | int (*close)(sqlite3*); 50 | int (*collation_needed)(sqlite3*,void*,void(*)(void*,sqlite3*, 51 | int eTextRep,const char*)); 52 | int (*collation_needed16)(sqlite3*,void*,void(*)(void*,sqlite3*, 53 | int eTextRep,const void*)); 54 | const void * (*column_blob)(sqlite3_stmt*,int iCol); 55 | int (*column_bytes)(sqlite3_stmt*,int iCol); 56 | int (*column_bytes16)(sqlite3_stmt*,int iCol); 57 | int (*column_count)(sqlite3_stmt*pStmt); 58 | const char * (*column_database_name)(sqlite3_stmt*,int); 59 | const void * (*column_database_name16)(sqlite3_stmt*,int); 60 | const char * (*column_decltype)(sqlite3_stmt*,int i); 61 | const void * (*column_decltype16)(sqlite3_stmt*,int); 62 | double (*column_double)(sqlite3_stmt*,int iCol); 
63 | int (*column_int)(sqlite3_stmt*,int iCol); 64 | sqlite_int64 (*column_int64)(sqlite3_stmt*,int iCol); 65 | const char * (*column_name)(sqlite3_stmt*,int); 66 | const void * (*column_name16)(sqlite3_stmt*,int); 67 | const char * (*column_origin_name)(sqlite3_stmt*,int); 68 | const void * (*column_origin_name16)(sqlite3_stmt*,int); 69 | const char * (*column_table_name)(sqlite3_stmt*,int); 70 | const void * (*column_table_name16)(sqlite3_stmt*,int); 71 | const unsigned char * (*column_text)(sqlite3_stmt*,int iCol); 72 | const void * (*column_text16)(sqlite3_stmt*,int iCol); 73 | int (*column_type)(sqlite3_stmt*,int iCol); 74 | sqlite3_value* (*column_value)(sqlite3_stmt*,int iCol); 75 | void * (*commit_hook)(sqlite3*,int(*)(void*),void*); 76 | int (*complete)(const char*sql); 77 | int (*complete16)(const void*sql); 78 | int (*create_collation)(sqlite3*,const char*,int,void*, 79 | int(*)(void*,int,const void*,int,const void*)); 80 | int (*create_collation16)(sqlite3*,const void*,int,void*, 81 | int(*)(void*,int,const void*,int,const void*)); 82 | int (*create_function)(sqlite3*,const char*,int,int,void*, 83 | void (*xFunc)(sqlite3_context*,int,sqlite3_value**), 84 | void (*xStep)(sqlite3_context*,int,sqlite3_value**), 85 | void (*xFinal)(sqlite3_context*)); 86 | int (*create_function16)(sqlite3*,const void*,int,int,void*, 87 | void (*xFunc)(sqlite3_context*,int,sqlite3_value**), 88 | void (*xStep)(sqlite3_context*,int,sqlite3_value**), 89 | void (*xFinal)(sqlite3_context*)); 90 | int (*create_module)(sqlite3*,const char*,const sqlite3_module*,void*); 91 | int (*data_count)(sqlite3_stmt*pStmt); 92 | sqlite3 * (*db_handle)(sqlite3_stmt*); 93 | int (*declare_vtab)(sqlite3*,const char*); 94 | int (*enable_shared_cache)(int); 95 | int (*errcode)(sqlite3*db); 96 | const char * (*errmsg)(sqlite3*); 97 | const void * (*errmsg16)(sqlite3*); 98 | int (*exec)(sqlite3*,const char*,sqlite3_callback,void*,char**); 99 | int (*expired)(sqlite3_stmt*); 100 | int 
(*finalize)(sqlite3_stmt*pStmt); 101 | void (*free)(void*); 102 | void (*free_table)(char**result); 103 | int (*get_autocommit)(sqlite3*); 104 | void * (*get_auxdata)(sqlite3_context*,int); 105 | int (*get_table)(sqlite3*,const char*,char***,int*,int*,char**); 106 | int (*global_recover)(void); 107 | void (*interruptx)(sqlite3*); 108 | sqlite_int64 (*last_insert_rowid)(sqlite3*); 109 | const char * (*libversion)(void); 110 | int (*libversion_number)(void); 111 | void *(*malloc)(int); 112 | char * (*mprintf)(const char*,...); 113 | int (*open)(const char*,sqlite3**); 114 | int (*open16)(const void*,sqlite3**); 115 | int (*prepare)(sqlite3*,const char*,int,sqlite3_stmt**,const char**); 116 | int (*prepare16)(sqlite3*,const void*,int,sqlite3_stmt**,const void**); 117 | void * (*profile)(sqlite3*,void(*)(void*,const char*,sqlite_uint64),void*); 118 | void (*progress_handler)(sqlite3*,int,int(*)(void*),void*); 119 | void *(*realloc)(void*,int); 120 | int (*reset)(sqlite3_stmt*pStmt); 121 | void (*result_blob)(sqlite3_context*,const void*,int,void(*)(void*)); 122 | void (*result_double)(sqlite3_context*,double); 123 | void (*result_error)(sqlite3_context*,const char*,int); 124 | void (*result_error16)(sqlite3_context*,const void*,int); 125 | void (*result_int)(sqlite3_context*,int); 126 | void (*result_int64)(sqlite3_context*,sqlite_int64); 127 | void (*result_null)(sqlite3_context*); 128 | void (*result_text)(sqlite3_context*,const char*,int,void(*)(void*)); 129 | void (*result_text16)(sqlite3_context*,const void*,int,void(*)(void*)); 130 | void (*result_text16be)(sqlite3_context*,const void*,int,void(*)(void*)); 131 | void (*result_text16le)(sqlite3_context*,const void*,int,void(*)(void*)); 132 | void (*result_value)(sqlite3_context*,sqlite3_value*); 133 | void * (*rollback_hook)(sqlite3*,void(*)(void*),void*); 134 | int (*set_authorizer)(sqlite3*,int(*)(void*,int,const char*,const char*, 135 | const char*,const char*),void*); 136 | void 
(*set_auxdata)(sqlite3_context*,int,void*,void (*)(void*)); 137 | char * (*xsnprintf)(int,char*,const char*,...); 138 | int (*step)(sqlite3_stmt*); 139 | int (*table_column_metadata)(sqlite3*,const char*,const char*,const char*, 140 | char const**,char const**,int*,int*,int*); 141 | void (*thread_cleanup)(void); 142 | int (*total_changes)(sqlite3*); 143 | void * (*trace)(sqlite3*,void(*xTrace)(void*,const char*),void*); 144 | int (*transfer_bindings)(sqlite3_stmt*,sqlite3_stmt*); 145 | void * (*update_hook)(sqlite3*,void(*)(void*,int ,char const*,char const*, 146 | sqlite_int64),void*); 147 | void * (*user_data)(sqlite3_context*); 148 | const void * (*value_blob)(sqlite3_value*); 149 | int (*value_bytes)(sqlite3_value*); 150 | int (*value_bytes16)(sqlite3_value*); 151 | double (*value_double)(sqlite3_value*); 152 | int (*value_int)(sqlite3_value*); 153 | sqlite_int64 (*value_int64)(sqlite3_value*); 154 | int (*value_numeric_type)(sqlite3_value*); 155 | const unsigned char * (*value_text)(sqlite3_value*); 156 | const void * (*value_text16)(sqlite3_value*); 157 | const void * (*value_text16be)(sqlite3_value*); 158 | const void * (*value_text16le)(sqlite3_value*); 159 | int (*value_type)(sqlite3_value*); 160 | char *(*vmprintf)(const char*,va_list); 161 | /* Added ??? 
*/ 162 | int (*overload_function)(sqlite3*, const char *zFuncName, int nArg); 163 | /* Added by 3.3.13 */ 164 | int (*prepare_v2)(sqlite3*,const char*,int,sqlite3_stmt**,const char**); 165 | int (*prepare16_v2)(sqlite3*,const void*,int,sqlite3_stmt**,const void**); 166 | int (*clear_bindings)(sqlite3_stmt*); 167 | /* Added by 3.4.1 */ 168 | int (*create_module_v2)(sqlite3*,const char*,const sqlite3_module*,void*, 169 | void (*xDestroy)(void *)); 170 | /* Added by 3.5.0 */ 171 | int (*bind_zeroblob)(sqlite3_stmt*,int,int); 172 | int (*blob_bytes)(sqlite3_blob*); 173 | int (*blob_close)(sqlite3_blob*); 174 | int (*blob_open)(sqlite3*,const char*,const char*,const char*,sqlite3_int64, 175 | int,sqlite3_blob**); 176 | int (*blob_read)(sqlite3_blob*,void*,int,int); 177 | int (*blob_write)(sqlite3_blob*,const void*,int,int); 178 | int (*create_collation_v2)(sqlite3*,const char*,int,void*, 179 | int(*)(void*,int,const void*,int,const void*), 180 | void(*)(void*)); 181 | int (*file_control)(sqlite3*,const char*,int,void*); 182 | sqlite3_int64 (*memory_highwater)(int); 183 | sqlite3_int64 (*memory_used)(void); 184 | sqlite3_mutex *(*mutex_alloc)(int); 185 | void (*mutex_enter)(sqlite3_mutex*); 186 | void (*mutex_free)(sqlite3_mutex*); 187 | void (*mutex_leave)(sqlite3_mutex*); 188 | int (*mutex_try)(sqlite3_mutex*); 189 | int (*open_v2)(const char*,sqlite3**,int,const char*); 190 | int (*release_memory)(int); 191 | void (*result_error_nomem)(sqlite3_context*); 192 | void (*result_error_toobig)(sqlite3_context*); 193 | int (*sleep)(int); 194 | void (*soft_heap_limit)(int); 195 | sqlite3_vfs *(*vfs_find)(const char*); 196 | int (*vfs_register)(sqlite3_vfs*,int); 197 | int (*vfs_unregister)(sqlite3_vfs*); 198 | int (*xthreadsafe)(void); 199 | void (*result_zeroblob)(sqlite3_context*,int); 200 | void (*result_error_code)(sqlite3_context*,int); 201 | int (*test_control)(int, ...); 202 | void (*randomness)(int,void*); 203 | sqlite3 *(*context_db_handle)(sqlite3_context*); 204 | 
int (*extended_result_codes)(sqlite3*,int); 205 | int (*limit)(sqlite3*,int,int); 206 | sqlite3_stmt *(*next_stmt)(sqlite3*,sqlite3_stmt*); 207 | const char *(*sql)(sqlite3_stmt*); 208 | int (*status)(int,int*,int*,int); 209 | int (*backup_finish)(sqlite3_backup*); 210 | sqlite3_backup *(*backup_init)(sqlite3*,const char*,sqlite3*,const char*); 211 | int (*backup_pagecount)(sqlite3_backup*); 212 | int (*backup_remaining)(sqlite3_backup*); 213 | int (*backup_step)(sqlite3_backup*,int); 214 | const char *(*compileoption_get)(int); 215 | int (*compileoption_used)(const char*); 216 | int (*create_function_v2)(sqlite3*,const char*,int,int,void*, 217 | void (*xFunc)(sqlite3_context*,int,sqlite3_value**), 218 | void (*xStep)(sqlite3_context*,int,sqlite3_value**), 219 | void (*xFinal)(sqlite3_context*), 220 | void(*xDestroy)(void*)); 221 | int (*db_config)(sqlite3*,int,...); 222 | sqlite3_mutex *(*db_mutex)(sqlite3*); 223 | int (*db_status)(sqlite3*,int,int*,int*,int); 224 | int (*extended_errcode)(sqlite3*); 225 | void (*log)(int,const char*,...); 226 | sqlite3_int64 (*soft_heap_limit64)(sqlite3_int64); 227 | const char *(*sourceid)(void); 228 | int (*stmt_status)(sqlite3_stmt*,int,int); 229 | int (*strnicmp)(const char*,const char*,int); 230 | int (*unlock_notify)(sqlite3*,void(*)(void**,int),void*); 231 | int (*wal_autocheckpoint)(sqlite3*,int); 232 | int (*wal_checkpoint)(sqlite3*,const char*); 233 | void *(*wal_hook)(sqlite3*,int(*)(void*,sqlite3*,const char*,int),void*); 234 | int (*blob_reopen)(sqlite3_blob*,sqlite3_int64); 235 | int (*vtab_config)(sqlite3*,int op,...); 236 | int (*vtab_on_conflict)(sqlite3*); 237 | /* Version 3.7.16 and later */ 238 | int (*close_v2)(sqlite3*); 239 | const char *(*db_filename)(sqlite3*,const char*); 240 | int (*db_readonly)(sqlite3*,const char*); 241 | int (*db_release_memory)(sqlite3*); 242 | const char *(*errstr)(int); 243 | int (*stmt_busy)(sqlite3_stmt*); 244 | int (*stmt_readonly)(sqlite3_stmt*); 245 | int (*stricmp)(const 
char*,const char*); 246 | int (*uri_boolean)(const char*,const char*,int); 247 | sqlite3_int64 (*uri_int64)(const char*,const char*,sqlite3_int64); 248 | const char *(*uri_parameter)(const char*,const char*); 249 | char *(*xvsnprintf)(int,char*,const char*,va_list); 250 | int (*wal_checkpoint_v2)(sqlite3*,const char*,int,int*,int*); 251 | /* Version 3.8.7 and later */ 252 | int (*auto_extension)(void(*)(void)); 253 | int (*bind_blob64)(sqlite3_stmt*,int,const void*,sqlite3_uint64, 254 | void(*)(void*)); 255 | int (*bind_text64)(sqlite3_stmt*,int,const char*,sqlite3_uint64, 256 | void(*)(void*),unsigned char); 257 | int (*cancel_auto_extension)(void(*)(void)); 258 | int (*load_extension)(sqlite3*,const char*,const char*,char**); 259 | void *(*malloc64)(sqlite3_uint64); 260 | sqlite3_uint64 (*msize)(void*); 261 | void *(*realloc64)(void*,sqlite3_uint64); 262 | void (*reset_auto_extension)(void); 263 | void (*result_blob64)(sqlite3_context*,const void*,sqlite3_uint64, 264 | void(*)(void*)); 265 | void (*result_text64)(sqlite3_context*,const char*,sqlite3_uint64, 266 | void(*)(void*), unsigned char); 267 | int (*strglob)(const char*,const char*); 268 | /* Version 3.8.11 and later */ 269 | sqlite3_value *(*value_dup)(const sqlite3_value*); 270 | void (*value_free)(sqlite3_value*); 271 | int (*result_zeroblob64)(sqlite3_context*,sqlite3_uint64); 272 | int (*bind_zeroblob64)(sqlite3_stmt*, int, sqlite3_uint64); 273 | /* Version 3.9.0 and later */ 274 | unsigned int (*value_subtype)(sqlite3_value*); 275 | void (*result_subtype)(sqlite3_context*,unsigned int); 276 | /* Version 3.10.0 and later */ 277 | int (*status64)(int,sqlite3_int64*,sqlite3_int64*,int); 278 | int (*strlike)(const char*,const char*,unsigned int); 279 | int (*db_cacheflush)(sqlite3*); 280 | /* Version 3.12.0 and later */ 281 | int (*system_errno)(sqlite3*); 282 | /* Version 3.14.0 and later */ 283 | int (*trace_v2)(sqlite3*,unsigned,int(*)(unsigned,void*,void*,void*),void*); 284 | char 
*(*expanded_sql)(sqlite3_stmt*); 285 | /* Version 3.18.0 and later */ 286 | void (*set_last_insert_rowid)(sqlite3*,sqlite3_int64); 287 | /* Version 3.20.0 and later */ 288 | int (*prepare_v3)(sqlite3*,const char*,int,unsigned int, 289 | sqlite3_stmt**,const char**); 290 | int (*prepare16_v3)(sqlite3*,const void*,int,unsigned int, 291 | sqlite3_stmt**,const void**); 292 | int (*bind_pointer)(sqlite3_stmt*,int,void*,const char*,void(*)(void*)); 293 | void (*result_pointer)(sqlite3_context*,void*,const char*,void(*)(void*)); 294 | void *(*value_pointer)(sqlite3_value*,const char*); 295 | int (*vtab_nochange)(sqlite3_context*); 296 | int (*value_nochange)(sqlite3_value*); 297 | const char *(*vtab_collation)(sqlite3_index_info*,int); 298 | /* Version 3.24.0 and later */ 299 | int (*keyword_count)(void); 300 | int (*keyword_name)(int,const char**,int*); 301 | int (*keyword_check)(const char*,int); 302 | sqlite3_str *(*str_new)(sqlite3*); 303 | char *(*str_finish)(sqlite3_str*); 304 | void (*str_appendf)(sqlite3_str*, const char *zFormat, ...); 305 | void (*str_vappendf)(sqlite3_str*, const char *zFormat, va_list); 306 | void (*str_append)(sqlite3_str*, const char *zIn, int N); 307 | void (*str_appendall)(sqlite3_str*, const char *zIn); 308 | void (*str_appendchar)(sqlite3_str*, int N, char C); 309 | void (*str_reset)(sqlite3_str*); 310 | int (*str_errcode)(sqlite3_str*); 311 | int (*str_length)(sqlite3_str*); 312 | char *(*str_value)(sqlite3_str*); 313 | /* Version 3.25.0 and later */ 314 | int (*create_window_function)(sqlite3*,const char*,int,int,void*, 315 | void (*xStep)(sqlite3_context*,int,sqlite3_value**), 316 | void (*xFinal)(sqlite3_context*), 317 | void (*xValue)(sqlite3_context*), 318 | void (*xInv)(sqlite3_context*,int,sqlite3_value**), 319 | void(*xDestroy)(void*)); 320 | /* Version 3.26.0 and later */ 321 | const char *(*normalized_sql)(sqlite3_stmt*); 322 | /* Version 3.28.0 and later */ 323 | int (*stmt_isexplain)(sqlite3_stmt*); 324 | int 
(*value_frombind)(sqlite3_value*);
  /* Version 3.30.0 and later */
  int (*drop_modules)(sqlite3*,const char**);
  /* Version 3.31.0 and later */
  sqlite3_int64 (*hard_heap_limit64)(sqlite3_int64);
  const char *(*uri_key)(const char*,int);
  const char *(*filename_database)(const char*);
  const char *(*filename_journal)(const char*);
  const char *(*filename_wal)(const char*);
  /* Version 3.32.0 and later */
  char *(*create_filename)(const char*,const char*,const char*,
                           int,const char**);
  void (*free_filename)(char*);
  sqlite3_file *(*database_file_object)(const char*);
};

/*
** This is the function signature used for all extension entry points.  It
** is also defined in the file "loadext.c".
**
** Parameters:
**   db        Handle to the database.
**   pzErrMsg  Out-parameter used to set an error string on failure.
**   pThunk    Pointer to the sqlite3_api_routines table of API function
**             pointers.  The SQLITE_EXTENSION_INIT2() macro defined near
**             the end of this header stores a pointer of this type in
**             sqlite3_api.
**
** NOTE(review): the int return value is presumably an SQLite result code;
** confirm against loadext.c.
*/
typedef int (*sqlite3_loadext_entry)(
  sqlite3 *db,                       /* Handle to the database. */
  char **pzErrMsg,                   /* Used to set error string on failure. */
  const sqlite3_api_routines *pThunk /* Extension API function pointers. */
);

/*
** The following macros redefine the API routines so that they are
** redirected through the global sqlite3_api structure.
**
** This header file is also used by the loadext.c source file
** (part of the main SQLite library - not an extension) so that
** it can get access to the sqlite3_api_routines structure
** definition.  But the main library does not want to redefine
** the API.  So the redefinition macros are only valid if the
** SQLITE_CORE macro is undefined.
*/
/*
** Redirect table.  When neither SQLITE_CORE nor SQLITE_OMIT_LOAD_EXTENSION
** is defined, every public sqlite3_xxx name below is rewritten by the
** preprocessor into a member access on the sqlite3_api pointer, e.g.
** sqlite3_step(p) becomes sqlite3_api->step(p).  The sqlite3_api pointer
** itself is defined by SQLITE_EXTENSION_INIT1 (or declared extern by
** SQLITE_EXTENSION_INIT3) further down in this header.
**
** Most macros map to the struct member of the same name without the
** "sqlite3_" prefix; the few exceptions are called out individually.
*/
#if !defined(SQLITE_CORE) && !defined(SQLITE_OMIT_LOAD_EXTENSION)
#define sqlite3_aggregate_context sqlite3_api->aggregate_context
/* Only redirected when deprecated interfaces have not been omitted. */
#ifndef SQLITE_OMIT_DEPRECATED
#define sqlite3_aggregate_count sqlite3_api->aggregate_count
#endif
#define sqlite3_bind_blob sqlite3_api->bind_blob
#define sqlite3_bind_double sqlite3_api->bind_double
#define sqlite3_bind_int sqlite3_api->bind_int
#define sqlite3_bind_int64 sqlite3_api->bind_int64
#define sqlite3_bind_null sqlite3_api->bind_null
#define sqlite3_bind_parameter_count sqlite3_api->bind_parameter_count
#define sqlite3_bind_parameter_index sqlite3_api->bind_parameter_index
#define sqlite3_bind_parameter_name sqlite3_api->bind_parameter_name
#define sqlite3_bind_text sqlite3_api->bind_text
#define sqlite3_bind_text16 sqlite3_api->bind_text16
#define sqlite3_bind_value sqlite3_api->bind_value
#define sqlite3_busy_handler sqlite3_api->busy_handler
#define sqlite3_busy_timeout sqlite3_api->busy_timeout
#define sqlite3_changes sqlite3_api->changes
#define sqlite3_close sqlite3_api->close
#define sqlite3_collation_needed sqlite3_api->collation_needed
#define sqlite3_collation_needed16 sqlite3_api->collation_needed16
#define sqlite3_column_blob sqlite3_api->column_blob
#define sqlite3_column_bytes sqlite3_api->column_bytes
#define sqlite3_column_bytes16 sqlite3_api->column_bytes16
#define sqlite3_column_count sqlite3_api->column_count
#define sqlite3_column_database_name sqlite3_api->column_database_name
#define sqlite3_column_database_name16 sqlite3_api->column_database_name16
#define sqlite3_column_decltype sqlite3_api->column_decltype
#define sqlite3_column_decltype16 sqlite3_api->column_decltype16
#define sqlite3_column_double sqlite3_api->column_double
#define sqlite3_column_int sqlite3_api->column_int
#define sqlite3_column_int64 sqlite3_api->column_int64
#define sqlite3_column_name sqlite3_api->column_name
#define sqlite3_column_name16 sqlite3_api->column_name16
#define sqlite3_column_origin_name sqlite3_api->column_origin_name
#define sqlite3_column_origin_name16 sqlite3_api->column_origin_name16
#define sqlite3_column_table_name sqlite3_api->column_table_name
#define sqlite3_column_table_name16 sqlite3_api->column_table_name16
#define sqlite3_column_text sqlite3_api->column_text
#define sqlite3_column_text16 sqlite3_api->column_text16
#define sqlite3_column_type sqlite3_api->column_type
#define sqlite3_column_value sqlite3_api->column_value
#define sqlite3_commit_hook sqlite3_api->commit_hook
#define sqlite3_complete sqlite3_api->complete
#define sqlite3_complete16 sqlite3_api->complete16
#define sqlite3_create_collation sqlite3_api->create_collation
#define sqlite3_create_collation16 sqlite3_api->create_collation16
#define sqlite3_create_function sqlite3_api->create_function
#define sqlite3_create_function16 sqlite3_api->create_function16
#define sqlite3_create_module sqlite3_api->create_module
#define sqlite3_create_module_v2 sqlite3_api->create_module_v2
#define sqlite3_data_count sqlite3_api->data_count
#define sqlite3_db_handle sqlite3_api->db_handle
#define sqlite3_declare_vtab sqlite3_api->declare_vtab
#define sqlite3_enable_shared_cache sqlite3_api->enable_shared_cache
#define sqlite3_errcode sqlite3_api->errcode
#define sqlite3_errmsg sqlite3_api->errmsg
#define sqlite3_errmsg16 sqlite3_api->errmsg16
#define sqlite3_exec sqlite3_api->exec
/* Only redirected when deprecated interfaces have not been omitted. */
#ifndef SQLITE_OMIT_DEPRECATED
#define sqlite3_expired sqlite3_api->expired
#endif
#define sqlite3_finalize sqlite3_api->finalize
#define sqlite3_free sqlite3_api->free
#define sqlite3_free_table sqlite3_api->free_table
#define sqlite3_get_autocommit sqlite3_api->get_autocommit
#define sqlite3_get_auxdata sqlite3_api->get_auxdata
#define sqlite3_get_table sqlite3_api->get_table
/* Only redirected when deprecated interfaces have not been omitted. */
#ifndef SQLITE_OMIT_DEPRECATED
#define sqlite3_global_recover sqlite3_api->global_recover
#endif
/* Name exception: the member used here is "interruptx", not "interrupt". */
#define sqlite3_interrupt sqlite3_api->interruptx
#define sqlite3_last_insert_rowid sqlite3_api->last_insert_rowid
#define sqlite3_libversion sqlite3_api->libversion
#define sqlite3_libversion_number sqlite3_api->libversion_number
#define sqlite3_malloc sqlite3_api->malloc
#define sqlite3_mprintf sqlite3_api->mprintf
#define sqlite3_open sqlite3_api->open
#define sqlite3_open16 sqlite3_api->open16
#define sqlite3_prepare sqlite3_api->prepare
#define sqlite3_prepare16 sqlite3_api->prepare16
#define sqlite3_prepare_v2 sqlite3_api->prepare_v2
#define sqlite3_prepare16_v2 sqlite3_api->prepare16_v2
#define sqlite3_profile sqlite3_api->profile
#define sqlite3_progress_handler sqlite3_api->progress_handler
#define sqlite3_realloc sqlite3_api->realloc
#define sqlite3_reset sqlite3_api->reset
#define sqlite3_result_blob sqlite3_api->result_blob
#define sqlite3_result_double sqlite3_api->result_double
#define sqlite3_result_error sqlite3_api->result_error
#define sqlite3_result_error16 sqlite3_api->result_error16
#define sqlite3_result_int sqlite3_api->result_int
#define sqlite3_result_int64 sqlite3_api->result_int64
#define sqlite3_result_null sqlite3_api->result_null
#define sqlite3_result_text sqlite3_api->result_text
#define sqlite3_result_text16 sqlite3_api->result_text16
#define sqlite3_result_text16be sqlite3_api->result_text16be
#define sqlite3_result_text16le sqlite3_api->result_text16le
#define sqlite3_result_value sqlite3_api->result_value
#define sqlite3_rollback_hook sqlite3_api->rollback_hook
#define sqlite3_set_authorizer sqlite3_api->set_authorizer
#define sqlite3_set_auxdata sqlite3_api->set_auxdata
/* Name exception: the struct member is "xsnprintf", not "snprintf". */
#define sqlite3_snprintf sqlite3_api->xsnprintf
#define sqlite3_step sqlite3_api->step
#define sqlite3_table_column_metadata sqlite3_api->table_column_metadata
#define sqlite3_thread_cleanup sqlite3_api->thread_cleanup
#define sqlite3_total_changes sqlite3_api->total_changes
#define sqlite3_trace sqlite3_api->trace
/* Only redirected when deprecated interfaces have not been omitted. */
#ifndef SQLITE_OMIT_DEPRECATED
#define sqlite3_transfer_bindings sqlite3_api->transfer_bindings
#endif
#define sqlite3_update_hook sqlite3_api->update_hook
#define sqlite3_user_data sqlite3_api->user_data
#define sqlite3_value_blob sqlite3_api->value_blob
#define sqlite3_value_bytes sqlite3_api->value_bytes
#define sqlite3_value_bytes16 sqlite3_api->value_bytes16
#define sqlite3_value_double sqlite3_api->value_double
#define sqlite3_value_int sqlite3_api->value_int
#define sqlite3_value_int64 sqlite3_api->value_int64
#define sqlite3_value_numeric_type sqlite3_api->value_numeric_type
#define sqlite3_value_text sqlite3_api->value_text
#define sqlite3_value_text16 sqlite3_api->value_text16
#define sqlite3_value_text16be sqlite3_api->value_text16be
#define sqlite3_value_text16le sqlite3_api->value_text16le
#define sqlite3_value_type sqlite3_api->value_type
#define sqlite3_vmprintf sqlite3_api->vmprintf
/* Name exception: the struct member is "xvsnprintf", not "vsnprintf". */
#define sqlite3_vsnprintf sqlite3_api->xvsnprintf
#define sqlite3_overload_function sqlite3_api->overload_function
/* The next two repeat definitions made earlier in this table; the
** replacement text is identical, so the redefinition is benign. */
#define sqlite3_prepare_v2 sqlite3_api->prepare_v2
#define sqlite3_prepare16_v2 sqlite3_api->prepare16_v2
#define sqlite3_clear_bindings sqlite3_api->clear_bindings
#define sqlite3_bind_zeroblob sqlite3_api->bind_zeroblob
#define sqlite3_blob_bytes sqlite3_api->blob_bytes
#define sqlite3_blob_close sqlite3_api->blob_close
#define sqlite3_blob_open sqlite3_api->blob_open
#define sqlite3_blob_read sqlite3_api->blob_read
#define sqlite3_blob_write sqlite3_api->blob_write
#define sqlite3_create_collation_v2 sqlite3_api->create_collation_v2
#define sqlite3_file_control sqlite3_api->file_control
#define sqlite3_memory_highwater sqlite3_api->memory_highwater
#define sqlite3_memory_used sqlite3_api->memory_used
#define sqlite3_mutex_alloc sqlite3_api->mutex_alloc
#define sqlite3_mutex_enter sqlite3_api->mutex_enter
#define sqlite3_mutex_free sqlite3_api->mutex_free
#define sqlite3_mutex_leave sqlite3_api->mutex_leave
#define sqlite3_mutex_try sqlite3_api->mutex_try
#define sqlite3_open_v2 sqlite3_api->open_v2
#define sqlite3_release_memory sqlite3_api->release_memory
#define sqlite3_result_error_nomem sqlite3_api->result_error_nomem
#define sqlite3_result_error_toobig sqlite3_api->result_error_toobig
#define sqlite3_sleep sqlite3_api->sleep
#define sqlite3_soft_heap_limit sqlite3_api->soft_heap_limit
#define sqlite3_vfs_find sqlite3_api->vfs_find
#define sqlite3_vfs_register sqlite3_api->vfs_register
#define sqlite3_vfs_unregister sqlite3_api->vfs_unregister
/* Name exception: the struct member is "xthreadsafe", not "threadsafe". */
#define sqlite3_threadsafe sqlite3_api->xthreadsafe
#define sqlite3_result_zeroblob sqlite3_api->result_zeroblob
#define sqlite3_result_error_code sqlite3_api->result_error_code
#define sqlite3_test_control sqlite3_api->test_control
#define sqlite3_randomness sqlite3_api->randomness
#define sqlite3_context_db_handle sqlite3_api->context_db_handle
#define sqlite3_extended_result_codes sqlite3_api->extended_result_codes
#define sqlite3_limit sqlite3_api->limit
#define sqlite3_next_stmt sqlite3_api->next_stmt
#define sqlite3_sql sqlite3_api->sql
#define sqlite3_status sqlite3_api->status
#define sqlite3_backup_finish sqlite3_api->backup_finish
#define sqlite3_backup_init sqlite3_api->backup_init
#define sqlite3_backup_pagecount sqlite3_api->backup_pagecount
#define sqlite3_backup_remaining sqlite3_api->backup_remaining
#define sqlite3_backup_step sqlite3_api->backup_step
#define sqlite3_compileoption_get sqlite3_api->compileoption_get
#define sqlite3_compileoption_used sqlite3_api->compileoption_used
#define sqlite3_create_function_v2 sqlite3_api->create_function_v2
#define sqlite3_db_config sqlite3_api->db_config
#define sqlite3_db_mutex sqlite3_api->db_mutex
#define sqlite3_db_status sqlite3_api->db_status
#define sqlite3_extended_errcode sqlite3_api->extended_errcode
#define sqlite3_log sqlite3_api->log
#define sqlite3_soft_heap_limit64 sqlite3_api->soft_heap_limit64
#define sqlite3_sourceid sqlite3_api->sourceid
#define sqlite3_stmt_status sqlite3_api->stmt_status
#define sqlite3_strnicmp sqlite3_api->strnicmp
#define sqlite3_unlock_notify sqlite3_api->unlock_notify
#define sqlite3_wal_autocheckpoint sqlite3_api->wal_autocheckpoint
#define sqlite3_wal_checkpoint sqlite3_api->wal_checkpoint
#define sqlite3_wal_hook sqlite3_api->wal_hook
#define sqlite3_blob_reopen sqlite3_api->blob_reopen
#define sqlite3_vtab_config sqlite3_api->vtab_config
#define sqlite3_vtab_on_conflict sqlite3_api->vtab_on_conflict
/* Version 3.7.16 and later */
#define sqlite3_close_v2 sqlite3_api->close_v2
#define sqlite3_db_filename sqlite3_api->db_filename
#define sqlite3_db_readonly sqlite3_api->db_readonly
#define sqlite3_db_release_memory sqlite3_api->db_release_memory
#define sqlite3_errstr sqlite3_api->errstr
#define sqlite3_stmt_busy sqlite3_api->stmt_busy
#define sqlite3_stmt_readonly sqlite3_api->stmt_readonly
#define sqlite3_stricmp sqlite3_api->stricmp
#define sqlite3_uri_boolean sqlite3_api->uri_boolean
#define sqlite3_uri_int64 sqlite3_api->uri_int64
#define sqlite3_uri_parameter sqlite3_api->uri_parameter
/* NOTE(review): this maps to xvsnprintf, the same member sqlite3_vsnprintf
** uses above; no "uri_vsnprintf" member appears in the visible part of
** sqlite3_api_routines.  Presumably an upstream quirk -- confirm before
** relying on this name. */
#define sqlite3_uri_vsnprintf sqlite3_api->xvsnprintf
#define sqlite3_wal_checkpoint_v2 sqlite3_api->wal_checkpoint_v2
/* Version 3.8.7 and later */
#define sqlite3_auto_extension sqlite3_api->auto_extension
#define sqlite3_bind_blob64 sqlite3_api->bind_blob64
#define sqlite3_bind_text64 sqlite3_api->bind_text64
#define sqlite3_cancel_auto_extension sqlite3_api->cancel_auto_extension
#define sqlite3_load_extension sqlite3_api->load_extension
#define sqlite3_malloc64 sqlite3_api->malloc64
#define sqlite3_msize sqlite3_api->msize
#define sqlite3_realloc64 sqlite3_api->realloc64
#define sqlite3_reset_auto_extension sqlite3_api->reset_auto_extension
#define sqlite3_result_blob64 sqlite3_api->result_blob64
#define sqlite3_result_text64 sqlite3_api->result_text64
#define sqlite3_strglob sqlite3_api->strglob
/* Version 3.8.11 and later */
#define sqlite3_value_dup sqlite3_api->value_dup
#define sqlite3_value_free sqlite3_api->value_free
#define sqlite3_result_zeroblob64 sqlite3_api->result_zeroblob64
#define sqlite3_bind_zeroblob64 sqlite3_api->bind_zeroblob64
/* Version 3.9.0 and later */
#define sqlite3_value_subtype sqlite3_api->value_subtype
#define sqlite3_result_subtype sqlite3_api->result_subtype
/* Version 3.10.0 and later */
#define sqlite3_status64 sqlite3_api->status64
#define sqlite3_strlike sqlite3_api->strlike
#define sqlite3_db_cacheflush sqlite3_api->db_cacheflush
/* Version 3.12.0 and later */
#define sqlite3_system_errno sqlite3_api->system_errno
/* Version 3.14.0 and later */
#define sqlite3_trace_v2 sqlite3_api->trace_v2
#define sqlite3_expanded_sql sqlite3_api->expanded_sql
/* Version 3.18.0 and later */
#define sqlite3_set_last_insert_rowid sqlite3_api->set_last_insert_rowid
/* Version 3.20.0 and later */
#define sqlite3_prepare_v3 sqlite3_api->prepare_v3
#define sqlite3_prepare16_v3 sqlite3_api->prepare16_v3
#define sqlite3_bind_pointer sqlite3_api->bind_pointer
#define sqlite3_result_pointer sqlite3_api->result_pointer
#define sqlite3_value_pointer sqlite3_api->value_pointer
/* Version 3.22.0 and later */
#define sqlite3_vtab_nochange sqlite3_api->vtab_nochange
#define sqlite3_value_nochange sqlite3_api->value_nochange
#define sqlite3_vtab_collation sqlite3_api->vtab_collation
/* Version 3.24.0 and later */
#define sqlite3_keyword_count sqlite3_api->keyword_count
#define sqlite3_keyword_name sqlite3_api->keyword_name
#define sqlite3_keyword_check sqlite3_api->keyword_check
#define sqlite3_str_new sqlite3_api->str_new
#define sqlite3_str_finish sqlite3_api->str_finish
#define sqlite3_str_appendf sqlite3_api->str_appendf
#define sqlite3_str_vappendf sqlite3_api->str_vappendf
#define sqlite3_str_append sqlite3_api->str_append
#define sqlite3_str_appendall sqlite3_api->str_appendall
#define sqlite3_str_appendchar sqlite3_api->str_appendchar
#define sqlite3_str_reset sqlite3_api->str_reset
#define sqlite3_str_errcode sqlite3_api->str_errcode
#define sqlite3_str_length sqlite3_api->str_length
#define sqlite3_str_value sqlite3_api->str_value
/* Version 3.25.0 and later */
#define sqlite3_create_window_function sqlite3_api->create_window_function
/* Version 3.26.0 and later */
#define sqlite3_normalized_sql sqlite3_api->normalized_sql
/* Version 3.28.0 and later */
#define sqlite3_stmt_isexplain sqlite3_api->stmt_isexplain
#define sqlite3_value_frombind sqlite3_api->value_frombind
/* Version 3.30.0 and later */
#define sqlite3_drop_modules sqlite3_api->drop_modules
/* Version 3.31.0 and later */
#define sqlite3_hard_heap_limit64 sqlite3_api->hard_heap_limit64 634 | #define sqlite3_uri_key sqlite3_api->uri_key 635 | #define sqlite3_filename_database sqlite3_api->filename_database 636 | #define sqlite3_filename_journal sqlite3_api->filename_journal 637 | #define sqlite3_filename_wal sqlite3_api->filename_wal 638 | /* Version 3.32.0 and later */ 639 | #define sqlite3_create_filename sqlite3_api->create_filename 640 | #define sqlite3_free_filename sqlite3_api->free_filename 641 | #define sqlite3_database_file_object sqlite3_api->database_file_object 642 | #endif /* !defined(SQLITE_CORE) && !defined(SQLITE_OMIT_LOAD_EXTENSION) */ 643 | 644 | #if !defined(SQLITE_CORE) && !defined(SQLITE_OMIT_LOAD_EXTENSION) 645 | /* This case when the file really is being compiled as a loadable 646 | ** extension */ 647 | # define SQLITE_EXTENSION_INIT1 const sqlite3_api_routines *sqlite3_api=0; 648 | # define SQLITE_EXTENSION_INIT2(v) sqlite3_api=v; 649 | # define SQLITE_EXTENSION_INIT3 \ 650 | extern const sqlite3_api_routines *sqlite3_api; 651 | #else 652 | /* This case when the file is being statically linked into the 653 | ** application */ 654 | # define SQLITE_EXTENSION_INIT1 /*no-op*/ 655 | # define SQLITE_EXTENSION_INIT2(v) (void)v; /* unused parameter */ 656 | # define SQLITE_EXTENSION_INIT3 /*no-op*/ 657 | #endif 658 | 659 | #endif /* SQLITE3EXT_H */ 660 | --------------------------------------------------------------------------------