├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── setup.py ├── spellfix.c ├── sqlite_spellfix.py └── tea.yaml /.gitignore: -------------------------------------------------------------------------------- 1 | /build 2 | /dist 3 | *.pyc 4 | *.so 5 | *.egg-info 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Karl Bartel 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | build: 2 | python3 -m build 3 | 4 | upload: 5 | python3 -m twine upload dist/sqlite_spellfix-*.tar.gz 6 | 7 | test-upload: 8 | python3 -m twine upload --repository testpypi dist/sqlite_spellfix-*.tar.gz 9 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # sqlite-spellfix 2 | 3 | This python package includes a loadable `spellfix1` extension module for sqlite. This allows other python packages to use this extension without requiring dependencies outside of the python ecosystem. For more details on the extension itself, see [the official documentation](https://www.sqlite.org/spellfix1.html). 4 | 5 | ## Installation 6 | 7 | ### Latest Release 8 | 9 | Install the `sqlite-spellfix` package from pypi. 
10 | 11 | ### Current Development Version 12 | 13 | Install via pip 14 | 15 | ```sh 16 | pip install git+https://github.com/karlb/sqlite-spellfix 17 | ``` 18 | 19 | or add this to your requirements.txt: 20 | 21 | ``` 22 | git+https://github.com/karlb/sqlite-spellfix 23 | ``` 24 | 25 | 26 | ## Usage 27 | 28 | 29 | ```python 30 | import sqlite3 31 | import sqlite_spellfix 32 | 33 | conn = sqlite3.connect(":memory:") 34 | conn.enable_load_extension(True) 35 | conn.load_extension(sqlite_spellfix.extension_path()) 36 | # now use as described in https://www.sqlite.org/spellfix1.html 37 | ``` 38 | 39 | ## See Also 40 | * [sqlite-icu python package](https://github.com/karlb/sqlite-icu) 41 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, Extension # type: ignore 2 | 3 | # read the contents of README 4 | from os import path 5 | this_directory = path.abspath(path.dirname(__file__)) 6 | with open(path.join(this_directory, "README.md"), encoding="utf-8") as f: 7 | long_description = f.read() 8 | 9 | spellfix1 = Extension("spellfix1", sources=["spellfix.c"]) 10 | 11 | setup( 12 | name="sqlite-spellfix", 13 | version="1.1", 14 | description="Loadable spellfix1 extension for sqlite", 15 | py_modules=["sqlite_spellfix"], 16 | ext_modules=[spellfix1], 17 | url="http://github.com/karlb/sqlite-spellfix", 18 | long_description=long_description, 19 | long_description_content_type="text/markdown", 20 | setup_requires=['wheel'], 21 | ) 22 | -------------------------------------------------------------------------------- /spellfix.c: -------------------------------------------------------------------------------- 1 | /* 2 | ** 2012 April 10 3 | ** 4 | ** The author disclaims copyright to this source code. In place of 5 | ** a legal notice, here is a blessing: 6 | ** 7 | ** May you do good and not evil. 
8 | ** May you find forgiveness for yourself and forgive others. 9 | ** May you share freely, never taking more than you give. 10 | ** 11 | ************************************************************************* 12 | ** 13 | ** This module implements the spellfix1 VIRTUAL TABLE that can be used 14 | ** to search a large vocabulary for close matches. See separate 15 | ** documentation (http://www.sqlite.org/spellfix1.html) for details. 16 | */ 17 | #include "sqlite3ext.h" 18 | SQLITE_EXTENSION_INIT1 19 | 20 | #ifndef SQLITE_AMALGAMATION 21 | # if !defined(NDEBUG) && !defined(SQLITE_DEBUG) 22 | # define NDEBUG 1 23 | # endif 24 | # if defined(NDEBUG) && defined(SQLITE_DEBUG) 25 | # undef NDEBUG 26 | # endif 27 | # include 28 | # include 29 | # include 30 | # include 31 | # define ALWAYS(X) 1 32 | # define NEVER(X) 0 33 | typedef unsigned char u8; 34 | typedef unsigned short u16; 35 | #endif 36 | #include 37 | 38 | #ifndef SQLITE_OMIT_VIRTUALTABLE 39 | 40 | /* 41 | ** Character classes for ASCII characters: 42 | ** 43 | ** 0 '' Silent letters: H W 44 | ** 1 'A' Any vowel: A E I O U (Y) 45 | ** 2 'B' A bilabial stop or fricative: B F P V W 46 | ** 3 'C' Other fricatives or back stops: C G J K Q S X Z 47 | ** 4 'D' Alveolar stops: D T 48 | ** 5 'H' Letter H at the beginning of a word 49 | ** 6 'L' Glide: L 50 | ** 7 'R' Semivowel: R 51 | ** 8 'M' Nasals: M N 52 | ** 9 'Y' Letter Y at the beginning of a word. 53 | ** 10 '9' Digits: 0 1 2 3 4 5 6 7 8 9 54 | ** 11 ' ' White space 55 | ** 12 '?' Other. 56 | */ 57 | #define CCLASS_SILENT 0 58 | #define CCLASS_VOWEL 1 59 | #define CCLASS_B 2 60 | #define CCLASS_C 3 61 | #define CCLASS_D 4 62 | #define CCLASS_H 5 63 | #define CCLASS_L 6 64 | #define CCLASS_R 7 65 | #define CCLASS_M 8 66 | #define CCLASS_Y 9 67 | #define CCLASS_DIGIT 10 68 | #define CCLASS_SPACE 11 69 | #define CCLASS_OTHER 12 70 | 71 | /* 72 | ** The following table gives the character class for non-initial ASCII 73 | ** characters. 
74 | */ 75 | static const unsigned char midClass[] = { 76 | /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, 77 | /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, 78 | /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, 79 | /* */ CCLASS_SPACE, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, 80 | /* */ CCLASS_SPACE, /* */ CCLASS_SPACE, /* */ CCLASS_OTHER, 81 | /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, 82 | /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, 83 | /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, 84 | /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, 85 | /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, 86 | /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_SPACE, 87 | /* ! */ CCLASS_OTHER, /* " */ CCLASS_OTHER, /* # */ CCLASS_OTHER, 88 | /* $ */ CCLASS_OTHER, /* % */ CCLASS_OTHER, /* & */ CCLASS_OTHER, 89 | /* ' */ CCLASS_SILENT, /* ( */ CCLASS_OTHER, /* ) */ CCLASS_OTHER, 90 | /* * */ CCLASS_OTHER, /* + */ CCLASS_OTHER, /* , */ CCLASS_OTHER, 91 | /* - */ CCLASS_OTHER, /* . */ CCLASS_OTHER, /* / */ CCLASS_OTHER, 92 | /* 0 */ CCLASS_DIGIT, /* 1 */ CCLASS_DIGIT, /* 2 */ CCLASS_DIGIT, 93 | /* 3 */ CCLASS_DIGIT, /* 4 */ CCLASS_DIGIT, /* 5 */ CCLASS_DIGIT, 94 | /* 6 */ CCLASS_DIGIT, /* 7 */ CCLASS_DIGIT, /* 8 */ CCLASS_DIGIT, 95 | /* 9 */ CCLASS_DIGIT, /* : */ CCLASS_OTHER, /* ; */ CCLASS_OTHER, 96 | /* < */ CCLASS_OTHER, /* = */ CCLASS_OTHER, /* > */ CCLASS_OTHER, 97 | /* ? 
*/ CCLASS_OTHER, /* @ */ CCLASS_OTHER, /* A */ CCLASS_VOWEL, 98 | /* B */ CCLASS_B, /* C */ CCLASS_C, /* D */ CCLASS_D, 99 | /* E */ CCLASS_VOWEL, /* F */ CCLASS_B, /* G */ CCLASS_C, 100 | /* H */ CCLASS_SILENT, /* I */ CCLASS_VOWEL, /* J */ CCLASS_C, 101 | /* K */ CCLASS_C, /* L */ CCLASS_L, /* M */ CCLASS_M, 102 | /* N */ CCLASS_M, /* O */ CCLASS_VOWEL, /* P */ CCLASS_B, 103 | /* Q */ CCLASS_C, /* R */ CCLASS_R, /* S */ CCLASS_C, 104 | /* T */ CCLASS_D, /* U */ CCLASS_VOWEL, /* V */ CCLASS_B, 105 | /* W */ CCLASS_B, /* X */ CCLASS_C, /* Y */ CCLASS_VOWEL, 106 | /* Z */ CCLASS_C, /* [ */ CCLASS_OTHER, /* \ */ CCLASS_OTHER, 107 | /* ] */ CCLASS_OTHER, /* ^ */ CCLASS_OTHER, /* _ */ CCLASS_OTHER, 108 | /* ` */ CCLASS_OTHER, /* a */ CCLASS_VOWEL, /* b */ CCLASS_B, 109 | /* c */ CCLASS_C, /* d */ CCLASS_D, /* e */ CCLASS_VOWEL, 110 | /* f */ CCLASS_B, /* g */ CCLASS_C, /* h */ CCLASS_SILENT, 111 | /* i */ CCLASS_VOWEL, /* j */ CCLASS_C, /* k */ CCLASS_C, 112 | /* l */ CCLASS_L, /* m */ CCLASS_M, /* n */ CCLASS_M, 113 | /* o */ CCLASS_VOWEL, /* p */ CCLASS_B, /* q */ CCLASS_C, 114 | /* r */ CCLASS_R, /* s */ CCLASS_C, /* t */ CCLASS_D, 115 | /* u */ CCLASS_VOWEL, /* v */ CCLASS_B, /* w */ CCLASS_B, 116 | /* x */ CCLASS_C, /* y */ CCLASS_VOWEL, /* z */ CCLASS_C, 117 | /* { */ CCLASS_OTHER, /* | */ CCLASS_OTHER, /* } */ CCLASS_OTHER, 118 | /* ~ */ CCLASS_OTHER, /* */ CCLASS_OTHER, 119 | }; 120 | /* 121 | ** This table gives the character class for ASCII characters that form the 122 | ** initial character of a word. The only difference from midClass is with 123 | ** the letters H, W, and Y. 
124 | */ 125 | static const unsigned char initClass[] = { 126 | /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, 127 | /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, 128 | /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, 129 | /* */ CCLASS_SPACE, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, 130 | /* */ CCLASS_SPACE, /* */ CCLASS_SPACE, /* */ CCLASS_OTHER, 131 | /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, 132 | /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, 133 | /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, 134 | /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, 135 | /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, 136 | /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_SPACE, 137 | /* ! */ CCLASS_OTHER, /* " */ CCLASS_OTHER, /* # */ CCLASS_OTHER, 138 | /* $ */ CCLASS_OTHER, /* % */ CCLASS_OTHER, /* & */ CCLASS_OTHER, 139 | /* ' */ CCLASS_OTHER, /* ( */ CCLASS_OTHER, /* ) */ CCLASS_OTHER, 140 | /* * */ CCLASS_OTHER, /* + */ CCLASS_OTHER, /* , */ CCLASS_OTHER, 141 | /* - */ CCLASS_OTHER, /* . */ CCLASS_OTHER, /* / */ CCLASS_OTHER, 142 | /* 0 */ CCLASS_DIGIT, /* 1 */ CCLASS_DIGIT, /* 2 */ CCLASS_DIGIT, 143 | /* 3 */ CCLASS_DIGIT, /* 4 */ CCLASS_DIGIT, /* 5 */ CCLASS_DIGIT, 144 | /* 6 */ CCLASS_DIGIT, /* 7 */ CCLASS_DIGIT, /* 8 */ CCLASS_DIGIT, 145 | /* 9 */ CCLASS_DIGIT, /* : */ CCLASS_OTHER, /* ; */ CCLASS_OTHER, 146 | /* < */ CCLASS_OTHER, /* = */ CCLASS_OTHER, /* > */ CCLASS_OTHER, 147 | /* ? 
*/ CCLASS_OTHER, /* @ */ CCLASS_OTHER, /* A */ CCLASS_VOWEL, 148 | /* B */ CCLASS_B, /* C */ CCLASS_C, /* D */ CCLASS_D, 149 | /* E */ CCLASS_VOWEL, /* F */ CCLASS_B, /* G */ CCLASS_C, 150 | /* H */ CCLASS_SILENT, /* I */ CCLASS_VOWEL, /* J */ CCLASS_C, 151 | /* K */ CCLASS_C, /* L */ CCLASS_L, /* M */ CCLASS_M, 152 | /* N */ CCLASS_M, /* O */ CCLASS_VOWEL, /* P */ CCLASS_B, 153 | /* Q */ CCLASS_C, /* R */ CCLASS_R, /* S */ CCLASS_C, 154 | /* T */ CCLASS_D, /* U */ CCLASS_VOWEL, /* V */ CCLASS_B, 155 | /* W */ CCLASS_B, /* X */ CCLASS_C, /* Y */ CCLASS_Y, 156 | /* Z */ CCLASS_C, /* [ */ CCLASS_OTHER, /* \ */ CCLASS_OTHER, 157 | /* ] */ CCLASS_OTHER, /* ^ */ CCLASS_OTHER, /* _ */ CCLASS_OTHER, 158 | /* ` */ CCLASS_OTHER, /* a */ CCLASS_VOWEL, /* b */ CCLASS_B, 159 | /* c */ CCLASS_C, /* d */ CCLASS_D, /* e */ CCLASS_VOWEL, 160 | /* f */ CCLASS_B, /* g */ CCLASS_C, /* h */ CCLASS_SILENT, 161 | /* i */ CCLASS_VOWEL, /* j */ CCLASS_C, /* k */ CCLASS_C, 162 | /* l */ CCLASS_L, /* m */ CCLASS_M, /* n */ CCLASS_M, 163 | /* o */ CCLASS_VOWEL, /* p */ CCLASS_B, /* q */ CCLASS_C, 164 | /* r */ CCLASS_R, /* s */ CCLASS_C, /* t */ CCLASS_D, 165 | /* u */ CCLASS_VOWEL, /* v */ CCLASS_B, /* w */ CCLASS_B, 166 | /* x */ CCLASS_C, /* y */ CCLASS_Y, /* z */ CCLASS_C, 167 | /* { */ CCLASS_OTHER, /* | */ CCLASS_OTHER, /* } */ CCLASS_OTHER, 168 | /* ~ */ CCLASS_OTHER, /* */ CCLASS_OTHER, 169 | }; 170 | 171 | /* 172 | ** Mapping from the character class number (0-12) to a symbol for each 173 | ** character class. Note that initClass[] can be used to map the class 174 | ** symbol back into the class number. 175 | */ 176 | static const unsigned char className[] = ".ABCDHLRMY9 ?"; 177 | 178 | /* 179 | ** Generate a "phonetic hash" from a string of ASCII characters 180 | ** in zIn[0..nIn-1]. 181 | ** 182 | ** * Map characters by character class as defined above. 
183 | ** * Omit double-letters 184 | ** * Omit vowels beside R and L 185 | ** * Omit T when followed by CH 186 | ** * Omit W when followed by R 187 | ** * Omit D when followed by J or G 188 | ** * Omit K in KN or G in GN at the beginning of a word 189 | ** 190 | ** Space to hold the result is obtained from sqlite3_malloc() 191 | ** 192 | ** Return NULL if memory allocation fails. 193 | */ 194 | static unsigned char *phoneticHash(const unsigned char *zIn, int nIn){ 195 | unsigned char *zOut = sqlite3_malloc64( nIn + 1 ); 196 | int i; 197 | int nOut = 0; 198 | char cPrev = 0x77; 199 | char cPrevX = 0x77; 200 | const unsigned char *aClass = initClass; 201 | 202 | if( zOut==0 ) return 0; 203 | if( nIn>2 ){ 204 | switch( zIn[0] ){ 205 | case 'g': 206 | case 'k': { 207 | if( zIn[1]=='n' ){ zIn++; nIn--; } 208 | break; 209 | } 210 | } 211 | } 212 | for(i=0; i=0 ); 236 | if( nOut==0 || c!=zOut[nOut-1] ) zOut[nOut++] = c; 237 | } 238 | zOut[nOut] = 0; 239 | return zOut; 240 | } 241 | 242 | /* 243 | ** This is an SQL function wrapper around phoneticHash(). See 244 | ** the description of phoneticHash() for additional information. 245 | */ 246 | static void phoneticHashSqlFunc( 247 | sqlite3_context *context, 248 | int argc, 249 | sqlite3_value **argv 250 | ){ 251 | const unsigned char *zIn; 252 | unsigned char *zOut; 253 | 254 | zIn = sqlite3_value_text(argv[0]); 255 | if( zIn==0 ) return; 256 | zOut = phoneticHash(zIn, sqlite3_value_bytes(argv[0])); 257 | if( zOut==0 ){ 258 | sqlite3_result_error_nomem(context); 259 | }else{ 260 | sqlite3_result_text(context, (char*)zOut, -1, sqlite3_free); 261 | } 262 | } 263 | 264 | /* 265 | ** Return the character class number for a character given its 266 | ** context. 267 | */ 268 | static char characterClass(char cPrev, char c){ 269 | return cPrev==0 ? initClass[c&0x7f] : midClass[c&0x7f]; 270 | } 271 | 272 | /* 273 | ** Return the cost of inserting or deleting character c immediately 274 | ** following character cPrev. 
If cPrev==0, that means c is the first 275 | ** character of the word. 276 | */ 277 | static int insertOrDeleteCost(char cPrev, char c, char cNext){ 278 | char classC = characterClass(cPrev, c); 279 | char classCprev; 280 | 281 | if( classC==CCLASS_SILENT ){ 282 | /* Insert or delete "silent" characters such as H or W */ 283 | return 1; 284 | } 285 | if( cPrev==c ){ 286 | /* Repeated characters, or miss a repeat */ 287 | return 10; 288 | } 289 | if( classC==CCLASS_VOWEL && (cPrev=='r' || cNext=='r') ){ 290 | return 20; /* Insert a vowel before or after 'r' */ 291 | } 292 | classCprev = characterClass(cPrev, cPrev); 293 | if( classC==classCprev ){ 294 | if( classC==CCLASS_VOWEL ){ 295 | /* Remove or add a new vowel to a vowel cluster */ 296 | return 15; 297 | }else{ 298 | /* Remove or add a consonant not in the same class */ 299 | return 50; 300 | } 301 | } 302 | 303 | /* any other character insertion or deletion */ 304 | return 100; 305 | } 306 | 307 | /* 308 | ** Divide the insertion cost by this factor when appending to the 309 | ** end of the word. 310 | */ 311 | #define FINAL_INS_COST_DIV 4 312 | 313 | /* 314 | ** Return the cost of substituting cTo in place of cFrom assuming 315 | ** the previous character is cPrev. If cPrev==0 then cTo is the first 316 | ** character of the word. 
317 | */ 318 | static int substituteCost(char cPrev, char cFrom, char cTo){ 319 | char classFrom, classTo; 320 | if( cFrom==cTo ){ 321 | /* Exact match */ 322 | return 0; 323 | } 324 | if( cFrom==(cTo^0x20) && ((cTo>='A' && cTo<='Z') || (cTo>='a' && cTo<='z')) ){ 325 | /* differ only in case */ 326 | return 0; 327 | } 328 | classFrom = characterClass(cPrev, cFrom); 329 | classTo = characterClass(cPrev, cTo); 330 | if( classFrom==classTo ){ 331 | /* Same character class */ 332 | return 40; 333 | } 334 | if( classFrom>=CCLASS_B && classFrom<=CCLASS_Y 335 | && classTo>=CCLASS_B && classTo<=CCLASS_Y ){ 336 | /* Convert from one consonant to another, but in a different class */ 337 | return 75; 338 | } 339 | /* Any other substitution */ 340 | return 100; 341 | } 342 | 343 | /* 344 | ** Given two strings zA and zB which are pure ASCII, return the cost 345 | ** of transforming zA into zB. If zA ends with '*' assume that it is 346 | ** a prefix of zB and give only minimal penalty for extra characters 347 | ** on the end of zB. 348 | ** 349 | ** Smaller numbers mean a closer match. 350 | ** 351 | ** Negative values indicate an error: 352 | ** -1 One of the inputs is NULL 353 | ** -2 Non-ASCII characters on input 354 | ** -3 Unable to allocate memory 355 | ** 356 | ** If pnMatch is not NULL, then *pnMatch is set to the number of bytes 357 | ** of zB that matched the pattern in zA. If zA does not end with a '*', 358 | ** then this value is always the number of bytes in zB (i.e. strlen(zB)). 359 | ** If zA does end in a '*', then it is the number of bytes in the prefix 360 | ** of zB that was deemed to match zA. 
361 | */ 362 | static int editdist1(const char *zA, const char *zB, int *pnMatch){ 363 | int nA, nB; /* Number of characters in zA[] and zB[] */ 364 | int xA, xB; /* Loop counters for zA[] and zB[] */ 365 | char cA = 0, cB; /* Current character of zA and zB */ 366 | char cAprev, cBprev; /* Previous character of zA and zB */ 367 | char cAnext, cBnext; /* Next character in zA and zB */ 368 | int d; /* North-west cost value */ 369 | int dc = 0; /* North-west character value */ 370 | int res; /* Final result */ 371 | int *m; /* The cost matrix */ 372 | char *cx; /* Corresponding character values */ 373 | int *toFree = 0; /* Malloced space */ 374 | int nMatch = 0; 375 | int mStack[60+15]; /* Stack space to use if not too much is needed */ 376 | 377 | /* Early out if either input is NULL */ 378 | if( zA==0 || zB==0 ) return -1; 379 | 380 | /* Skip any common prefix */ 381 | while( zA[0] && zA[0]==zB[0] ){ dc = zA[0]; zA++; zB++; nMatch++; } 382 | if( pnMatch ) *pnMatch = nMatch; 383 | if( zA[0]==0 && zB[0]==0 ) return 0; 384 | 385 | #if 0 386 | printf("A=\"%s\" B=\"%s\" dc=%c\n", zA, zB, dc?dc:' '); 387 | #endif 388 | 389 | /* Verify input strings and measure their lengths */ 390 | for(nA=0; zA[nA]; nA++){ 391 | if( zA[nA]&0x80 ) return -2; 392 | } 393 | for(nB=0; zB[nB]; nB++){ 394 | if( zB[nB]&0x80 ) return -2; 395 | } 396 | 397 | /* Special processing if either string is empty */ 398 | if( nA==0 ){ 399 | cBprev = (char)dc; 400 | for(xB=res=0; (cB = zB[xB])!=0; xB++){ 401 | res += insertOrDeleteCost(cBprev, cB, zB[xB+1])/FINAL_INS_COST_DIV; 402 | cBprev = cB; 403 | } 404 | return res; 405 | } 406 | if( nB==0 ){ 407 | cAprev = (char)dc; 408 | for(xA=res=0; (cA = zA[xA])!=0; xA++){ 409 | res += insertOrDeleteCost(cAprev, cA, zA[xA+1]); 410 | cAprev = cA; 411 | } 412 | return res; 413 | } 414 | 415 | /* A is a prefix of B */ 416 | if( zA[0]=='*' && zA[1]==0 ) return 0; 417 | 418 | /* Allocate and initialize the Wagner matrix */ 419 | if( 
nB<(sizeof(mStack)*4)/(sizeof(mStack[0])*5) ){ 420 | m = mStack; 421 | }else{ 422 | m = toFree = sqlite3_malloc64( (nB+1)*5*sizeof(m[0])/4 ); 423 | if( m==0 ) return -3; 424 | } 425 | cx = (char*)&m[nB+1]; 426 | 427 | /* Compute the Wagner edit distance */ 428 | m[0] = 0; 429 | cx[0] = (char)dc; 430 | cBprev = (char)dc; 431 | for(xB=1; xB<=nB; xB++){ 432 | cBnext = zB[xB]; 433 | cB = zB[xB-1]; 434 | cx[xB] = cB; 435 | m[xB] = m[xB-1] + insertOrDeleteCost(cBprev, cB, cBnext); 436 | cBprev = cB; 437 | } 438 | cAprev = (char)dc; 439 | for(xA=1; xA<=nA; xA++){ 440 | int lastA = (xA==nA); 441 | cA = zA[xA-1]; 442 | cAnext = zA[xA]; 443 | if( cA=='*' && lastA ) break; 444 | d = m[0]; 445 | dc = cx[0]; 446 | m[0] = d + insertOrDeleteCost(cAprev, cA, cAnext); 447 | cBprev = 0; 448 | for(xB=1; xB<=nB; xB++){ 449 | int totalCost, insCost, delCost, subCost, ncx; 450 | cB = zB[xB-1]; 451 | cBnext = zB[xB]; 452 | 453 | /* Cost to insert cB */ 454 | insCost = insertOrDeleteCost(cx[xB-1], cB, cBnext); 455 | if( lastA ) insCost /= FINAL_INS_COST_DIV; 456 | 457 | /* Cost to delete cA */ 458 | delCost = insertOrDeleteCost(cx[xB], cA, cBnext); 459 | 460 | /* Cost to substitute cA->cB */ 461 | subCost = substituteCost(cx[xB-1], cA, cB); 462 | 463 | /* Best cost */ 464 | totalCost = insCost + m[xB-1]; 465 | ncx = cB; 466 | if( (delCost + m[xB])nLang; i++){ 644 | EditDist3Cost *pCost, *pNext; 645 | pCost = p->a[i].pCost; 646 | while( pCost ){ 647 | pNext = pCost->pNext; 648 | sqlite3_free(pCost); 649 | pCost = pNext; 650 | } 651 | } 652 | sqlite3_free(p->a); 653 | memset(p, 0, sizeof(*p)); 654 | } 655 | static void editDist3ConfigDelete(void *pIn){ 656 | EditDist3Config *p = (EditDist3Config*)pIn; 657 | editDist3ConfigClear(p); 658 | sqlite3_free(p); 659 | } 660 | 661 | /* Compare the FROM values of two EditDist3Cost objects, for sorting. 662 | ** Return negative, zero, or positive if the A is less than, equal to, 663 | ** or greater than B. 
664 | */ 665 | static int editDist3CostCompare(EditDist3Cost *pA, EditDist3Cost *pB){ 666 | int n = pA->nFrom; 667 | int rc; 668 | if( n>pB->nFrom ) n = pB->nFrom; 669 | rc = strncmp(pA->a, pB->a, n); 670 | if( rc==0 ) rc = pA->nFrom - pB->nFrom; 671 | return rc; 672 | } 673 | 674 | /* 675 | ** Merge together two sorted lists of EditDist3Cost objects, in order 676 | ** of increasing FROM. 677 | */ 678 | static EditDist3Cost *editDist3CostMerge( 679 | EditDist3Cost *pA, 680 | EditDist3Cost *pB 681 | ){ 682 | EditDist3Cost *pHead = 0; 683 | EditDist3Cost **ppTail = &pHead; 684 | EditDist3Cost *p; 685 | while( pA && pB ){ 686 | if( editDist3CostCompare(pA,pB)<=0 ){ 687 | p = pA; 688 | pA = pA->pNext; 689 | }else{ 690 | p = pB; 691 | pB = pB->pNext; 692 | } 693 | *ppTail = p; 694 | ppTail = &p->pNext; 695 | } 696 | if( pA ){ 697 | *ppTail = pA; 698 | }else{ 699 | *ppTail = pB; 700 | } 701 | return pHead; 702 | } 703 | 704 | /* 705 | ** Sort a list of EditDist3Cost objects into order of increasing FROM 706 | */ 707 | static EditDist3Cost *editDist3CostSort(EditDist3Cost *pList){ 708 | EditDist3Cost *ap[60], *p; 709 | int i; 710 | int mx = 0; 711 | ap[0] = 0; 712 | ap[1] = 0; 713 | while( pList ){ 714 | p = pList; 715 | pList = p->pNext; 716 | p->pNext = 0; 717 | for(i=0; ap[i]; i++){ 718 | p = editDist3CostMerge(ap[i],p); 719 | ap[i] = 0; 720 | } 721 | ap[i] = p; 722 | if( i>mx ){ 723 | mx = i; 724 | ap[i+1] = 0; 725 | } 726 | } 727 | p = 0; 728 | for(i=0; i<=mx; i++){ 729 | if( ap[i] ) p = editDist3CostMerge(p,ap[i]); 730 | } 731 | return p; 732 | } 733 | 734 | /* 735 | ** Load all edit-distance weights from a table. 
736 | */ 737 | static int editDist3ConfigLoad( 738 | EditDist3Config *p, /* The edit distance configuration to load */ 739 | sqlite3 *db, /* Load from this database */ 740 | const char *zTable /* Name of the table from which to load */ 741 | ){ 742 | sqlite3_stmt *pStmt; 743 | int rc, rc2; 744 | char *zSql; 745 | int iLangPrev = -9999; 746 | EditDist3Lang *pLang = 0; 747 | 748 | zSql = sqlite3_mprintf("SELECT iLang, cFrom, cTo, iCost" 749 | " FROM \"%w\" WHERE iLang>=0 ORDER BY iLang", zTable); 750 | if( zSql==0 ) return SQLITE_NOMEM; 751 | rc = sqlite3_prepare(db, zSql, -1, &pStmt, 0); 752 | sqlite3_free(zSql); 753 | if( rc ) return rc; 754 | editDist3ConfigClear(p); 755 | while( sqlite3_step(pStmt)==SQLITE_ROW ){ 756 | int iLang = sqlite3_column_int(pStmt, 0); 757 | const char *zFrom = (const char*)sqlite3_column_text(pStmt, 1); 758 | int nFrom = zFrom ? sqlite3_column_bytes(pStmt, 1) : 0; 759 | const char *zTo = (const char*)sqlite3_column_text(pStmt, 2); 760 | int nTo = zTo ? sqlite3_column_bytes(pStmt, 2) : 0; 761 | int iCost = sqlite3_column_int(pStmt, 3); 762 | 763 | assert( zFrom!=0 || nFrom==0 ); 764 | assert( zTo!=0 || nTo==0 ); 765 | if( nFrom>100 || nTo>100 ) continue; 766 | if( iCost<0 ) continue; 767 | if( iCost>=10000 ) continue; /* Costs above 10K are considered infinite */ 768 | if( pLang==0 || iLang!=iLangPrev ){ 769 | EditDist3Lang *pNew; 770 | pNew = sqlite3_realloc64(p->a, (p->nLang+1)*sizeof(p->a[0])); 771 | if( pNew==0 ){ rc = SQLITE_NOMEM; break; } 772 | p->a = pNew; 773 | pLang = &p->a[p->nLang]; 774 | p->nLang++; 775 | pLang->iLang = iLang; 776 | pLang->iInsCost = 100; 777 | pLang->iDelCost = 100; 778 | pLang->iSubCost = 150; 779 | pLang->pCost = 0; 780 | iLangPrev = iLang; 781 | } 782 | if( nFrom==1 && zFrom[0]=='?' && nTo==0 ){ 783 | pLang->iDelCost = iCost; 784 | }else if( nFrom==0 && nTo==1 && zTo[0]=='?' ){ 785 | pLang->iInsCost = iCost; 786 | }else if( nFrom==1 && nTo==1 && zFrom[0]=='?' && zTo[0]=='?' 
){ 787 | pLang->iSubCost = iCost; 788 | }else{ 789 | EditDist3Cost *pCost; 790 | int nExtra = nFrom + nTo - 4; 791 | if( nExtra<0 ) nExtra = 0; 792 | pCost = sqlite3_malloc64( sizeof(*pCost) + nExtra ); 793 | if( pCost==0 ){ rc = SQLITE_NOMEM; break; } 794 | pCost->nFrom = (u8)nFrom; 795 | pCost->nTo = (u8)nTo; 796 | pCost->iCost = (u16)iCost; 797 | memcpy(pCost->a, zFrom, nFrom); 798 | memcpy(pCost->a + nFrom, zTo, nTo); 799 | pCost->pNext = pLang->pCost; 800 | pLang->pCost = pCost; 801 | } 802 | } 803 | rc2 = sqlite3_finalize(pStmt); 804 | if( rc==SQLITE_OK ) rc = rc2; 805 | if( rc==SQLITE_OK ){ 806 | int iLang; 807 | for(iLang=0; iLangnLang; iLang++){ 808 | p->a[iLang].pCost = editDist3CostSort(p->a[iLang].pCost); 809 | } 810 | } 811 | return rc; 812 | } 813 | 814 | /* 815 | ** Return the length (in bytes) of a utf-8 character. Or return a maximum 816 | ** of N. 817 | */ 818 | static int utf8Len(unsigned char c, int N){ 819 | int len = 1; 820 | if( c>0x7f ){ 821 | if( (c&0xe0)==0xc0 ){ 822 | len = 2; 823 | }else if( (c&0xf0)==0xe0 ){ 824 | len = 3; 825 | }else{ 826 | len = 4; 827 | } 828 | } 829 | if( len>N ) len = N; 830 | return len; 831 | } 832 | 833 | /* 834 | ** Return TRUE (non-zero) if the To side of the given cost matches 835 | ** the given string. 836 | */ 837 | static int matchTo(EditDist3Cost *p, const char *z, int n){ 838 | assert( n>0 ); 839 | if( p->a[p->nFrom]!=z[0] ) return 0; 840 | if( p->nTo>n ) return 0; 841 | if( strncmp(p->a+p->nFrom, z, p->nTo)!=0 ) return 0; 842 | return 1; 843 | } 844 | 845 | /* 846 | ** Return TRUE (non-zero) if the From side of the given cost matches 847 | ** the given string. 
848 | */ 849 | static int matchFrom(EditDist3Cost *p, const char *z, int n){ 850 | assert( p->nFrom<=n ); 851 | if( p->nFrom ){ 852 | if( p->a[0]!=z[0] ) return 0; 853 | if( strncmp(p->a, z, p->nFrom)!=0 ) return 0; 854 | } 855 | return 1; 856 | } 857 | 858 | /* 859 | ** Return TRUE (non-zero) if the next FROM character and the next TO 860 | ** character are the same. 861 | */ 862 | static int matchFromTo( 863 | EditDist3FromString *pStr, /* Left hand string */ 864 | int n1, /* Index of comparison character on the left */ 865 | const char *z2, /* Right-hand comparison character */ 866 | int n2 /* Bytes remaining in z2[] */ 867 | ){ 868 | int b1 = pStr->a[n1].nByte; 869 | if( b1>n2 ) return 0; 870 | assert( b1>0 ); 871 | if( pStr->z[n1]!=z2[0] ) return 0; 872 | if( strncmp(pStr->z+n1, z2, b1)!=0 ) return 0; 873 | return 1; 874 | } 875 | 876 | /* 877 | ** Delete an EditDist3FromString object 878 | */ 879 | static void editDist3FromStringDelete(EditDist3FromString *p){ 880 | int i; 881 | if( p ){ 882 | for(i=0; in; i++){ 883 | sqlite3_free(p->a[i].apDel); 884 | sqlite3_free(p->a[i].apSubst); 885 | } 886 | sqlite3_free(p); 887 | } 888 | } 889 | 890 | /* 891 | ** Create a EditDist3FromString object. 
892 | */ 893 | static EditDist3FromString *editDist3FromStringNew( 894 | const EditDist3Lang *pLang, 895 | const char *z, 896 | int n 897 | ){ 898 | EditDist3FromString *pStr; 899 | EditDist3Cost *p; 900 | int i; 901 | 902 | if( z==0 ) return 0; 903 | if( n<0 ) n = (int)strlen(z); 904 | pStr = sqlite3_malloc64( sizeof(*pStr) + sizeof(pStr->a[0])*n + n + 1 ); 905 | if( pStr==0 ) return 0; 906 | pStr->a = (EditDist3From*)&pStr[1]; 907 | memset(pStr->a, 0, sizeof(pStr->a[0])*n); 908 | pStr->n = n; 909 | pStr->z = (char*)&pStr->a[n]; 910 | memcpy(pStr->z, z, n+1); 911 | if( n && z[n-1]=='*' ){ 912 | pStr->isPrefix = 1; 913 | n--; 914 | pStr->n--; 915 | pStr->z[n] = 0; 916 | }else{ 917 | pStr->isPrefix = 0; 918 | } 919 | 920 | for(i=0; ia[i]; 922 | memset(pFrom, 0, sizeof(*pFrom)); 923 | pFrom->nByte = utf8Len((unsigned char)z[i], n-i); 924 | for(p=pLang->pCost; p; p=p->pNext){ 925 | EditDist3Cost **apNew; 926 | if( i+p->nFrom>n ) continue; 927 | if( matchFrom(p, z+i, n-i)==0 ) continue; 928 | if( p->nTo==0 ){ 929 | apNew = sqlite3_realloc64(pFrom->apDel, 930 | sizeof(*apNew)*(pFrom->nDel+1)); 931 | if( apNew==0 ) break; 932 | pFrom->apDel = apNew; 933 | apNew[pFrom->nDel++] = p; 934 | }else{ 935 | apNew = sqlite3_realloc64(pFrom->apSubst, 936 | sizeof(*apNew)*(pFrom->nSubst+1)); 937 | if( apNew==0 ) break; 938 | pFrom->apSubst = apNew; 939 | apNew[pFrom->nSubst++] = p; 940 | } 941 | } 942 | if( p ){ 943 | editDist3FromStringDelete(pStr); 944 | pStr = 0; 945 | break; 946 | } 947 | } 948 | return pStr; 949 | } 950 | 951 | /* 952 | ** Update entry m[i] such that it is the minimum of its current value 953 | ** and m[j]+iCost. 
954 | */ 955 | static void updateCost( 956 | unsigned int *m, 957 | int i, 958 | int j, 959 | int iCost 960 | ){ 961 | unsigned int b; 962 | assert( iCost>=0 ); 963 | assert( iCost<10000 ); 964 | b = m[j] + iCost; 965 | if( bpCost; p; p=p->pNext){ 1028 | EditDist3Cost **apNew; 1029 | if( p->nFrom>0 ) break; 1030 | if( i2+p->nTo>n2 ) continue; 1031 | if( p->a[0]>z2[i2] ) break; 1032 | if( matchTo(p, z2+i2, n2-i2)==0 ) continue; 1033 | a2[i2].nIns++; 1034 | apNew = sqlite3_realloc64(a2[i2].apIns, sizeof(*apNew)*a2[i2].nIns); 1035 | if( apNew==0 ){ 1036 | res = -1; /* Out of memory */ 1037 | goto editDist3Abort; 1038 | } 1039 | a2[i2].apIns = apNew; 1040 | a2[i2].apIns[a2[i2].nIns-1] = p; 1041 | } 1042 | } 1043 | 1044 | /* Prepare to compute the minimum edit distance */ 1045 | szRow = f.n+1; 1046 | memset(m, 0x01, (n2+1)*szRow*sizeof(m[0])); 1047 | m[0] = 0; 1048 | 1049 | /* First fill in the top-row of the matrix with FROM deletion costs */ 1050 | for(i1=0; i1iDelCost); 1053 | for(k=0; knFrom, i1, p->iCost); 1056 | } 1057 | } 1058 | 1059 | /* Fill in all subsequent rows, top-to-bottom, left-to-right */ 1060 | for(i2=0; i2iInsCost); 1067 | for(k=0; knTo), rxp, p->iCost); 1070 | } 1071 | for(i1=0; i1iDelCost); 1082 | for(k=0; knFrom, cxp, p->iCost); 1085 | } 1086 | updateCost(m, cx, cxu, pLang->iInsCost); 1087 | if( matchFromTo(&f, i1, z2+i2, n2-i2) ){ 1088 | updateCost(m, cx, cxd, 0); 1089 | } 1090 | updateCost(m, cx, cxd, pLang->iSubCost); 1091 | for(k=0; knFrom+szRow*p->nTo, cxd, p->iCost); 1095 | } 1096 | } 1097 | } 1098 | } 1099 | 1100 | #if 0 /* Enable for debugging */ 1101 | printf(" ^"); 1102 | for(i1=0; i19999 ) printf(" ****"); 1107 | else printf(" %4d", v); 1108 | } 1109 | printf("\n"); 1110 | for(i2=0; i29999 ) printf(" ****"); 1115 | else printf(" %4d", v); 1116 | } 1117 | printf("\n"); 1118 | } 1119 | #endif 1120 | 1121 | /* Free memory allocations and return the result */ 1122 | res = (int)m[szRow*(n2+1)-1]; 1123 | n = n2; 1124 | if( f.isPrefix ){ 1125 | 
for(i2=1; i2<=n2; i2++){ 1126 | int b = m[szRow*i2-1]; 1127 | if( b<=res ){ 1128 | res = b; 1129 | n = i2 - 1; 1130 | } 1131 | } 1132 | } 1133 | if( pnMatch ){ 1134 | int nExtra = 0; 1135 | for(k=0; knLang; i++){ 1156 | if( pConfig->a[i].iLang==iLang ) return &pConfig->a[i]; 1157 | } 1158 | return &editDist3Lang; 1159 | } 1160 | 1161 | /* 1162 | ** Function: editdist3(A,B,iLang) 1163 | ** editdist3(tablename) 1164 | ** 1165 | ** Return the cost of transforming string A into string B using edit 1166 | ** weights for iLang. 1167 | ** 1168 | ** The second form loads edit weights into memory from a table. 1169 | */ 1170 | static void editDist3SqlFunc( 1171 | sqlite3_context *context, 1172 | int argc, 1173 | sqlite3_value **argv 1174 | ){ 1175 | EditDist3Config *pConfig = (EditDist3Config*)sqlite3_user_data(context); 1176 | sqlite3 *db = sqlite3_context_db_handle(context); 1177 | int rc; 1178 | if( argc==1 ){ 1179 | const char *zTable = (const char*)sqlite3_value_text(argv[0]); 1180 | rc = editDist3ConfigLoad(pConfig, db, zTable); 1181 | if( rc ) sqlite3_result_error_code(context, rc); 1182 | }else{ 1183 | const char *zA = (const char*)sqlite3_value_text(argv[0]); 1184 | const char *zB = (const char*)sqlite3_value_text(argv[1]); 1185 | int nA = sqlite3_value_bytes(argv[0]); 1186 | int nB = sqlite3_value_bytes(argv[1]); 1187 | int iLang = argc==3 ? 
sqlite3_value_int(argv[2]) : 0; 1188 | const EditDist3Lang *pLang = editDist3FindLang(pConfig, iLang); 1189 | EditDist3FromString *pFrom; 1190 | int dist; 1191 | 1192 | pFrom = editDist3FromStringNew(pLang, zA, nA); 1193 | if( pFrom==0 ){ 1194 | sqlite3_result_error_nomem(context); 1195 | return; 1196 | } 1197 | dist = editDist3Core(pFrom, zB, nB, pLang, 0); 1198 | editDist3FromStringDelete(pFrom); 1199 | if( dist==(-1) ){ 1200 | sqlite3_result_error_nomem(context); 1201 | }else{ 1202 | sqlite3_result_int(context, dist); 1203 | } 1204 | } 1205 | } 1206 | 1207 | /* 1208 | ** Register the editDist3 function with SQLite 1209 | */ 1210 | static int editDist3Install(sqlite3 *db){ 1211 | int rc; 1212 | EditDist3Config *pConfig = sqlite3_malloc64( sizeof(*pConfig) ); 1213 | if( pConfig==0 ) return SQLITE_NOMEM; 1214 | memset(pConfig, 0, sizeof(*pConfig)); 1215 | rc = sqlite3_create_function_v2(db, "editdist3", 1216 | 2, SQLITE_UTF8|SQLITE_DETERMINISTIC, pConfig, 1217 | editDist3SqlFunc, 0, 0, 0); 1218 | if( rc==SQLITE_OK ){ 1219 | rc = sqlite3_create_function_v2(db, "editdist3", 1220 | 3, SQLITE_UTF8|SQLITE_DETERMINISTIC, pConfig, 1221 | editDist3SqlFunc, 0, 0, 0); 1222 | } 1223 | if( rc==SQLITE_OK ){ 1224 | rc = sqlite3_create_function_v2(db, "editdist3", 1225 | 1, SQLITE_UTF8|SQLITE_DETERMINISTIC, pConfig, 1226 | editDist3SqlFunc, 0, 0, editDist3ConfigDelete); 1227 | }else{ 1228 | sqlite3_free(pConfig); 1229 | } 1230 | return rc; 1231 | } 1232 | /* End configurable cost unicode edit distance routines 1233 | ****************************************************************************** 1234 | ****************************************************************************** 1235 | ** Begin transliterate unicode-to-ascii implementation 1236 | */ 1237 | 1238 | #if !SQLITE_AMALGAMATION 1239 | /* 1240 | ** This lookup table is used to help decode the first byte of 1241 | ** a multi-byte UTF8 character. 
1242 | */ 1243 | static const unsigned char sqlite3Utf8Trans1[] = { 1244 | 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 1245 | 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 1246 | 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 1247 | 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 1248 | 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 1249 | 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 1250 | 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 1251 | 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00, 1252 | }; 1253 | #endif 1254 | 1255 | /* 1256 | ** Return the value of the first UTF-8 character in the string. 1257 | */ 1258 | static int utf8Read(const unsigned char *z, int n, int *pSize){ 1259 | int c, i; 1260 | 1261 | /* All callers to this routine (in the current implementation) 1262 | ** always have n>0. */ 1263 | if( NEVER(n==0) ){ 1264 | c = i = 0; 1265 | }else{ 1266 | c = z[0]; 1267 | i = 1; 1268 | if( c>=0xc0 ){ 1269 | c = sqlite3Utf8Trans1[c-0xc0]; 1270 | while( i0 ){ 1723 | c = utf8Read(zIn, nIn, &sz); 1724 | zIn += sz; 1725 | nIn -= sz; 1726 | if( c<=127 ){ 1727 | zOut[nOut++] = (unsigned char)c; 1728 | }else{ 1729 | int xTop, xBtm, x; 1730 | const Transliteration *tbl = spellfixFindTranslit(c, &xTop); 1731 | xBtm = 0; 1732 | while( xTop>=xBtm ){ 1733 | x = (xTop + xBtm)/2; 1734 | if( tbl[x].cFrom==c ){ 1735 | zOut[nOut++] = tbl[x].cTo0; 1736 | if( tbl[x].cTo1 ){ 1737 | zOut[nOut++] = tbl[x].cTo1; 1738 | if( tbl[x].cTo2 ){ 1739 | zOut[nOut++] = tbl[x].cTo2; 1740 | if( tbl[x].cTo3 ){ 1741 | zOut[nOut++] = tbl[x].cTo3; 1742 | #ifdef SQLITE_SPELLFIX_5BYTE_MAPPINGS 1743 | if( tbl[x].cTo4 ){ 1744 | zOut[nOut++] = tbl[x].cTo4; 1745 | } 1746 | #endif /* SQLITE_SPELLFIX_5BYTE_MAPPINGS */ 1747 | } 1748 | } 1749 | } 1750 | c = 0; 1751 | break; 1752 | }else if( tbl[x].cFrom>c ){ 1753 | xTop = x-1; 1754 | }else{ 1755 | xBtm = x+1; 1756 | } 1757 | } 1758 | if( c ) zOut[nOut++] = '?'; 1759 | } 1760 | } 1761 | zOut[nOut] = 0; 1762 | return zOut; 1763 | } 1764 | 1765 
| /* 1766 | ** Return the number of characters in the shortest prefix of the input 1767 | ** string that transliterates to an ASCII string nTrans bytes or longer. 1768 | ** Or, if the transliteration of the input string is less than nTrans 1769 | ** bytes in size, return the number of characters in the input string. 1770 | */ 1771 | static int translen_to_charlen(const char *zIn, int nIn, int nTrans){ 1772 | int i, c, sz, nOut; 1773 | int nChar; 1774 | 1775 | i = nOut = 0; 1776 | for(nChar=0; i=128 ){ 1782 | int xTop, xBtm, x; 1783 | const Transliteration *tbl = spellfixFindTranslit(c, &xTop); 1784 | xBtm = 0; 1785 | while( xTop>=xBtm ){ 1786 | x = (xTop + xBtm)/2; 1787 | if( tbl[x].cFrom==c ){ 1788 | if( tbl[x].cTo1 ){ 1789 | nOut++; 1790 | if( tbl[x].cTo2 ){ 1791 | nOut++; 1792 | if( tbl[x].cTo3 ){ 1793 | nOut++; 1794 | } 1795 | } 1796 | } 1797 | break; 1798 | }else if( tbl[x].cFrom>c ){ 1799 | xTop = x-1; 1800 | }else{ 1801 | xBtm = x+1; 1802 | } 1803 | } 1804 | } 1805 | } 1806 | 1807 | return nChar; 1808 | } 1809 | 1810 | 1811 | /* 1812 | ** spellfix1_translit(X) 1813 | ** 1814 | ** Convert a string that contains non-ASCII Roman characters into 1815 | ** pure ASCII. 1816 | */ 1817 | static void transliterateSqlFunc( 1818 | sqlite3_context *context, 1819 | int argc, 1820 | sqlite3_value **argv 1821 | ){ 1822 | const unsigned char *zIn = sqlite3_value_text(argv[0]); 1823 | int nIn = sqlite3_value_bytes(argv[0]); 1824 | unsigned char *zOut = transliterate(zIn, nIn); 1825 | if( zOut==0 ){ 1826 | sqlite3_result_error_nomem(context); 1827 | }else{ 1828 | sqlite3_result_text(context, (char*)zOut, -1, sqlite3_free); 1829 | } 1830 | } 1831 | 1832 | /* 1833 | ** spellfix1_scriptcode(X) 1834 | ** 1835 | ** Try to determine the dominant script used by the word X and return 1836 | ** its ISO 15924 numeric code. 
1837 | ** 1838 | ** The current implementation only understands the following scripts: 1839 | ** 1840 | ** 215 (Latin) 1841 | ** 220 (Cyrillic) 1842 | ** 200 (Greek) ** 125 (Hebrew) ** 160 (Arabic) 1843 | ** 1844 | ** This routine will return 998 if the input X contains characters from 1845 | ** two or more of the above scripts or 999 if X contains no characters 1846 | ** from any of the above scripts. ** ** If no script is detected but a digit was seen in X, the result is ** 215 (Latin). 1847 | */ 1848 | static void scriptCodeSqlFunc( 1849 | sqlite3_context *context, 1850 | int argc, 1851 | sqlite3_value **argv 1852 | ){ 1853 | const unsigned char *zIn = sqlite3_value_text(argv[0]); 1854 | int nIn = sqlite3_value_bytes(argv[0]); 1855 | int c, sz; 1856 | int scriptMask = 0; 1857 | int res; 1858 | int seenDigit = 0; 1859 | # define SCRIPT_LATIN 0x0001 1860 | # define SCRIPT_CYRILLIC 0x0002 1861 | # define SCRIPT_GREEK 0x0004 1862 | # define SCRIPT_HEBREW 0x0008 1863 | # define SCRIPT_ARABIC 0x0010 1864 | 1865 | while( nIn>0 ){ 1866 | c = utf8Read(zIn, nIn, &sz); 1867 | zIn += sz; 1868 | nIn -= sz; 1869 | if( c<0x02af ){ 1870 | if( c>=0x80 || midClass[c&0x7f]='0' && c<='9' ){ 1873 | seenDigit = 1; 1874 | } 1875 | }else if( c>=0x0400 && c<=0x04ff ){ 1876 | scriptMask |= SCRIPT_CYRILLIC; 1877 | }else if( c>=0x0386 && c<=0x03ce ){ 1878 | scriptMask |= SCRIPT_GREEK; 1879 | }else if( c>=0x0590 && c<=0x05ff ){ 1880 | scriptMask |= SCRIPT_HEBREW; 1881 | }else if( c>=0x0600 && c<=0x06ff ){ 1882 | scriptMask |= SCRIPT_ARABIC; 1883 | } 1884 | } 1885 | if( scriptMask==0 && seenDigit ) scriptMask = SCRIPT_LATIN; 1886 | switch( scriptMask ){ 1887 | case 0: res = 999; break; 1888 | case SCRIPT_LATIN: res = 215; break; 1889 | case SCRIPT_CYRILLIC: res = 220; break; 1890 | case SCRIPT_GREEK: res = 200; break; 1891 | case SCRIPT_HEBREW: res = 125; break; 1892 | case SCRIPT_ARABIC: res = 160; break; 1893 | default: res = 998; break; 1894 | } 1895 | sqlite3_result_int(context, res); 1896 | } 1897 | 1898 | /* End transliterate 1899 | 
****************************************************************************** 1900 | ****************************************************************************** 1901 | ** Begin spellfix1 virtual table. 1902 | */ 1903 | 1904 | /* Maximum length of a phonehash used for querying the shadow table */ 1905 | #define SPELLFIX_MX_HASH 32 1906 | 1907 | /* Maximum number of hash strings to examine per query */ 1908 | #define SPELLFIX_MX_RUN 1 1909 | 1910 | typedef struct spellfix1_vtab spellfix1_vtab; 1911 | typedef struct spellfix1_cursor spellfix1_cursor; 1912 | 1913 | /* Fuzzy-search virtual table object */ 1914 | struct spellfix1_vtab { 1915 | sqlite3_vtab base; /* Base class - must be first */ 1916 | sqlite3 *db; /* Database connection */ 1917 | char *zDbName; /* Name of database holding this table */ 1918 | char *zTableName; /* Name of the virtual table */ 1919 | char *zCostTable; /* Table holding edit-distance cost numbers */ 1920 | EditDist3Config *pConfig3; /* Parsed edit distance costs */ 1921 | }; 1922 | 1923 | /* Fuzzy-search cursor object */ 1924 | struct spellfix1_cursor { 1925 | sqlite3_vtab_cursor base; /* Base class - must be first */ 1926 | spellfix1_vtab *pVTab; /* The table to which this cursor belongs */ 1927 | char *zPattern; /* rhs of MATCH clause */ 1928 | int idxNum; /* idxNum value passed to xFilter() */ 1929 | int nRow; /* Number of rows of content */ 1930 | int nAlloc; /* Number of allocated rows */ 1931 | int iRow; /* Current row of content */ 1932 | int iLang; /* Value of the langid= constraint */ 1933 | int iTop; /* Value of the top= constraint */ 1934 | int iScope; /* Value of the scope= constraint */ 1935 | int nSearch; /* Number of vocabulary items checked */ 1936 | sqlite3_stmt *pFullScan; /* Shadow query for a full table scan */ 1937 | struct spellfix1_row { /* For each row of content */ 1938 | sqlite3_int64 iRowid; /* Rowid for this row */ 1939 | char *zWord; /* Text for this row */ 1940 | int iRank; /* Rank for this row */ 1941 | int 
iDistance; /* Distance from pattern for this row */ 1942 | int iScore; /* Score for sorting */ 1943 | int iMatchlen; /* Value of matchlen column (or -1) */ 1944 | char zHash[SPELLFIX_MX_HASH]; /* the phonehash used for this match */ 1945 | } *a; 1946 | }; 1947 | 1948 | /* 1949 | ** Construct one or more SQL statements from the format string given 1950 | ** and then evaluate those statements. The success code is written 1951 | ** into *pRc. 1952 | ** 1953 | ** If *pRc is initially non-zero then this routine is a no-op. 1954 | */ 1955 | static void spellfix1DbExec( 1956 | int *pRc, /* Success code */ 1957 | sqlite3 *db, /* Database in which to run SQL */ 1958 | const char *zFormat, /* Format string for SQL */ 1959 | ... /* Arguments to the format string */ 1960 | ){ 1961 | va_list ap; 1962 | char *zSql; 1963 | if( *pRc ) return; 1964 | va_start(ap, zFormat); 1965 | zSql = sqlite3_vmprintf(zFormat, ap); 1966 | va_end(ap); 1967 | if( zSql==0 ){ 1968 | *pRc = SQLITE_NOMEM; 1969 | }else{ 1970 | *pRc = sqlite3_exec(db, zSql, 0, 0, 0); 1971 | sqlite3_free(zSql); 1972 | } 1973 | } 1974 | 1975 | /* 1976 | ** xDisconnect/xDestroy method for the fuzzy-search module. 1977 | */ 1978 | static int spellfix1Uninit(int isDestroy, sqlite3_vtab *pVTab){ 1979 | spellfix1_vtab *p = (spellfix1_vtab*)pVTab; 1980 | int rc = SQLITE_OK; 1981 | if( isDestroy ){ 1982 | sqlite3 *db = p->db; 1983 | spellfix1DbExec(&rc, db, "DROP TABLE IF EXISTS \"%w\".\"%w_vocab\"", 1984 | p->zDbName, p->zTableName); 1985 | } 1986 | if( rc==SQLITE_OK ){ 1987 | sqlite3_free(p->zTableName); 1988 | editDist3ConfigDelete(p->pConfig3); 1989 | sqlite3_free(p->zCostTable); 1990 | sqlite3_free(p); 1991 | } 1992 | return rc; 1993 | } 1994 | static int spellfix1Disconnect(sqlite3_vtab *pVTab){ 1995 | return spellfix1Uninit(0, pVTab); 1996 | } 1997 | static int spellfix1Destroy(sqlite3_vtab *pVTab){ 1998 | return spellfix1Uninit(1, pVTab); 1999 | } 2000 | 2001 | /* 2002 | ** Make a copy of a string. 
Remove leading and trailing whitespace 2003 | ** and dequote it. 2004 | */ 2005 | static char *spellfix1Dequote(const char *zIn){ 2006 | char *zOut; 2007 | int i, j; 2008 | char c; 2009 | while( isspace((unsigned char)zIn[0]) ) zIn++; 2010 | zOut = sqlite3_mprintf("%s", zIn); 2011 | if( zOut==0 ) return 0; 2012 | i = (int)strlen(zOut); 2013 | #if 0 /* The parser will never leave spaces at the end */ 2014 | while( i>0 && isspace(zOut[i-1]) ){ i--; } 2015 | #endif 2016 | zOut[i] = 0; 2017 | c = zOut[0]; 2018 | if( c=='\'' || c=='"' ){ 2019 | for(i=1, j=0; ALWAYS(zOut[i]); i++){ 2020 | zOut[j++] = zOut[i]; 2021 | if( zOut[i]==c ){ 2022 | if( zOut[i+1]==c ){ 2023 | i++; 2024 | }else{ 2025 | zOut[j-1] = 0; 2026 | break; 2027 | } 2028 | } 2029 | } 2030 | } 2031 | return zOut; 2032 | } 2033 | 2034 | 2035 | /* 2036 | ** xConnect/xCreate method for the spellfix1 module. Arguments are: 2037 | ** 2038 | ** argv[0] -> module name ("spellfix1") 2039 | ** argv[1] -> database name 2040 | ** argv[2] -> table name 2041 | ** argv[3].. -> optional arguments (i.e. 
"edit_cost_table" parameter) 2042 | */ 2043 | static int spellfix1Init( 2044 | int isCreate, 2045 | sqlite3 *db, 2046 | void *pAux, 2047 | int argc, const char *const*argv, 2048 | sqlite3_vtab **ppVTab, 2049 | char **pzErr 2050 | ){ 2051 | spellfix1_vtab *pNew = 0; 2052 | /* const char *zModule = argv[0]; // not used */ 2053 | const char *zDbName = argv[1]; 2054 | const char *zTableName = argv[2]; 2055 | int nDbName; 2056 | int rc = SQLITE_OK; 2057 | int i; 2058 | 2059 | nDbName = (int)strlen(zDbName); 2060 | pNew = sqlite3_malloc64( sizeof(*pNew) + nDbName + 1); 2061 | if( pNew==0 ){ 2062 | rc = SQLITE_NOMEM; 2063 | }else{ 2064 | memset(pNew, 0, sizeof(*pNew)); 2065 | pNew->zDbName = (char*)&pNew[1]; 2066 | memcpy(pNew->zDbName, zDbName, nDbName+1); 2067 | pNew->zTableName = sqlite3_mprintf("%s", zTableName); 2068 | pNew->db = db; 2069 | if( pNew->zTableName==0 ){ 2070 | rc = SQLITE_NOMEM; 2071 | }else{ 2072 | sqlite3_vtab_config(db, SQLITE_VTAB_INNOCUOUS); 2073 | rc = sqlite3_declare_vtab(db, 2074 | "CREATE TABLE x(word,rank,distance,langid, " 2075 | "score, matchlen, phonehash HIDDEN, " 2076 | "top HIDDEN, scope HIDDEN, srchcnt HIDDEN, " 2077 | "soundslike HIDDEN, command HIDDEN)" 2078 | ); 2079 | #define SPELLFIX_COL_WORD 0 2080 | #define SPELLFIX_COL_RANK 1 2081 | #define SPELLFIX_COL_DISTANCE 2 2082 | #define SPELLFIX_COL_LANGID 3 2083 | #define SPELLFIX_COL_SCORE 4 2084 | #define SPELLFIX_COL_MATCHLEN 5 2085 | #define SPELLFIX_COL_PHONEHASH 6 2086 | #define SPELLFIX_COL_TOP 7 2087 | #define SPELLFIX_COL_SCOPE 8 2088 | #define SPELLFIX_COL_SRCHCNT 9 2089 | #define SPELLFIX_COL_SOUNDSLIKE 10 2090 | #define SPELLFIX_COL_COMMAND 11 2091 | } 2092 | if( rc==SQLITE_OK && isCreate ){ 2093 | spellfix1DbExec(&rc, db, 2094 | "CREATE TABLE IF NOT EXISTS \"%w\".\"%w_vocab\"(\n" 2095 | " id INTEGER PRIMARY KEY,\n" 2096 | " rank INT,\n" 2097 | " langid INT,\n" 2098 | " word TEXT,\n" 2099 | " k1 TEXT,\n" 2100 | " k2 TEXT\n" 2101 | ");\n", 2102 | zDbName, zTableName 2103 | 
); 2104 | spellfix1DbExec(&rc, db, 2105 | "CREATE INDEX IF NOT EXISTS \"%w\".\"%w_vocab_index_langid_k2\" " 2106 | "ON \"%w_vocab\"(langid,k2);", 2107 | zDbName, zTableName, zTableName 2108 | ); 2109 | } 2110 | for(i=3; rc==SQLITE_OK && ibase); 2124 | }else{ 2125 | *ppVTab = (sqlite3_vtab *)pNew; 2126 | } 2127 | return rc; 2128 | } 2129 | 2130 | /* 2131 | ** The xConnect and xCreate methods 2132 | */ 2133 | static int spellfix1Connect( 2134 | sqlite3 *db, 2135 | void *pAux, 2136 | int argc, const char *const*argv, 2137 | sqlite3_vtab **ppVTab, 2138 | char **pzErr 2139 | ){ 2140 | return spellfix1Init(0, db, pAux, argc, argv, ppVTab, pzErr); 2141 | } 2142 | static int spellfix1Create( 2143 | sqlite3 *db, 2144 | void *pAux, 2145 | int argc, const char *const*argv, 2146 | sqlite3_vtab **ppVTab, 2147 | char **pzErr 2148 | ){ 2149 | return spellfix1Init(1, db, pAux, argc, argv, ppVTab, pzErr); 2150 | } 2151 | 2152 | /* 2153 | ** Clear all of the content from a cursor. 2154 | */ 2155 | static void spellfix1ResetCursor(spellfix1_cursor *pCur){ 2156 | int i; 2157 | for(i=0; inRow; i++){ 2158 | sqlite3_free(pCur->a[i].zWord); 2159 | } 2160 | pCur->nRow = 0; 2161 | pCur->iRow = 0; 2162 | pCur->nSearch = 0; 2163 | if( pCur->pFullScan ){ 2164 | sqlite3_finalize(pCur->pFullScan); 2165 | pCur->pFullScan = 0; 2166 | } 2167 | } 2168 | 2169 | /* 2170 | ** Resize the cursor to hold up to N rows of content 2171 | */ 2172 | static void spellfix1ResizeCursor(spellfix1_cursor *pCur, int N){ 2173 | struct spellfix1_row *aNew; 2174 | assert( N>=pCur->nRow ); 2175 | aNew = sqlite3_realloc64(pCur->a, sizeof(pCur->a[0])*N); 2176 | if( aNew==0 && N>0 ){ 2177 | spellfix1ResetCursor(pCur); 2178 | sqlite3_free(pCur->a); 2179 | pCur->nAlloc = 0; 2180 | pCur->a = 0; 2181 | }else{ 2182 | pCur->nAlloc = N; 2183 | pCur->a = aNew; 2184 | } 2185 | } 2186 | 2187 | 2188 | /* 2189 | ** Close a fuzzy-search cursor. 
2190 | */ 2191 | static int spellfix1Close(sqlite3_vtab_cursor *cur){ 2192 | spellfix1_cursor *pCur = (spellfix1_cursor *)cur; 2193 | spellfix1ResetCursor(pCur); 2194 | spellfix1ResizeCursor(pCur, 0); 2195 | sqlite3_free(pCur->zPattern); 2196 | sqlite3_free(pCur); 2197 | return SQLITE_OK; 2198 | } 2199 | 2200 | #define SPELLFIX_IDXNUM_MATCH 0x01 /* word MATCH $str */ 2201 | #define SPELLFIX_IDXNUM_LANGID 0x02 /* langid == $langid */ 2202 | #define SPELLFIX_IDXNUM_TOP 0x04 /* top = $top */ 2203 | #define SPELLFIX_IDXNUM_SCOPE 0x08 /* scope = $scope */ 2204 | #define SPELLFIX_IDXNUM_DISTLT 0x10 /* distance < $distance */ 2205 | #define SPELLFIX_IDXNUM_DISTLE 0x20 /* distance <= $distance */ 2206 | #define SPELLFIX_IDXNUM_ROWID 0x40 /* rowid = $rowid */ 2207 | #define SPELLFIX_IDXNUM_DIST (0x10|0x20) /* DISTLT and DISTLE */ 2208 | 2209 | /* 2210 | ** 2211 | ** The plan number is a bitmask of the SPELLFIX_IDXNUM_* values defined 2212 | ** above. 2213 | ** 2214 | ** When a MATCH term is usable, filter.argv[*] contains $str, $langid, $top, ** $scope and $distance, 2215 | ** if specified and in that order. Otherwise, when only a rowid ** equality term is usable, filter.argv[0] is $rowid. With neither, the ** plan number is 0 and no argv values are requested. 
2216 | */ 2217 | static int spellfix1BestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){ 2218 | int iPlan = 0; 2219 | int iLangTerm = -1; 2220 | int iTopTerm = -1; 2221 | int iScopeTerm = -1; 2222 | int iDistTerm = -1; 2223 | int iRowidTerm = -1; 2224 | int i; 2225 | const struct sqlite3_index_constraint *pConstraint; 2226 | pConstraint = pIdxInfo->aConstraint; 2227 | for(i=0; inConstraint; i++, pConstraint++){ 2228 | if( pConstraint->usable==0 ) continue; 2229 | 2230 | /* Terms of the form: word MATCH $str */ 2231 | if( (iPlan & SPELLFIX_IDXNUM_MATCH)==0 2232 | && pConstraint->iColumn==SPELLFIX_COL_WORD 2233 | && pConstraint->op==SQLITE_INDEX_CONSTRAINT_MATCH 2234 | ){ 2235 | iPlan |= SPELLFIX_IDXNUM_MATCH; 2236 | pIdxInfo->aConstraintUsage[i].argvIndex = 1; 2237 | pIdxInfo->aConstraintUsage[i].omit = 1; 2238 | } 2239 | 2240 | /* Terms of the form: langid = $langid */ 2241 | if( (iPlan & SPELLFIX_IDXNUM_LANGID)==0 2242 | && pConstraint->iColumn==SPELLFIX_COL_LANGID 2243 | && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ 2244 | ){ 2245 | iPlan |= SPELLFIX_IDXNUM_LANGID; 2246 | iLangTerm = i; 2247 | } 2248 | 2249 | /* Terms of the form: top = $top */ 2250 | if( (iPlan & SPELLFIX_IDXNUM_TOP)==0 2251 | && pConstraint->iColumn==SPELLFIX_COL_TOP 2252 | && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ 2253 | ){ 2254 | iPlan |= SPELLFIX_IDXNUM_TOP; 2255 | iTopTerm = i; 2256 | } 2257 | 2258 | /* Terms of the form: scope = $scope */ 2259 | if( (iPlan & SPELLFIX_IDXNUM_SCOPE)==0 2260 | && pConstraint->iColumn==SPELLFIX_COL_SCOPE 2261 | && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ 2262 | ){ 2263 | iPlan |= SPELLFIX_IDXNUM_SCOPE; 2264 | iScopeTerm = i; 2265 | } 2266 | 2267 | /* Terms of the form: distance < $dist or distance <= $dist */ 2268 | if( (iPlan & SPELLFIX_IDXNUM_DIST)==0 2269 | && pConstraint->iColumn==SPELLFIX_COL_DISTANCE 2270 | && (pConstraint->op==SQLITE_INDEX_CONSTRAINT_LT 2271 | || pConstraint->op==SQLITE_INDEX_CONSTRAINT_LE) 2272 | ){ 2273 | if( 
pConstraint->op==SQLITE_INDEX_CONSTRAINT_LT ){ 2274 | iPlan |= SPELLFIX_IDXNUM_DISTLT; 2275 | }else{ 2276 | iPlan |= SPELLFIX_IDXNUM_DISTLE; 2277 | } 2278 | iDistTerm = i; 2279 | } 2280 | 2281 | /* Terms of the form: rowid = $rowid */ 2282 | if( (iPlan & SPELLFIX_IDXNUM_ROWID)==0 2283 | && pConstraint->iColumn<0 2284 | && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ 2285 | ){ 2286 | iPlan |= SPELLFIX_IDXNUM_ROWID; 2287 | iRowidTerm = i; 2288 | } 2289 | } 2290 | if( iPlan&SPELLFIX_IDXNUM_MATCH ){ 2291 | int idx = 2; 2292 | pIdxInfo->idxNum = iPlan; 2293 | if( pIdxInfo->nOrderBy==1 2294 | && pIdxInfo->aOrderBy[0].iColumn==SPELLFIX_COL_SCORE 2295 | && pIdxInfo->aOrderBy[0].desc==0 2296 | ){ 2297 | pIdxInfo->orderByConsumed = 1; /* Default order by iScore */ 2298 | } 2299 | if( iPlan&SPELLFIX_IDXNUM_LANGID ){ 2300 | pIdxInfo->aConstraintUsage[iLangTerm].argvIndex = idx++; 2301 | pIdxInfo->aConstraintUsage[iLangTerm].omit = 1; 2302 | } 2303 | if( iPlan&SPELLFIX_IDXNUM_TOP ){ 2304 | pIdxInfo->aConstraintUsage[iTopTerm].argvIndex = idx++; 2305 | pIdxInfo->aConstraintUsage[iTopTerm].omit = 1; 2306 | } 2307 | if( iPlan&SPELLFIX_IDXNUM_SCOPE ){ 2308 | pIdxInfo->aConstraintUsage[iScopeTerm].argvIndex = idx++; 2309 | pIdxInfo->aConstraintUsage[iScopeTerm].omit = 1; 2310 | } 2311 | if( iPlan&SPELLFIX_IDXNUM_DIST ){ 2312 | pIdxInfo->aConstraintUsage[iDistTerm].argvIndex = idx++; 2313 | pIdxInfo->aConstraintUsage[iDistTerm].omit = 1; 2314 | } 2315 | pIdxInfo->estimatedCost = 1e5; 2316 | }else if( (iPlan & SPELLFIX_IDXNUM_ROWID) ){ 2317 | pIdxInfo->idxNum = SPELLFIX_IDXNUM_ROWID; 2318 | pIdxInfo->aConstraintUsage[iRowidTerm].argvIndex = 1; 2319 | pIdxInfo->aConstraintUsage[iRowidTerm].omit = 1; 2320 | pIdxInfo->estimatedCost = 5; 2321 | }else{ 2322 | pIdxInfo->idxNum = 0; 2323 | pIdxInfo->estimatedCost = 1e50; 2324 | } 2325 | return SQLITE_OK; 2326 | } 2327 | 2328 | /* 2329 | ** Open a new fuzzy-search cursor. 
2330 | */ 2331 | static int spellfix1Open(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){ 2332 | spellfix1_vtab *p = (spellfix1_vtab*)pVTab; 2333 | spellfix1_cursor *pCur; 2334 | pCur = sqlite3_malloc64( sizeof(*pCur) ); 2335 | if( pCur==0 ) return SQLITE_NOMEM; 2336 | memset(pCur, 0, sizeof(*pCur)); 2337 | pCur->pVTab = p; 2338 | *ppCursor = &pCur->base; 2339 | return SQLITE_OK; 2340 | } 2341 | 2342 | /* 2343 | ** Adjust a distance measurement by the word's rank in order to show 2344 | ** preference to common words. ** ** The result is iDistance + 32 - N, where N is the number of right-shifts ** needed to reduce iRank to zero (N is 0 when iRank<=0). A larger rank ** therefore yields a smaller score. 2345 | */ 2346 | static int spellfix1Score(int iDistance, int iRank){ 2347 | int iLog2; 2348 | for(iLog2=0; iRank>0; iLog2++, iRank>>=1){} 2349 | return iDistance + 32 - iLog2; 2350 | } 2351 | 2352 | /* 2353 | ** Compare two spellfix1_row objects for sorting purposes in qsort() such 2354 | ** that they sort in order of increasing score (the iScore field). 2355 | */ 2356 | static int SQLITE_CDECL spellfix1RowCompare(const void *A, const void *B){ 2357 | const struct spellfix1_row *a = (const struct spellfix1_row*)A; 2358 | const struct spellfix1_row *b = (const struct spellfix1_row*)B; 2359 | return a->iScore - b->iScore; 2360 | } 2361 | 2362 | /* 2363 | ** A structure used to pass information from spellfix1FilterForMatch() 2364 | ** into spellfix1RunQuery(). 
2365 | */ 2366 | typedef struct MatchQuery { 2367 | spellfix1_cursor *pCur; /* The cursor being queried */ 2368 | sqlite3_stmt *pStmt; /* Shadow table query statement */ 2369 | char zHash[SPELLFIX_MX_HASH]; /* The current phonehash for zPattern */ 2370 | const char *zPattern; /* Transliterated input string */ 2371 | int nPattern; /* Length of zPattern */ 2372 | EditDist3FromString *pMatchStr3; /* Original unicode string */ 2373 | EditDist3Config *pConfig3; /* Edit-distance cost coefficients */ 2374 | const EditDist3Lang *pLang; /* The selected language coefficients */ 2375 | int iLang; /* The language id */ 2376 | int iScope; /* Default scope */ 2377 | int iMaxDist; /* Maximum allowed edit distance, or -1 if there is no distance constraint */ 2378 | int rc; /* Error code */ 2379 | int nRun; /* Number of prior runs for the same zPattern */ 2380 | char azPrior[SPELLFIX_MX_RUN][SPELLFIX_MX_HASH]; /* Prior hashes */ 2381 | } MatchQuery; 2382 | 2383 | /* 2384 | ** Run a query looking for the best matches against zPattern using 2385 | ** zHash as the character class seed hash. 
2386 | */ 2387 | static void spellfix1RunQuery(MatchQuery *p, const char *zQuery, int nQuery){ 2388 | const char *zK1; 2389 | const char *zWord; 2390 | int iDist; 2391 | int iRank; 2392 | int iScore; 2393 | int iWorst = 0; 2394 | int idx; 2395 | int idxWorst = -1; 2396 | int i; 2397 | int iScope = p->iScope; 2398 | spellfix1_cursor *pCur = p->pCur; 2399 | sqlite3_stmt *pStmt = p->pStmt; 2400 | char zHash1[SPELLFIX_MX_HASH]; 2401 | char zHash2[SPELLFIX_MX_HASH]; 2402 | char *zClass; 2403 | int nClass; 2404 | int rc; 2405 | 2406 | if( pCur->a==0 || p->rc ) return; /* Prior memory allocation failure */ 2407 | zClass = (char*)phoneticHash((unsigned char*)zQuery, nQuery); 2408 | if( zClass==0 ){ 2409 | p->rc = SQLITE_NOMEM; 2410 | return; 2411 | } 2412 | nClass = (int)strlen(zClass); 2413 | if( nClass>SPELLFIX_MX_HASH-2 ){ 2414 | nClass = SPELLFIX_MX_HASH-2; 2415 | zClass[nClass] = 0; 2416 | } 2417 | if( nClass<=iScope ){ 2418 | if( nClass>2 ){ 2419 | iScope = nClass-1; 2420 | }else{ 2421 | iScope = nClass; 2422 | } 2423 | } 2424 | memcpy(zHash1, zClass, iScope); 2425 | sqlite3_free(zClass); 2426 | zHash1[iScope] = 0; 2427 | memcpy(zHash2, zHash1, iScope); 2428 | zHash2[iScope] = 'Z'; 2429 | zHash2[iScope+1] = 0; 2430 | #if SPELLFIX_MX_RUN>1 2431 | for(i=0; inRun; i++){ 2432 | if( strcmp(p->azPrior[i], zHash1)==0 ) return; 2433 | } 2434 | #endif 2435 | assert( p->nRunazPrior[p->nRun++], zHash1, iScope+1); 2437 | if( sqlite3_bind_text(pStmt, 1, zHash1, -1, SQLITE_STATIC)==SQLITE_NOMEM 2438 | || sqlite3_bind_text(pStmt, 2, zHash2, -1, SQLITE_STATIC)==SQLITE_NOMEM 2439 | ){ 2440 | p->rc = SQLITE_NOMEM; 2441 | return; 2442 | } 2443 | #if SPELLFIX_MX_RUN>1 2444 | for(i=0; inRow; i++){ 2445 | if( pCur->a[i].iScore>iWorst ){ 2446 | iWorst = pCur->a[i].iScore; 2447 | idxWorst = i; 2448 | } 2449 | } 2450 | #endif 2451 | while( sqlite3_step(pStmt)==SQLITE_ROW ){ 2452 | int iMatchlen = -1; 2453 | iRank = sqlite3_column_int(pStmt, 2); 2454 | if( p->pMatchStr3 ){ 2455 | int nWord = 
sqlite3_column_bytes(pStmt, 1); 2456 | zWord = (const char*)sqlite3_column_text(pStmt, 1); 2457 | iDist = editDist3Core(p->pMatchStr3, zWord, nWord, p->pLang, &iMatchlen); 2458 | }else{ 2459 | zK1 = (const char*)sqlite3_column_text(pStmt, 3); 2460 | if( zK1==0 ) continue; 2461 | iDist = editdist1(p->zPattern, zK1, 0); 2462 | } 2463 | if( iDist<0 ){ 2464 | p->rc = SQLITE_NOMEM; 2465 | break; 2466 | } 2467 | pCur->nSearch++; 2468 | 2469 | /* If there is a "distance < $dist" or "distance <= $dist" constraint, 2470 | ** check if this row meets it. If not, jump back up to the top of the 2471 | ** loop to process the next row. Otherwise, if the row does match the 2472 | ** distance constraint, check if the pCur->a[] array is already full. 2473 | ** If it is and no explicit "top = ?" constraint was present in the 2474 | ** query, grow the array to ensure there is room for the new entry. */ 2475 | assert( (p->iMaxDist>=0)==((pCur->idxNum & SPELLFIX_IDXNUM_DIST) ? 1 : 0) ); 2476 | if( p->iMaxDist>=0 ){ 2477 | if( iDist>p->iMaxDist ) continue; 2478 | if( pCur->nRow>=pCur->nAlloc && (pCur->idxNum & SPELLFIX_IDXNUM_TOP)==0 ){ 2479 | spellfix1ResizeCursor(pCur, pCur->nAlloc*2 + 10); 2480 | if( pCur->a==0 ) break; 2481 | } 2482 | } 2483 | 2484 | iScore = spellfix1Score(iDist,iRank); 2485 | if( pCur->nRownAlloc ){ 2486 | idx = pCur->nRow; 2487 | }else if( iScorea[idx].zWord); 2490 | }else{ 2491 | continue; 2492 | } 2493 | 2494 | pCur->a[idx].zWord = sqlite3_mprintf("%s", sqlite3_column_text(pStmt, 1)); 2495 | if( pCur->a[idx].zWord==0 ){ 2496 | p->rc = SQLITE_NOMEM; 2497 | break; 2498 | } 2499 | pCur->a[idx].iRowid = sqlite3_column_int64(pStmt, 0); 2500 | pCur->a[idx].iRank = iRank; 2501 | pCur->a[idx].iDistance = iDist; 2502 | pCur->a[idx].iScore = iScore; 2503 | pCur->a[idx].iMatchlen = iMatchlen; 2504 | memcpy(pCur->a[idx].zHash, zHash1, iScope+1); 2505 | if( pCur->nRownAlloc ) pCur->nRow++; 2506 | if( pCur->nRow==pCur->nAlloc ){ 2507 | iWorst = pCur->a[0].iScore; 2508 | 
idxWorst = 0; 2509 | for(i=1; inRow; i++){ 2510 | iScore = pCur->a[i].iScore; 2511 | if( iWorstrc = rc; 2520 | } 2521 | 2522 | /* 2523 | ** This version of the xFilter method work if the MATCH term is present 2524 | ** and we are doing a scan. 2525 | */ 2526 | static int spellfix1FilterForMatch( 2527 | spellfix1_cursor *pCur, 2528 | int argc, 2529 | sqlite3_value **argv 2530 | ){ 2531 | int idxNum = pCur->idxNum; 2532 | const unsigned char *zMatchThis; /* RHS of the MATCH operator */ 2533 | EditDist3FromString *pMatchStr3 = 0; /* zMatchThis as an editdist string */ 2534 | char *zPattern; /* Transliteration of zMatchThis */ 2535 | int nPattern; /* Length of zPattern */ 2536 | int iLimit = 20; /* Max number of rows of output */ 2537 | int iScope = 3; /* Use this many characters of zClass */ 2538 | int iLang = 0; /* Language code */ 2539 | char *zSql; /* SQL of shadow table query */ 2540 | sqlite3_stmt *pStmt = 0; /* Shadow table query */ 2541 | int rc; /* Result code */ 2542 | int idx = 1; /* Next available filter parameter */ 2543 | spellfix1_vtab *p = pCur->pVTab; /* The virtual table that owns pCur */ 2544 | MatchQuery x; /* For passing info to RunQuery() */ 2545 | 2546 | /* Load the cost table if we have not already done so */ 2547 | if( p->zCostTable!=0 && p->pConfig3==0 ){ 2548 | p->pConfig3 = sqlite3_malloc64( sizeof(p->pConfig3[0]) ); 2549 | if( p->pConfig3==0 ) return SQLITE_NOMEM; 2550 | memset(p->pConfig3, 0, sizeof(p->pConfig3[0])); 2551 | rc = editDist3ConfigLoad(p->pConfig3, p->db, p->zCostTable); 2552 | if( rc ) return rc; 2553 | } 2554 | memset(&x, 0, sizeof(x)); 2555 | x.iScope = 3; /* Default scope if none specified by "WHERE scope=N" */ 2556 | x.iMaxDist = -1; /* Maximum allowed edit distance */ 2557 | 2558 | if( idxNum&2 ){ 2559 | iLang = sqlite3_value_int(argv[idx++]); 2560 | } 2561 | if( idxNum&4 ){ 2562 | iLimit = sqlite3_value_int(argv[idx++]); 2563 | if( iLimit<1 ) iLimit = 1; 2564 | } 2565 | if( idxNum&8 ){ 2566 | x.iScope = 
sqlite3_value_int(argv[idx++]); 2567 | if( x.iScope<1 ) x.iScope = 1; 2568 | if( x.iScope>SPELLFIX_MX_HASH-2 ) x.iScope = SPELLFIX_MX_HASH-2; 2569 | } 2570 | if( idxNum&(16|32) ){ 2571 | x.iMaxDist = sqlite3_value_int(argv[idx++]); 2572 | if( idxNum&16 ) x.iMaxDist--; 2573 | if( x.iMaxDist<0 ) x.iMaxDist = 0; 2574 | } 2575 | spellfix1ResetCursor(pCur); 2576 | spellfix1ResizeCursor(pCur, iLimit); 2577 | zMatchThis = sqlite3_value_text(argv[0]); 2578 | if( zMatchThis==0 ) return SQLITE_OK; 2579 | if( p->pConfig3 ){ 2580 | x.pLang = editDist3FindLang(p->pConfig3, iLang); 2581 | pMatchStr3 = editDist3FromStringNew(x.pLang, (const char*)zMatchThis, -1); 2582 | if( pMatchStr3==0 ){ 2583 | x.rc = SQLITE_NOMEM; 2584 | goto filter_exit; 2585 | } 2586 | }else{ 2587 | x.pLang = 0; 2588 | } 2589 | zPattern = (char*)transliterate(zMatchThis, sqlite3_value_bytes(argv[0])); 2590 | sqlite3_free(pCur->zPattern); 2591 | pCur->zPattern = zPattern; 2592 | if( zPattern==0 ){ 2593 | x.rc = SQLITE_NOMEM; 2594 | goto filter_exit; 2595 | } 2596 | nPattern = (int)strlen(zPattern); 2597 | if( zPattern[nPattern-1]=='*' ) nPattern--; 2598 | zSql = sqlite3_mprintf( 2599 | "SELECT id, word, rank, coalesce(k1,word)" 2600 | " FROM \"%w\".\"%w_vocab\"" 2601 | " WHERE langid=%d AND k2>=?1 AND k2zDbName, p->zTableName, iLang 2603 | ); 2604 | if( zSql==0 ){ 2605 | x.rc = SQLITE_NOMEM; 2606 | pStmt = 0; 2607 | goto filter_exit; 2608 | } 2609 | rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, 0); 2610 | sqlite3_free(zSql); 2611 | pCur->iLang = iLang; 2612 | x.pCur = pCur; 2613 | x.pStmt = pStmt; 2614 | x.zPattern = zPattern; 2615 | x.nPattern = nPattern; 2616 | x.pMatchStr3 = pMatchStr3; 2617 | x.iLang = iLang; 2618 | x.rc = rc; 2619 | x.pConfig3 = p->pConfig3; 2620 | if( x.rc==SQLITE_OK ){ 2621 | spellfix1RunQuery(&x, zPattern, nPattern); 2622 | } 2623 | 2624 | if( pCur->a ){ 2625 | qsort(pCur->a, pCur->nRow, sizeof(pCur->a[0]), spellfix1RowCompare); 2626 | pCur->iTop = iLimit; 2627 | pCur->iScope = 
iScope; 2628 | }else{ 2629 | x.rc = SQLITE_NOMEM; 2630 | } 2631 | 2632 | filter_exit: 2633 | sqlite3_finalize(pStmt); 2634 | editDist3FromStringDelete(pMatchStr3); 2635 | return x.rc; 2636 | } 2637 | 2638 | /* 2639 | ** This version of xFilter handles a full-table scan case 2640 | */ 2641 | static int spellfix1FilterForFullScan( 2642 | spellfix1_cursor *pCur, 2643 | int argc, 2644 | sqlite3_value **argv 2645 | ){ 2646 | int rc = SQLITE_OK; 2647 | int idxNum = pCur->idxNum; 2648 | char *zSql; 2649 | spellfix1_vtab *pVTab = pCur->pVTab; 2650 | spellfix1ResetCursor(pCur); 2651 | assert( idxNum==0 || idxNum==64 ); 2652 | zSql = sqlite3_mprintf( 2653 | "SELECT word, rank, NULL, langid, id FROM \"%w\".\"%w_vocab\"%s", 2654 | pVTab->zDbName, pVTab->zTableName, 2655 | ((idxNum & 64) ? " WHERE rowid=?" : "") 2656 | ); 2657 | if( zSql==0 ) return SQLITE_NOMEM; 2658 | rc = sqlite3_prepare_v2(pVTab->db, zSql, -1, &pCur->pFullScan, 0); 2659 | sqlite3_free(zSql); 2660 | if( rc==SQLITE_OK && (idxNum & 64) ){ 2661 | assert( argc==1 ); 2662 | rc = sqlite3_bind_value(pCur->pFullScan, 1, argv[0]); 2663 | } 2664 | pCur->nRow = pCur->iRow = 0; 2665 | if( rc==SQLITE_OK ){ 2666 | rc = sqlite3_step(pCur->pFullScan); 2667 | if( rc==SQLITE_ROW ){ pCur->iRow = -1; rc = SQLITE_OK; } 2668 | if( rc==SQLITE_DONE ){ rc = SQLITE_OK; } 2669 | }else{ 2670 | pCur->iRow = 0; 2671 | } 2672 | return rc; 2673 | } 2674 | 2675 | 2676 | /* 2677 | ** Called to "rewind" a cursor back to the beginning so that 2678 | ** it starts its output over again. Always called at least once 2679 | ** prior to any spellfix1Column, spellfix1Rowid, or spellfix1Eof call. 
2680 | */ 2681 | static int spellfix1Filter( 2682 | sqlite3_vtab_cursor *cur, 2683 | int idxNum, const char *idxStr, 2684 | int argc, sqlite3_value **argv 2685 | ){ 2686 | spellfix1_cursor *pCur = (spellfix1_cursor *)cur; 2687 | int rc; 2688 | pCur->idxNum = idxNum; 2689 | if( idxNum & 1 ){ 2690 | rc = spellfix1FilterForMatch(pCur, argc, argv); 2691 | }else{ 2692 | rc = spellfix1FilterForFullScan(pCur, argc, argv); 2693 | } 2694 | return rc; 2695 | } 2696 | 2697 | 2698 | /* 2699 | ** Advance a cursor to its next row of output 2700 | */ 2701 | static int spellfix1Next(sqlite3_vtab_cursor *cur){ 2702 | spellfix1_cursor *pCur = (spellfix1_cursor *)cur; 2703 | int rc = SQLITE_OK; 2704 | if( pCur->iRow < pCur->nRow ){ 2705 | if( pCur->pFullScan ){ 2706 | rc = sqlite3_step(pCur->pFullScan); 2707 | if( rc!=SQLITE_ROW ) pCur->iRow = pCur->nRow; 2708 | if( rc==SQLITE_ROW || rc==SQLITE_DONE ) rc = SQLITE_OK; 2709 | }else{ 2710 | pCur->iRow++; 2711 | } 2712 | } 2713 | return rc; 2714 | } 2715 | 2716 | /* 2717 | ** Return TRUE if we are at the end-of-file 2718 | */ 2719 | static int spellfix1Eof(sqlite3_vtab_cursor *cur){ 2720 | spellfix1_cursor *pCur = (spellfix1_cursor *)cur; 2721 | return pCur->iRow>=pCur->nRow; 2722 | } 2723 | 2724 | /* 2725 | ** Return columns from the current row. 
2726 | */ 2727 | static int spellfix1Column( 2728 | sqlite3_vtab_cursor *cur, 2729 | sqlite3_context *ctx, 2730 | int i 2731 | ){ 2732 | spellfix1_cursor *pCur = (spellfix1_cursor*)cur; 2733 | if( pCur->pFullScan ){ 2734 | if( i<=SPELLFIX_COL_LANGID ){ 2735 | sqlite3_result_value(ctx, sqlite3_column_value(pCur->pFullScan, i)); 2736 | }else{ 2737 | sqlite3_result_null(ctx); 2738 | } 2739 | return SQLITE_OK; 2740 | } 2741 | switch( i ){ 2742 | case SPELLFIX_COL_WORD: { 2743 | sqlite3_result_text(ctx, pCur->a[pCur->iRow].zWord, -1, SQLITE_STATIC); 2744 | break; 2745 | } 2746 | case SPELLFIX_COL_RANK: { 2747 | sqlite3_result_int(ctx, pCur->a[pCur->iRow].iRank); 2748 | break; 2749 | } 2750 | case SPELLFIX_COL_DISTANCE: { 2751 | sqlite3_result_int(ctx, pCur->a[pCur->iRow].iDistance); 2752 | break; 2753 | } 2754 | case SPELLFIX_COL_LANGID: { 2755 | sqlite3_result_int(ctx, pCur->iLang); 2756 | break; 2757 | } 2758 | case SPELLFIX_COL_SCORE: { 2759 | sqlite3_result_int(ctx, pCur->a[pCur->iRow].iScore); 2760 | break; 2761 | } 2762 | case SPELLFIX_COL_MATCHLEN: { 2763 | int iMatchlen = pCur->a[pCur->iRow].iMatchlen; 2764 | if( iMatchlen<0 ){ 2765 | int nPattern = (int)strlen(pCur->zPattern); 2766 | char *zWord = pCur->a[pCur->iRow].zWord; 2767 | int nWord = (int)strlen(zWord); 2768 | 2769 | if( nPattern>0 && pCur->zPattern[nPattern-1]=='*' ){ 2770 | char *zTranslit; 2771 | int res; 2772 | zTranslit = (char *)transliterate((unsigned char *)zWord, nWord); 2773 | if( !zTranslit ) return SQLITE_NOMEM; 2774 | res = editdist1(pCur->zPattern, zTranslit, &iMatchlen); 2775 | sqlite3_free(zTranslit); 2776 | if( res<0 ) return SQLITE_NOMEM; 2777 | iMatchlen = translen_to_charlen(zWord, nWord, iMatchlen); 2778 | }else{ 2779 | iMatchlen = utf8Charlen(zWord, nWord); 2780 | } 2781 | } 2782 | 2783 | sqlite3_result_int(ctx, iMatchlen); 2784 | break; 2785 | } 2786 | case SPELLFIX_COL_PHONEHASH: { 2787 | sqlite3_result_text(ctx, pCur->a[pCur->iRow].zHash, -1, SQLITE_STATIC); 2788 | break; 2789 
| } 2790 | case SPELLFIX_COL_TOP: { 2791 | sqlite3_result_int(ctx, pCur->iTop); 2792 | break; 2793 | } 2794 | case SPELLFIX_COL_SCOPE: { 2795 | sqlite3_result_int(ctx, pCur->iScope); 2796 | break; 2797 | } 2798 | case SPELLFIX_COL_SRCHCNT: { 2799 | sqlite3_result_int(ctx, pCur->nSearch); 2800 | break; 2801 | } 2802 | default: { 2803 | sqlite3_result_null(ctx); 2804 | break; 2805 | } 2806 | } 2807 | return SQLITE_OK; 2808 | } 2809 | 2810 | /* 2811 | ** The rowid. 2812 | */ 2813 | static int spellfix1Rowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){ 2814 | spellfix1_cursor *pCur = (spellfix1_cursor*)cur; 2815 | if( pCur->pFullScan ){ 2816 | *pRowid = sqlite3_column_int64(pCur->pFullScan, 4); 2817 | }else{ 2818 | *pRowid = pCur->a[pCur->iRow].iRowid; 2819 | } 2820 | return SQLITE_OK; 2821 | } 2822 | 2823 | /* 2824 | ** This function is called by the xUpdate() method. It returns a string 2825 | ** containing the conflict mode that xUpdate() should use for the current 2826 | ** operation. One of: "ROLLBACK", "IGNORE", "ABORT" or "REPLACE". 2827 | */ 2828 | static const char *spellfix1GetConflict(sqlite3 *db){ 2829 | static const char *azConflict[] = { 2830 | /* Note: Instead of "FAIL" - "ABORT". */ 2831 | "ROLLBACK", "IGNORE", "ABORT", "ABORT", "REPLACE" 2832 | }; 2833 | int eConflict = sqlite3_vtab_on_conflict(db); 2834 | 2835 | assert( eConflict==SQLITE_ROLLBACK || eConflict==SQLITE_IGNORE 2836 | || eConflict==SQLITE_FAIL || eConflict==SQLITE_ABORT 2837 | || eConflict==SQLITE_REPLACE 2838 | ); 2839 | assert( SQLITE_ROLLBACK==1 ); 2840 | assert( SQLITE_IGNORE==2 ); 2841 | assert( SQLITE_FAIL==3 ); 2842 | assert( SQLITE_ABORT==4 ); 2843 | assert( SQLITE_REPLACE==5 ); 2844 | 2845 | return azConflict[eConflict-1]; 2846 | } 2847 | 2848 | /* 2849 | ** The xUpdate() method. 
2850 | */ 2851 | static int spellfix1Update( 2852 | sqlite3_vtab *pVTab, 2853 | int argc, 2854 | sqlite3_value **argv, 2855 | sqlite_int64 *pRowid 2856 | ){ 2857 | int rc = SQLITE_OK; 2858 | sqlite3_int64 rowid, newRowid; 2859 | spellfix1_vtab *p = (spellfix1_vtab*)pVTab; 2860 | sqlite3 *db = p->db; 2861 | 2862 | if( argc==1 ){ 2863 | /* A delete operation on the rowid given by argv[0] */ 2864 | rowid = *pRowid = sqlite3_value_int64(argv[0]); 2865 | spellfix1DbExec(&rc, db, "DELETE FROM \"%w\".\"%w_vocab\" " 2866 | " WHERE id=%lld", 2867 | p->zDbName, p->zTableName, rowid); 2868 | }else{ 2869 | const unsigned char *zWord = sqlite3_value_text(argv[SPELLFIX_COL_WORD+2]); 2870 | int nWord = sqlite3_value_bytes(argv[SPELLFIX_COL_WORD+2]); 2871 | int iLang = sqlite3_value_int(argv[SPELLFIX_COL_LANGID+2]); 2872 | int iRank = sqlite3_value_int(argv[SPELLFIX_COL_RANK+2]); 2873 | const unsigned char *zSoundslike = 2874 | sqlite3_value_text(argv[SPELLFIX_COL_SOUNDSLIKE+2]); 2875 | int nSoundslike = sqlite3_value_bytes(argv[SPELLFIX_COL_SOUNDSLIKE+2]); 2876 | char *zK1, *zK2; 2877 | int i; 2878 | char c; 2879 | const char *zConflict = spellfix1GetConflict(db); 2880 | 2881 | if( zWord==0 ){ 2882 | /* Inserts of the form: INSERT INTO table(command) VALUES('xyzzy'); 2883 | ** cause zWord to be NULL, so we look at the "command" column to see 2884 | ** what special actions to take */ 2885 | const char *zCmd = 2886 | (const char*)sqlite3_value_text(argv[SPELLFIX_COL_COMMAND+2]); 2887 | if( zCmd==0 ){ 2888 | pVTab->zErrMsg = sqlite3_mprintf("NOT NULL constraint failed: %s.word", 2889 | p->zTableName); 2890 | return SQLITE_CONSTRAINT_NOTNULL; 2891 | } 2892 | if( strcmp(zCmd,"reset")==0 ){ 2893 | /* Reset the edit cost table (if there is one). 
*/ 2894 | editDist3ConfigDelete(p->pConfig3); 2895 | p->pConfig3 = 0; 2896 | return SQLITE_OK; 2897 | } 2898 | if( strncmp(zCmd,"edit_cost_table=",16)==0 ){ 2899 | editDist3ConfigDelete(p->pConfig3); 2900 | p->pConfig3 = 0; 2901 | sqlite3_free(p->zCostTable); 2902 | p->zCostTable = spellfix1Dequote(zCmd+16); 2903 | if( p->zCostTable==0 ) return SQLITE_NOMEM; 2904 | if( p->zCostTable[0]==0 || sqlite3_stricmp(p->zCostTable,"null")==0 ){ 2905 | sqlite3_free(p->zCostTable); 2906 | p->zCostTable = 0; 2907 | } 2908 | return SQLITE_OK; 2909 | } 2910 | pVTab->zErrMsg = sqlite3_mprintf("unknown value for %s.command: \"%w\"", 2911 | p->zTableName, zCmd); 2912 | return SQLITE_ERROR; 2913 | } 2914 | if( iRank<1 ) iRank = 1; 2915 | if( zSoundslike ){ 2916 | zK1 = (char*)transliterate(zSoundslike, nSoundslike); 2917 | }else{ 2918 | zK1 = (char*)transliterate(zWord, nWord); 2919 | } 2920 | if( zK1==0 ) return SQLITE_NOMEM; 2921 | for(i=0; (c = zK1[i])!=0; i++){ 2922 | if( c>='A' && c<='Z' ) zK1[i] += 'a' - 'A'; 2923 | } 2924 | zK2 = (char*)phoneticHash((const unsigned char*)zK1, i); 2925 | if( zK2==0 ){ 2926 | sqlite3_free(zK1); 2927 | return SQLITE_NOMEM; 2928 | } 2929 | if( sqlite3_value_type(argv[0])==SQLITE_NULL ){ 2930 | if( sqlite3_value_type(argv[1])==SQLITE_NULL ){ 2931 | spellfix1DbExec(&rc, db, 2932 | "INSERT INTO \"%w\".\"%w_vocab\"(rank,langid,word,k1,k2) " 2933 | "VALUES(%d,%d,%Q,nullif(%Q,%Q),%Q)", 2934 | p->zDbName, p->zTableName, 2935 | iRank, iLang, zWord, zK1, zWord, zK2 2936 | ); 2937 | }else{ 2938 | newRowid = sqlite3_value_int64(argv[1]); 2939 | spellfix1DbExec(&rc, db, 2940 | "INSERT OR %s INTO \"%w\".\"%w_vocab\"(id,rank,langid,word,k1,k2) " 2941 | "VALUES(%lld,%d,%d,%Q,nullif(%Q,%Q),%Q)", 2942 | zConflict, p->zDbName, p->zTableName, 2943 | newRowid, iRank, iLang, zWord, zK1, zWord, zK2 2944 | ); 2945 | } 2946 | *pRowid = sqlite3_last_insert_rowid(db); 2947 | }else{ 2948 | rowid = sqlite3_value_int64(argv[0]); 2949 | newRowid = *pRowid = 
sqlite3_value_int64(argv[1]); 2950 | spellfix1DbExec(&rc, db, 2951 | "UPDATE OR %s \"%w\".\"%w_vocab\" SET id=%lld, rank=%d, langid=%d," 2952 | " word=%Q, k1=nullif(%Q,%Q), k2=%Q WHERE id=%lld", 2953 | zConflict, p->zDbName, p->zTableName, newRowid, iRank, iLang, 2954 | zWord, zK1, zWord, zK2, rowid 2955 | ); 2956 | } 2957 | sqlite3_free(zK1); 2958 | sqlite3_free(zK2); 2959 | } 2960 | return rc; 2961 | } 2962 | 2963 | /* 2964 | ** Rename the spellfix1 table. 2965 | */ 2966 | static int spellfix1Rename(sqlite3_vtab *pVTab, const char *zNew){ 2967 | spellfix1_vtab *p = (spellfix1_vtab*)pVTab; 2968 | sqlite3 *db = p->db; 2969 | int rc = SQLITE_OK; 2970 | char *zNewName = sqlite3_mprintf("%s", zNew); 2971 | if( zNewName==0 ){ 2972 | return SQLITE_NOMEM; 2973 | } 2974 | spellfix1DbExec(&rc, db, 2975 | "ALTER TABLE \"%w\".\"%w_vocab\" RENAME TO \"%w_vocab\"", 2976 | p->zDbName, p->zTableName, zNewName 2977 | ); 2978 | if( rc==SQLITE_OK ){ 2979 | sqlite3_free(p->zTableName); 2980 | p->zTableName = zNewName; 2981 | }else{ 2982 | sqlite3_free(zNewName); 2983 | } 2984 | return rc; 2985 | } 2986 | 2987 | 2988 | /* 2989 | ** A virtual table module that provides fuzzy search. 
2990 | */ 2991 | static sqlite3_module spellfix1Module = { 2992 | 0, /* iVersion */ 2993 | spellfix1Create, /* xCreate - handle CREATE VIRTUAL TABLE */ 2994 | spellfix1Connect, /* xConnect - reconnected to an existing table */ 2995 | spellfix1BestIndex, /* xBestIndex - figure out how to do a query */ 2996 | spellfix1Disconnect, /* xDisconnect - close a connection */ 2997 | spellfix1Destroy, /* xDestroy - handle DROP TABLE */ 2998 | spellfix1Open, /* xOpen - open a cursor */ 2999 | spellfix1Close, /* xClose - close a cursor */ 3000 | spellfix1Filter, /* xFilter - configure scan constraints */ 3001 | spellfix1Next, /* xNext - advance a cursor */ 3002 | spellfix1Eof, /* xEof - check for end of scan */ 3003 | spellfix1Column, /* xColumn - read data */ 3004 | spellfix1Rowid, /* xRowid - read data */ 3005 | spellfix1Update, /* xUpdate */ 3006 | 0, /* xBegin */ 3007 | 0, /* xSync */ 3008 | 0, /* xCommit */ 3009 | 0, /* xRollback */ 3010 | 0, /* xFindMethod */ 3011 | spellfix1Rename, /* xRename */ 3012 | 0, /* xSavepoint */ 3013 | 0, /* xRelease */ 3014 | 0, /* xRollbackTo */ 3015 | 0, /* xShadowName */ 3016 | 0 /* xIntegrity */ 3017 | }; 3018 | 3019 | /* 3020 | ** Register the various functions and the virtual table. 
3021 | */ 3022 | static int spellfix1Register(sqlite3 *db){ 3023 | int rc = SQLITE_OK; 3024 | int i; 3025 | rc = sqlite3_create_function(db, "spellfix1_translit", 1, 3026 | SQLITE_UTF8|SQLITE_DETERMINISTIC, 0, 3027 | transliterateSqlFunc, 0, 0); 3028 | if( rc==SQLITE_OK ){ 3029 | rc = sqlite3_create_function(db, "spellfix1_editdist", 2, 3030 | SQLITE_UTF8|SQLITE_DETERMINISTIC, 0, 3031 | editdistSqlFunc, 0, 0); 3032 | } 3033 | if( rc==SQLITE_OK ){ 3034 | rc = sqlite3_create_function(db, "spellfix1_phonehash", 1, 3035 | SQLITE_UTF8|SQLITE_DETERMINISTIC, 0, 3036 | phoneticHashSqlFunc, 0, 0); 3037 | } 3038 | if( rc==SQLITE_OK ){ 3039 | rc = sqlite3_create_function(db, "spellfix1_scriptcode", 1, 3040 | SQLITE_UTF8|SQLITE_DETERMINISTIC, 0, 3041 | scriptCodeSqlFunc, 0, 0); 3042 | } 3043 | if( rc==SQLITE_OK ){ 3044 | rc = sqlite3_create_module(db, "spellfix1", &spellfix1Module, 0); 3045 | } 3046 | if( rc==SQLITE_OK ){ 3047 | rc = editDist3Install(db); 3048 | } 3049 | 3050 | /* Verify sanity of the translit[] table */ 3051 | for(i=0; i