├── .gitignore ├── .clang-format ├── docs ├── json1.md ├── crypto.md ├── math.md ├── text.md ├── ipaddr.md ├── uuid.md ├── third-party.md ├── unicode.md ├── stats.md ├── re.md ├── fuzzy.md ├── fileio.md └── vsv.md ├── src ├── crypto │ ├── sha1.h │ ├── md5.h │ ├── sha2.h │ ├── md5.c │ └── sha1.c ├── fuzzy │ ├── fuzzy.h │ ├── hamming.c │ ├── common.h │ ├── levenshtein.c │ ├── optimal_string_alignment.c │ ├── phonetic.c │ ├── soundex.c │ ├── refined_soundex.c │ ├── damerau_levenshtein.c │ ├── jaro_winkler.c │ ├── common.c │ ├── editdist.c │ └── caverphone.c ├── re.h ├── sqlite3-crypto.c ├── sqlite3-text.c ├── sqlite3-ipaddr.c ├── sqlite3-uuid.c ├── sqlite3-math.c ├── sqlite3-re.c ├── sqlite3-fuzzy.c └── sqlite3-stats.c ├── test ├── vsv.sql ├── unicode.sql ├── re.sql ├── text.sql ├── uuid.sql ├── crypto.sql ├── json1.sql ├── stats.sql ├── ipaddr.sql ├── fileio.sql ├── math.sql └── fuzzy.sql ├── LICENSE ├── .github └── workflows │ ├── build.yml │ └── publish.yml ├── Makefile └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | dist -------------------------------------------------------------------------------- /.clang-format: -------------------------------------------------------------------------------- 1 | BasedOnStyle: Chromium 2 | IndentWidth: 4 3 | ColumnLimit: 100 -------------------------------------------------------------------------------- /docs/json1.md: -------------------------------------------------------------------------------- 1 | # json1: JSON handling in SQLite 2 | 3 | This is the 'native' SQLite [JSON1 extension](https://sqlite.org/json1.html). 4 | It's often compiled into SQLite build, but in case your build doesn't include it - I've compiled it separately. 
// Adapted from https://sqlite.org/src/file/ext/misc/sha1.c
// Public domain

// Declares the SHA-1 hashing API: a context structure plus the
// init / update / final functions that operate on it.

#ifndef SHA1_H
#define SHA1_H

#include <stddef.h>  // size_t

// Value is 20, which is the size in bytes of a SHA-1 digest.
// NOTE(review): despite the "BLOCK" in the name this looks like the digest
// size (compare MD5_BLOCK_SIZE in md5.h) - confirm against the callers.
#define SHA1_BLOCK_SIZE 20

// Hashing state carried between sha1_update() calls.
typedef struct SHA1Context {
    unsigned int state[5];
    unsigned int count[2];
    unsigned char buffer[64];
} SHA1Context;

// Returns an untyped pointer.
// NOTE(review): presumably a freshly created SHA1Context that the caller
// hands to sha1_update()/sha1_final() - confirm ownership in sha1.c.
void* sha1_init(void);

// Feeds `len` bytes starting at `data` into the hash held in `ctx`.
void sha1_update(SHA1Context* ctx, const unsigned char data[], size_t len);

// Finishes the computation for `ctx` and writes the digest into `hash`.
// NOTE(review): `hash` presumably needs room for SHA1_BLOCK_SIZE bytes and the
// return value is presumably the digest length - confirm in sha1.c.
int sha1_final(SHA1Context* ctx, unsigned char hash[]);

#endif  // SHA1_H
-- Copyright (c) 2021 Anton Zhiyanov, MIT License
-- https://github.com/nalgeon/sqlean

-- Tests for the vsv extension: a CSV file is exposed as a virtual table.
-- Every test row prints its number followed by the result of a comparison
-- (1 when the check holds, 0 otherwise).

.load dist/vsv

-- Fixture: build a three-row CSV file (id,name,city) through the shell.
.shell echo '11,Diane,London' > people.csv
.shell echo '22,Grace,Berlin' >> people.csv
.shell echo '33,Alice,Paris' >> people.csv

-- Map the CSV file onto a virtual table with an explicit three-column schema.
create virtual table people using vsv(
    filename=people.csv,
    schema="create table people(id integer, name text, city text)",
    columns=3,
    affinity=integer
);
-- 01: all three rows of the file are visible.
select '01', count(*) = 3 from people;
-- 02: a row can be looked up by id and its columns carry the expected values.
select '02', (id, name, city) = (22, 'Grace', 'Berlin') from people where id = 22;
-- 03: id comes back as an integer value rather than text.
select '03', typeof(id) = 'integer' from people where id = 22;

-- Remove the fixture file.
.shell rm -f people.csv
// Copyright (c) 2014 Ross Bayer, MIT License
// https://github.com/Rostepher/libstrcmp

// Public interface of the fuzzy string-matching routines.
// NOTE(review): the parameter names below are descriptive placeholders added
// for readability only; the exact contract of every routine (NULL handling,
// ownership of returned buffers, meaning of the int arguments) lives in the
// implementing .c file and should be confirmed there.

#ifndef FUZZY_H
#define FUZZY_H

// ---- distance metrics: each takes a pair of C strings ----
int damerau_levenshtein(const char* str1, const char* str2);
int hamming(const char* str1, const char* str2);
double jaro(const char* str1, const char* str2);
double jaro_winkler(const char* str1, const char* str2);
unsigned int levenshtein(const char* str1, const char* str2);
unsigned int optimal_string_alignment(const char* str1, const char* str2);
// The third argument is a writable int; presumably an output slot - confirm.
int edit_distance(const char* str1, const char* str2, int* out);

// ---- phonetics: each returns a char buffer derived from the input ----
char* caverphone(const char* str);
char* soundex(const char* str);
char* refined_soundex(const char* str);
unsigned char* phonetic_hash(const unsigned char* input, int n);

// ---- translit ----
unsigned char* transliterate(const unsigned char* input, int n);
int translen_to_charlen(const char* input, int n, int m);
int script_code(const unsigned char* input, int n);

#endif  // FUZZY_H
11 | 12 | ``` 13 | sqlite> select reverse('hello world'); 14 | dlrow olleh 15 | ``` 16 | 17 | ### `split_part(source, sep, part)` 18 | 19 | Splits `source` string on `sep` and returns the given `part` (counting from one). 20 | 21 | ``` 22 | sqlite> select split_part('one;two;three', ';', 2); 23 | two 24 | sqlite> select split_part('one;;three', ';', 2); 25 | 26 | ``` 27 | 28 | If `sep` is composed of multiple characters, each character is treated as separator. E.g.: 29 | 30 | ``` 31 | sqlite> select split_part('one/two\three', '/\', 2); 32 | two 33 | ``` 34 | 35 | Only ASCII (1-byte) symbols are supported as separators. 36 | 37 | ## Usage 38 | 39 | ``` 40 | sqlite> .load ./text 41 | sqlite> select reverse('hello'); 42 | ``` 43 | 44 | [Download](https://github.com/nalgeon/sqlean/releases/latest) 45 | -------------------------------------------------------------------------------- /test/text.sql: -------------------------------------------------------------------------------- 1 | -- Copyright (c) 2021 Anton Zhiyanov, MIT License 2 | -- https://github.com/nalgeon/sqlean 3 | 4 | .load dist/text 5 | 6 | -- Reverse string 7 | select '01', reverse(null) is NULL; 8 | select '02', reverse('hello') = 'olleh'; 9 | select '03', reverse('привет') = 'тевирп'; 10 | select '04', reverse("𐌀𐌁𐌂") = '𐌂𐌁𐌀'; 11 | select '05', reverse('hello 42@ world') = 'dlrow @24 olleh'; 12 | 13 | -- Extract part from string 14 | select '11', split_part(NULL, ',', 2) is NULL; 15 | select '12', split_part('', ',', 2) = ''; 16 | select '13', split_part('one,two,three', ',', 2) = 'two'; 17 | select '14', split_part('one|two|three', '|', 2) = 'two'; 18 | select '15', split_part('один,два,три', ',', 2) = 'два'; 19 | select '16', split_part('one,two,three', ',', 10) = ''; 20 | select '17', split_part('one,two,three', ';', 2) = ''; 21 | select '18', split_part('one,two,three', '', 1) = 'one,two,three'; 22 | select '19', split_part('one,two,three', NULL, 2) is NULL; 23 | select '20', split_part('one,,,four', 
/*********************************************************************
 * Filename:   md5.h
 * Author:     Brad Conte (brad AT bradconte.com)
 * Source:     https://github.com/B-Con/crypto-algorithms
 * License:    Public Domain
 * Details:    Defines the API for the corresponding MD5 implementation.
 *********************************************************************/

#ifndef MD5_H
#define MD5_H

/*************************** HEADER FILES ***************************/
#include <stddef.h>  // size_t

/****************************** MACROS ******************************/
#define MD5_BLOCK_SIZE 16  // MD5 outputs a 16 byte digest

/**************************** DATA TYPES ****************************/
typedef unsigned char BYTE;  // 8-bit byte
typedef unsigned int WORD;   // 32-bit word, change to "long" for 16-bit machines

// Hashing state carried between md5_update() calls.
typedef struct {
    BYTE data[64];
    WORD datalen;
    unsigned long long bitlen;
    WORD state[4];
} MD5_CTX;

/*********************** FUNCTION DECLARATIONS **********************/
// Returns an untyped pointer.
// NOTE(review): presumably a freshly created MD5_CTX that the caller hands to
// md5_update()/md5_final() - confirm ownership in md5.c.
void* md5_init(void);

// Feeds `len` bytes starting at `data` into the hash held in `ctx`.
void md5_update(MD5_CTX* ctx, const BYTE data[], size_t len);

// Finishes the computation for `ctx` and writes the digest into `hash`.
// NOTE(review): `hash` presumably needs room for MD5_BLOCK_SIZE bytes and the
// return value is presumably the digest length - confirm in md5.c.
int md5_final(MD5_CTX* ctx, BYTE hash[]);

#endif  // MD5_H
'________-____-4___-____-____________'; 17 | select '09', uuid_str('hello') is null; 18 | select '10', uuid_str('') is null; 19 | select '11', uuid_str(null) is null; 20 | 21 | -- uuid_blob 22 | select '12', typeof(uuid_blob('d5a80b20-0d8f-11e5-b8cb-080027b6ec40')) = 'blob'; 23 | select '13', typeof(uuid_blob(uuid4())) = 'blob'; 24 | select '14', uuid_blob('hello') is null; 25 | select '15', uuid_blob('') is null; 26 | select '16', uuid_blob(null) is null; -------------------------------------------------------------------------------- /docs/ipaddr.md: -------------------------------------------------------------------------------- 1 | # ipaddr: IP address manipulation in SQLite 2 | 3 | Functions to manipulate IPs and subnets. Created by [Vincent Bernat](https://github.com/vincentbernat). 4 | 5 | ⚠️ This extension is not available on Windows. 6 | 7 | ### `ipfamily(ip)` 8 | 9 | Returns the family of a specified IP address. 10 | 11 | ``` 12 | sqlite> select ipfamily('192.168.1.1'); 13 | 4 14 | ``` 15 | 16 | ### `iphost(ip)` 17 | 18 | Returns the host part of an IP address. 19 | 20 | ``` 21 | sqlite> select iphost('2001:db8::123/64'); 22 | 2001:db8::123 23 | ``` 24 | 25 | ### `ipmasklen(ip)` 26 | 27 | Returns the prefix length of an IP address. 28 | 29 | ``` 30 | sqlite> select ipmasklen('192.168.16.12/24'); 31 | 24 32 | ``` 33 | 34 | ### `ipnetwork(ip)` 35 | 36 | Returns the network part of an IP address. 37 | 38 | ``` 39 | sqlite> select ipnetwork('192.168.16.12/24'); 40 | 192.168.16.0/24 41 | ``` 42 | 43 | ### `ipcontains(subnet, ip)` 44 | 45 | Returns `1` if `subnet` contains `ip` (which can be another subnet). 46 | `0` otherwise. 
47 | 48 | ``` 49 | sqlite> select ipcontains('192.168.16.0/24', '192.168.16.3'); 50 | 1 51 | ``` 52 | 53 | ## Usage 54 | 55 | ``` 56 | sqlite> .load ./ipaddr 57 | sqlite> select ipfamily('2001:db8::1'); 58 | 6 59 | ``` 60 | 61 | [Download](https://github.com/nalgeon/sqlean/releases/latest) 62 | -------------------------------------------------------------------------------- /docs/uuid.md: -------------------------------------------------------------------------------- 1 | # uuid: Universally Unique IDentifiers (UUIDs) in SQLite 2 | 3 | Limited support for [RFC 4122](https://www.ietf.org/rfc/rfc4122.txt) compliant UUIDs: 4 | 5 | - Generate a version 4 (random) UUID. 6 | - Convert a 16-byte blob into a well-formed UUID string and vice versa. 7 | 8 | Adapted from [uuid.c](https://sqlite.org/src/file/ext/misc/uuid.c) by D. Richard Hipp. 9 | 10 | Provides following functions: 11 | 12 | ### `uuid4()` 13 | 14 | Generates a version 4 (random) UUID as a string. Aliased as `gen_random_uuid()` for PostgreSQL compatibility. 15 | 16 | ``` 17 | sqlite> select uuid4(); 18 | c476b6e9-35f1-4afd-9552-704cd7edbe27 19 | 20 | sqlite> select gen_random_uuid(); 21 | 8d144638-3baf-4901-a554-b541142c152b 22 | ``` 23 | 24 | ### `uuid_str(X)` 25 | 26 | Converts a UUID `X` into a well-formed UUID string. `X` can be either a string or a blob. 27 | 28 | ``` 29 | sqlite> select uuid_str(randomblob(16)); 30 | fb6f9675-7509-d8b7-0891-00d4e6230894 31 | ``` 32 | 33 | ### `uuid_blob(X)` 34 | 35 | Converts a UUID `X` into a 16-byte blob. X can be either a string or a blob. 
36 | 37 | ``` 38 | sqlite> select hex(uuid_blob(uuid4())); 39 | 7192B1B452964E809500CF0364476CD3 40 | ``` 41 | 42 | ## Usage 43 | 44 | ``` 45 | sqlite> .load ./uuid 46 | sqlite> select uuid4(); 47 | ``` 48 | 49 | [Download](https://github.com/nalgeon/sqlean/releases/latest) 50 | -------------------------------------------------------------------------------- /docs/third-party.md: -------------------------------------------------------------------------------- 1 | # Third-party authors 2 | 3 | SQLean relies heavily on third-party SQLite extensions and open source libraries. Some of them are public domain, others use free permissive licenses. SQLean does not use code distributed under copyleft or non-free licenses. 4 | 5 | | Library | Author | License | 6 | | ------- | ------ | ------- | 7 | | [crypto-algorithms](https://github.com/B-Con/crypto-algorithms) | Brad Conte | Public Domain | 8 | | extension-functions.c | Liam Healy | Public Domain | 9 | | [fileio.c](https://www.sqlite.org/src/file/ext/misc/fileio.c) | D. Richard Hipp | Public Domain | 10 | | [libstrcmp](https://github.com/Rostepher/libstrcmp) | Ross Bayer | MIT License | 11 | | [percentile.c](https://sqlite.org/src/file/ext/misc/percentile.c) | D. Richard Hipp | Public Domain | 12 | | [regexp.old](https://github.com/garyhouston/regexp.old) | Henry Spencer | Spencer License 94 | 13 | | [series.c](https://sqlite.org/src/file/ext/misc/series.c) | D. Richard Hipp | Public Domain | 14 | | [sha1.c](https://sqlite.org/src/file/ext/misc/sha1.c) | D. Richard Hipp | Public Domain | 15 | | sha2.c | [Aaron D. Gifford](https://aarongifford.com/) | 3-Clause BSD License | 16 | | [sqlite3_unicode](https://github.com/Zensey/sqlite3_unicode) | Unknown Author | Public Domain | 17 | | [uuid.c](https://sqlite.org/src/file/ext/misc/uuid.c) | D. 
/// Computes and returns the hamming distance between two strings, i.e. the
/// number of positions at which the corresponding bytes differ. More
/// information about the algorithm can be found here:
/// http://en.wikipedia.org/wiki/Hamming_distance
///
/// @param str1 first string
/// @param str2 second string
///
/// @returns hamming distance, or -1 if str1 and str2 do not have the same
///          length or if one or both of str1 and str2 are NULL
int hamming(const char* str1, const char* str2) {
    // NULL input is reported through the documented -1 result. An assert()
    // would vanish under NDEBUG and leave a NULL dereference behind.
    if (str1 == NULL || str2 == NULL) {
        return -1;
    }

    // Walk both strings in lockstep until at least one of them ends; this
    // needs no separate length computation and handles empty strings too.
    int dist = 0;
    while (*str1 != '\0' && *str2 != '\0') {
        dist += (*str1 != *str2);
        str1++;
        str2++;
    }

    // If exactly one string still has bytes left, the lengths differ.
    if (*str1 != '\0' || *str2 != '\0') {
        return -1;
    }

    return dist;
}
-- Copyright (c) 2021 Anton Zhiyanov, MIT License
-- https://github.com/nalgeon/sqlean

-- Tests for the json1 extension. Every test row prints its number followed by
-- the result of a comparison (1 when the check holds, 0 otherwise).

-- NOTE(review): tests 31-32 call generate_series(); presumably that is why
-- the stats extension is loaded first (test/stats.sql uses generate_series()
-- after loading only dist/stats) - confirm.
.load dist/stats
.load dist/json1

-- total of 17 functions

-- 01-04: constructing and normalizing JSON values
select '01', json('{"answer" : 42}') = '{"answer":42}';
select '02', json_array(1, 2, 3, 4) = '[1,2,3,4]';
select '03', json_array_length('[1,2,3,4]') = 4;
select '04', json_object('answer', 42) = '{"answer":42}';

-- 11-16: reading and modifying values addressed by a path
select '11', json_extract('{"answer":42}', '$.answer') = 42;
select '12', json_insert('[1,2,3]', '$[#]', 42) = '[1,2,3,42]';
select '13', json_replace('{"answer":42}', '$.answer', 'no') = '{"answer":"no"}';
select '14', json_set('{"answer":42}', '$.useful', false) = '{"answer":42,"useful":0}';
select '15', json_patch('{"a":1,"b":2,"c":3}', '{"b":10,"d":11}') = '{"a":1,"b":10,"c":3,"d":11}';
select '16', json_remove('{"answer":42,"useful":0}', '$.useful') = '{"answer":42}';

-- 21-23: inspection and quoting
select '21', json_type('{"answer":42}') = 'object';
select '22', json_valid('{"answer":42}') = 1;
select '23', json_quote('answer') = '"answer"';

-- 31-32: aggregate functions over the rows 1..4
select '31', json_group_array(value) = '[1,2,3,4]' from generate_series(1,4);
select '32', json_group_object('v', value) = '{"v":1,"v":2,"v":3,"v":4}' from generate_series(1,4);

-- 41-44: table-valued functions, with and without a starting path
select '41', sum(value) = 10 from json_each('[1,2,3,4]');
select '42', sum(value) = 10 from json_each('{"a":[1,2,3,4]}', '$.a');
select '43', count(*) = 6 from json_tree('{"a":[1,2,3,4]}');
select '44', count(*) = 5 from json_tree('{"a":[1,2,3,4]}', '$.a');
-- Copyright (c) 2021 Anton Zhiyanov, MIT License
-- https://github.com/nalgeon/sqlean

-- Tests for the stats extension. Every test row prints its number followed by
-- the result of a comparison (1 when the check holds, 0 otherwise).

.load dist/stats

-- 01-12: percentile(value, P) and the matching fixed-percentile function
-- (percentile_25, median, percentile_75, ...) are checked against the same
-- expected value, pair by pair.
select '01', percentile(value, 25) = 25.5 from generate_series(1, 99);
select '02', percentile_25(value) = 25.5 from generate_series(1, 99);

select '03', percentile(value, 50) = 50 from generate_series(1, 99);
select '04', median(value) = 50 from generate_series(1, 99);

select '05', percentile(value, 75) = 74.5 from generate_series(1, 99);
select '06', percentile_75(value) = 74.5 from generate_series(1, 99);

select '07', percentile(value, 90) = 89.2 from generate_series(1, 99);
select '08', percentile_90(value) = 89.2 from generate_series(1, 99);

-- Note: this pair runs over 1..100, unlike the other pairs (1..99).
select '09', percentile(value, 95) = 95.05 from generate_series(1, 100);
select '10', percentile_95(value) = 95.05 from generate_series(1, 100);

select '11', percentile(value, 99) = 98.02 from generate_series(1, 99);
select '12', percentile_99(value) = 98.02 from generate_series(1, 99);

-- 21-23: standard deviation; stddev and stddev_samp are expected to agree,
-- stddev_pop yields a different value. Results are rounded before comparing.
select '21', round(stddev(value), 1) = 28.7 from generate_series(1, 99);
select '22', round(stddev_samp(value), 1) = 28.7 from generate_series(1, 99);
select '23', round(stddev_pop(value), 1) = 28.6 from generate_series(1, 99);

-- 31-33: variance; variance and var_samp are expected to agree,
-- var_pop yields a different value.
select '31', variance(value) = 825 from generate_series(1, 99);
select '32', var_samp(value) = 825 from generate_series(1, 99);
select '33', round(var_pop(value), 0) = 817 from generate_series(1, 99);

-- 41-43: generate_series itself, called with (start, stop),
-- (start, stop, step) and start only; in the last case the row count is
-- capped by the limit clause.
select '41', (count(*), min(value), max(value)) = (99, 1, 99) from generate_series(1, 99);
select '42', (count(*), min(value), max(value)) = (20, 0, 95) from generate_series(0, 99, 5);
with tmp as (select * from generate_series(20) limit 10)
select '43', (count(*), min(value), max(value)) = (10, 20, 29) from tmp;
Spencer, Spencer License 94 2 | // https://github.com/garyhouston/regexp.old 3 | // Modified by Anton Zhiyanov, https://github.com/nalgeon/sqlean/, MIT License 4 | 5 | /* 6 | * Copyright (c) 1986, 1993, 1995 by University of Toronto. 7 | * Written by Henry Spencer. Not derived from licensed software. 8 | * 9 | * Permission is granted to anyone to use this software for any 10 | * purpose on any computer system, and to redistribute it in any way, 11 | * subject to the following restrictions: 12 | * 13 | * 1. The author is not responsible for the consequences of use of 14 | * this software, no matter how awful, even if they arise 15 | * from defects in it. 16 | * 17 | * 2. The origin of this software must not be misrepresented, either 18 | * by explicit claim or by omission. 19 | * 20 | * 3. Altered versions must be plainly marked as such, and must not 21 | * be misrepresented (by explicit claim or omission) as being 22 | * the original software. 23 | * 24 | * 4. This notice must not be removed or altered. 25 | */ 26 | 27 | /* 28 | * Definitions etc. for regexp(3) routines. 29 | * 30 | * Caveat: this is V8 regexp(3) [actually, a reimplementation thereof], 31 | * not the System V one. 32 | */ 33 | #define NSUBEXP 10 34 | typedef struct regexp { 35 | char* startp[NSUBEXP]; 36 | char* endp[NSUBEXP]; 37 | char regstart; /* Internal use only. */ 38 | char reganch; /* Internal use only. */ 39 | char* regmust; /* Internal use only. */ 40 | int regmlen; /* Internal use only. */ 41 | char program[1]; /* Unwarranted chumminess with compiler. 
// Adapted from the spellfix SQLite extension, Public Domain
// https://www.sqlite.org/src/file/ext/misc/spellfix.c

// Shared constants and small helper macros for the fuzzy-matching sources.

#ifndef COMMON_H
#define COMMON_H

/*
** Character classes for ASCII characters:
**
**   0   ''        Silent letters:   H W
**   1   'A'       Any vowel:   A E I O U (Y)
**   2   'B'       A bilabial stop or fricative:  B F P V W
**   3   'C'       Other fricatives or back stops:  C G J K Q S X Z
**   4   'D'       Alveolar stops:  D T
**   5   'H'       Letter H at the beginning of a word
**   6   'L'       Glide:  L
**   7   'R'       Semivowel:  R
**   8   'M'       Nasals:  M N
**   9   'Y'       Letter Y at the beginning of a word.
**   10  '9'       Digits: 0 1 2 3 4 5 6 7 8 9
**   11  ' '       White space
**   12  '?'       Other.
*/
#define CCLASS_SILENT 0
#define CCLASS_VOWEL 1
#define CCLASS_B 2
#define CCLASS_C 3
#define CCLASS_D 4
#define CCLASS_H 5
#define CCLASS_L 6
#define CCLASS_R 7
#define CCLASS_M 8
#define CCLASS_Y 9
#define CCLASS_DIGIT 10
#define CCLASS_SPACE 11
#define CCLASS_OTHER 12

// Script identifiers; each value is a distinct bit, so they can be OR-ed.
#define SCRIPT_LATIN 0x0001
#define SCRIPT_CYRILLIC 0x0002
#define SCRIPT_GREEK 0x0004
#define SCRIPT_HEBREW 0x0008
#define SCRIPT_ARABIC 0x0010

// Both expand to a constant; the argument is discarded and never evaluated.
#define ALWAYS(X) 1
#define NEVER(X) 0

// Copyright (c) 2014 Ross Bayer, MIT License
// https://github.com/Rostepher/libstrcmp

// Comparison helpers. Every expansion is fully parenthesized so the macros
// can be used inside larger expressions (e.g. `MIN(a, b) + 1`).
// Arguments may be evaluated more than once: do not pass expressions with
// side effects such as `i++`.
#define EQ(a, b) ((a) == (b))
#define NOT_EQ(a, b) (!EQ(a, b))

#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#define MIN3(a, b, c) MIN(MIN(a, b), c)
#define MIN4(a, b, c, d) MIN(MIN(a, b), MIN(c, d))

#define MAX(a, b) (((a) > (b)) ? (a) : (b))
#define MAX3(a, b, c) MAX(MAX(a, b), c)
// Compares all four arguments (the last pair is c and d).
#define MAX4(a, b, c, d) MAX(MAX(a, b), MAX(c, d))

#endif  // COMMON_H
'23', ipcontains('192.168.16.0/24', '192.168.16.0/26') = 1; 29 | select '24', ipcontains('192.168.16.0/27', '192.168.16.0/26') = 0; 30 | select '25', ipcontains('192.168.16.0/25', '192.168.16.128/26') = 0; 31 | select '26', ipcontains('2001:db8::/48', '2001:db8::/64') = 1; 32 | select '27', ipcontains('2001:db8::/56', '2001:db8::/48') = 0; 33 | select '28', ipcontains('2001:db8::/56', '2001:db8:1::/64') = 0; 34 | -------------------------------------------------------------------------------- /docs/unicode.md: -------------------------------------------------------------------------------- 1 | # unicode: Unicode support for SQLite 2 | 3 | Implements case-insensitive string comparison for Unicode strings. Has no external dependencies (like libicu). Adapted from [sqlite3_unicode](https://github.com/Zensey/sqlite3_unicode). 4 | 5 | Provides the following unicode features: 6 | 7 | - `upper()` and `lower()` functions to normalize case. 8 | - `like()` function and `LIKE` operator with case-independent matching. 9 | - `unaccent()` function to normalize strings by removing accents. 10 | 11 | Tries to override the default NOCASE case-insensitive collation sequence to support UTF-8 characters (available in SQLite CLI and C API only). 12 | 13 | ### Upper and lower 14 | 15 | ``` 16 | sqlite> select upper('привет'); 17 | ПРИВЕТ 18 | sqlite> select nupper('привет'); 19 | ПРИВЕТ 20 | ``` 21 | 22 | `nupper()` is an alias for `upper()` in case the latter is already overridden by some other extension. 23 | 24 | ``` 25 | sqlite> select lower('ПРИВЕТ'); 26 | привет 27 | sqlite> select nlower('ПРИВЕТ'); 28 | привет 29 | ``` 30 | 31 | `nlower()` is an alias for `lower()` in case the latter is already overridden by some other extension. 
32 | 33 | ### Case-insensitive LIKE 34 | 35 | The pattern in `like()` function goes first: 36 | 37 | ``` 38 | sqlite> select like('пРиВ_Т', 'привет'); 39 | 1 40 | ``` 41 | 42 | The pattern in `LIKE` operator goes second: 43 | 44 | ``` 45 | sqlite> select 'привет' like 'пРиВ_Т'; 46 | 1 47 | ``` 48 | 49 | ### Unaccent 50 | 51 | ``` 52 | sqlite> select unaccent('hôtel'); 53 | hotel 54 | ``` 55 | 56 | ## Usage 57 | 58 | Before: 59 | 60 | ``` 61 | sqlite> select upper('hello'); 62 | HELLO 63 | sqlite> select upper('привет'); 64 | привет 65 | ``` 66 | 67 | After: 68 | 69 | ``` 70 | sqlite> .load ./unicode 71 | sqlite> select upper('hello'); 72 | HELLO 73 | sqlite> select upper('привет'); 74 | ПРИВЕТ 75 | ``` 76 | 77 | [Download](https://github.com/nalgeon/sqlean/releases/latest) 78 | -------------------------------------------------------------------------------- /docs/stats.md: -------------------------------------------------------------------------------- 1 | # stats: Mathematical statistics in SQLite 2 | 3 | Common statistical functions. Adapted from [extension-functions.c](https://sqlite.org/contrib/) by Liam Healy, [percentile.c](https://sqlite.org/src/file/ext/misc/percentile.c) and [series.c](https://sqlite.org/src/file/ext/misc/series.c) by D. Richard Hipp. 4 | 5 | ### Aggregate functions 6 | 7 | - `median(x)` — median (50th percentile), 8 | - `percentile_25(x)` — 25th percentile, 9 | - `percentile_75(x)` — 75th percentile, 10 | - `percentile_90(x)` — 90th percentile, 11 | - `percentile_95(x)` — 95th percentile, 12 | - `percentile_99(x)` — 99th percentile, 13 | - `percentile(x, perc)` — custom percentile (`perc` between 0 and 100), 14 | - `stddev(x)` or `stddev_samp(x)` — sample standard deviation, 15 | - `stddev_pop(x)` — population standard deviation, 16 | - `variance(x)` or `var_samp(x)` — sample variance, 17 | - `var_pop(x)` — population variance. 
18 | 19 | ### generate_series(start[, stop[, step]]) 20 | 21 | This table-valued function generates a sequence of integer values starting with `start`, ending with `stop` (inclusive) with an optional `step`. 22 | 23 | Generate all integers from 1 to 99: 24 | 25 | ```sql 26 | select * from generate_series(1, 99); 27 | ``` 28 | 29 | Generate all multiples of 5 less than or equal to 100: 30 | 31 | ```sql 32 | select * from generate_series(5, 100, 5); 33 | ``` 34 | 35 | Generate 20 random integer values: 36 | 37 | ```sql 38 | select random() from generate_series(1, 20); 39 | ``` 40 | 41 | The `generate_series()` table has a single result column named `value` holding integer values, and a number of rows determined by the parameters `start`, `stop`, and `step`. The first row of the table has a value of `start`. Subsequent rows increase by `step` up to `stop`. 42 | 43 | `stop` defaults to 9223372036854775807. `step` defaults to 1. 44 | 45 | ## Usage 46 | 47 | ``` 48 | sqlite> .load ./stats 49 | sqlite> select median(value) from generate_series(1, 99); 50 | ``` 51 | 52 | [Download](https://github.com/nalgeon/sqlean/releases/latest) 53 | -------------------------------------------------------------------------------- /test/fileio.sql: -------------------------------------------------------------------------------- 1 | -- Copyright (c) 2021 Anton Zhiyanov, MIT License 2 | -- https://github.com/nalgeon/sqlean 3 | 4 | .load dist/fileio 5 | 6 | -- lsdir 7 | select '01', (name, mode, size) = ('LICENSE', 33188, 1108) from lsdir('LICENSE'); 8 | select '02', count(*) >= 10 from lsdir('test'); 9 | select '03', count(*) = 0 from lsdir('whatever.txt'); 10 | .shell mkdir parentdir 11 | .shell touch parentdir/parent.txt 12 | .shell mkdir parentdir/subdir 13 | .shell touch parentdir/subdir/child.txt 14 | select '04', count(*) = 3 from lsdir('parentdir'); 15 | select '05', count(*) = 3 from lsdir('parentdir', false); 16 | select '06', count(*) = 4 from lsdir('parentdir', true); 17 | 
.shell rm -rf parentdir 18 | 19 | -- lsmode 20 | select '11', lsmode(16877) = 'drwxr-xr-x'; 21 | select '12', lsmode(33206) = '-rw-rw-rw-'; 22 | select '13', lsmode(33188) = '-rw-r--r--'; 23 | select '14', lsmode(384) = '?rw-------'; 24 | select '15', lsmode(420) = '?rw-r--r--'; 25 | select '16', lsmode(436) = '?rw-rw-r--'; 26 | select '17', lsmode(438) = '?rw-rw-rw-'; 27 | select '18', lsmode(493) = '?rwxr-xr-x'; 28 | select '19', lsmode(511) = '?rwxrwxrwx'; 29 | 30 | -- mkdir 31 | .shell rm -rf hellodir 32 | select '21', mkdir('hellodir') is null; 33 | select '22', (name, mode) = ('hellodir', 16877) from fsdir('hellodir'); 34 | 35 | -- readfile 36 | .shell rm -f hello.txt 37 | .shell printf 'hello world' > hello.txt 38 | select '31', typeof(readfile('hello.txt')) = 'blob'; 39 | select '32', length(readfile('hello.txt')) = 11; 40 | select '33', readfile('whatever') is null; 41 | 42 | -- symlink 43 | .shell rm -f hello.txt 44 | .shell printf 'hello world' > hello.txt 45 | select '41', symlink('hello.txt', 'hello.lnk') is null; 46 | select '42', length(readfile('hello.lnk')) = 11; 47 | 48 | -- writefile 49 | .shell rm -f hello.txt 50 | select '51', writefile('hello.txt', 'hello world') = 11; 51 | select '52', (name, mode) = ('hello.txt', 33206) from fsdir('hello.txt'); 52 | select '53', writefile('hello.txt', 'hello world', 420) = 11; 53 | select '54', (name, mode) = ('hello.txt', 33188) from fsdir('hello.txt'); 54 | 55 | .shell rm -rf hellodir 56 | .shell rm -f hello.txt 57 | .shell rm -f hello.lnk 58 | -------------------------------------------------------------------------------- /test/math.sql: -------------------------------------------------------------------------------- 1 | -- Copyright (c) 2021 Anton Zhiyanov, MIT License 2 | -- https://github.com/nalgeon/sqlean 3 | 4 | .load dist/math 5 | 6 | -- total of 30 functions 7 | 8 | -- rounding (4) 9 | select '01', ceil(3.3) = 4; 10 | select '02', ceil(-3.9) = -3; 11 | select '03', ceiling(3.3) = 4; 12 | select 
'04', ceiling(-3.9) = -3; 13 | select '05', floor(3.9) = 3; 14 | select '06', floor(-3.9) = -4; 15 | select '07', trunc(3.3) = 3; 16 | select '08', trunc(3.9) = 3; 17 | select '09', trunc(-3.3) = -3; 18 | select '10', trunc(-3.9) = -3; 19 | 20 | -- log (5) 21 | select '11', round(ln(2.71828*2.71828)) = 2; 22 | select '12', round(log(100)) = 2; 23 | select '13', round(log10(100)) = 2; 24 | select '14', round(log2(4)) = 2; 25 | select '15', round(log(3,9)) = 2; 26 | 27 | -- power (4) 28 | select '16', round(exp(2), 3) = round(2.71828*2.71828, 3); 29 | select '17', pow(2, 10) = 1024; 30 | select '18', power(2, 10) = 1024; 31 | select '19', sqrt(100) = 10; 32 | 33 | -- trigonometric (3) 34 | select '21', cos(0) = 1; 35 | select '22', round(cos(pi()/2)) = 0; 36 | select '23', sin(0) = 0; 37 | select '24', round(sin(pi()/2)) = 1; 38 | select '25', tan(0) = 0; 39 | select '26', round(tan(0.8)) = 1; 40 | 41 | -- hyperbolic (3) 42 | select '31', cosh(0) = 1; 43 | select '32', round(cosh(2.07)) = 4; 44 | select '33', sinh(0) = 0; 45 | select '34', round(sinh(2.1)) = 4; 46 | select '35', tanh(0) = 0; 47 | select '36', round(tanh(3)) = 1; 48 | 49 | -- inverse trigonometric (4) 50 | select '41', acos(1) = 0; 51 | select '42', round(acos(0), 2) = round(pi()/2, 2); 52 | select '43', asin(0) = 0; 53 | select '44', round(asin(1), 2) = round(0.5*pi(), 2); 54 | select '45', atan(0) = 0; 55 | select '46', round(atan(pi()/2)) = 1; 56 | select '47', round(atan2(1, 2), 2) = 0.46; 57 | select '48', round(atan2(pi(), 2)) = 1; 58 | 59 | -- inverse hyperbolic (3) 60 | select '51', acosh(1) = 0; 61 | select '52', round(acosh(4)) = 2; 62 | select '53', asinh(0) = 0; 63 | select '54', round(asinh(4)) = 2; 64 | select '55', atanh(0) = 0; 65 | select '56', round(atanh(0.8)) = 1; 66 | 67 | -- angular measures (2) 68 | select '61', radians(0) = 0; 69 | select '62', round(radians(180), 2) = round(pi(), 2); 70 | select '63', degrees(0) = 0; 71 | select '64', round(degrees(pi())) = 180; 72 | 73 | -- 
/// Returns the smallest of three unsigned values. A function is used instead
/// of a macro so that each argument is evaluated exactly once.
static unsigned min3_unsigned(unsigned a, unsigned b, unsigned c) {
    unsigned smallest = a < b ? a : b;
    return smallest < c ? smallest : c;
}

/// Calculates and returns the Levenshtein distance of two non NULL strings.
/// More information about the algorithm can be found here:
/// https://en.wikipedia.org/wiki/Levenshtein_distance
///
/// The strings are compared byte by byte. Only a single row of the distance
/// matrix is kept in memory (strlen(str1) + 1 cells).
///
/// @param str1 first non NULL string
/// @param str2 second non NULL string
///
/// @returns the levenshtein distance of str1 and str2, or (unsigned)-1 if the
///          working buffer could not be allocated
unsigned levenshtein(const char* str1, const char* str2) {
    // strings cannot be NULL
    assert(str1 != NULL);
    assert(str2 != NULL);

    size_t str1_len = strlen(str1);
    size_t str2_len = strlen(str2);

    // handle cases where one or both strings are empty
    if (str1_len == 0) {
        return (unsigned)str2_len;
    }
    if (str2_len == 0) {
        return (unsigned)str1_len;
    }

    // a shared prefix never contributes to the distance, so skip it
    while (str1_len > 0 && str2_len > 0 && str1[0] == str2[0]) {
        str1++, str2++;
        str1_len--, str2_len--;
    }

    // vector[col] holds the distance between the first `col` bytes of str1
    // and the prefix of str2 processed so far
    unsigned* vector = calloc(str1_len + 1, sizeof *vector);
    if (vector == NULL) {
        return (unsigned)-1;
    }
    for (size_t col = 0; col <= str1_len; col++) {
        vector[col] = (unsigned)col;
    }

    // walk through the imagined rows of the full matrix, one per byte of str2
    for (size_t row = 1; row <= str2_len; row++) {
        // value diagonally up-left of the cell being computed
        unsigned last_diag = vector[0];
        vector[0] = (unsigned)row;

        for (size_t col = 1; col <= str1_len; col++) {
            // remember the old value before overwriting: it is the next diagonal
            unsigned above = vector[col];
            unsigned cost = (str1[col - 1] == str2[row - 1]) ? 0 : 1;

            vector[col] = min3_unsigned(above + 1,            // deletion
                                        vector[col - 1] + 1,  // insertion
                                        last_diag + cost);    // substitution
            last_diag = above;
        }
    }

    unsigned result = vector[str1_len];
    free(vector);
    return result;
}
42 | 43 | ``` 44 | sqlite> select regexp_replace('the year is 2021', '[0-9]+', '2050'); 45 | the year is 2050 46 | sqlite> select regexp_replace('the year is 2021', '2k21', '2050'); 47 | the year is 2021 48 | ``` 49 | 50 | Supports backreferences to captured groups `\1` through `\9` in the replacement string: 51 | 52 | ``` 53 | sqlite> select regexp_replace('the year is 2021', '([0-9]+)', '\1 or 2050'); 54 | the year is 2021 or 2050 55 | ``` 56 | 57 | ## Supported syntax 58 | 59 | The following regular expression syntax is supported: 60 | 61 | ``` 62 | X* zero or more occurrences of X 63 | X+ one or more occurrences of X 64 | X? zero or one occurrences of X 65 | (X) match X 66 | X|Y X or Y 67 | ^X X occurring at the beginning of the string 68 | X$ X occurring at the end of the string 69 | . Match any single character 70 | \c Character c where c is one of \{}()[]|*+?. 71 | \c C-language escapes for c in afnrtv. ex: \t or \n 72 | [abc] Any single character from the set abc 73 | [^abc] Any single character not in the set abc 74 | [a-z] Any single character in the range a-z 75 | [^a-z] Any single character not in the range a-z 76 | ``` 77 | 78 | ## Usage 79 | 80 | ``` 81 | sqlite> .load ./re 82 | sqlite> select regexp_like('abcdef', 'b.d'); 83 | ``` 84 | 85 | [Download](https://github.com/nalgeon/sqlean/releases/latest) 86 | -------------------------------------------------------------------------------- /docs/fuzzy.md: -------------------------------------------------------------------------------- 1 | # fuzzy: Fuzzy string matching and phonetics in SQLite 2 | 3 | Fuzzy-matching helpers: 4 | 5 | - Measure distance between two strings. 6 | - Compute phonetic string code. 7 | - Transliterate a string. 8 | 9 | Adapted from [libstrcmp](https://github.com/Rostepher/libstrcmp) by Ross Bayer and [spellfix.c](https://www.sqlite.org/src/file/ext/misc/spellfix.c) by D. Richard Hipp.
10 | 11 | If you want a ready-to-use mechanism to search a large vocabulary for close matches, see the [spellfix](https://github.com/nalgeon/sqlean/issues/27#issuecomment-1002297477) extension. 12 | 13 | ## String distances 14 | 15 | Measure distance between two strings: 16 | 17 | - `dlevenshtein(x, y)` - Damerau-Levenshtein distance, 18 | - `edit_distance(x, y)` - Spellcheck edit distance, 19 | - `hamming(x, y)` - Hamming distance, 20 | - `jaro_winkler(x, y)` - Jaro-Winkler distance, 21 | - `levenshtein(x, y)` - Levenshtein distance, 22 | - `osa_distance(x, y)` - Optimal String Alignment distance. 23 | 24 | ``` 25 | sqlite> select dlevenshtein('awesome', 'aewsme'); 26 | 2 27 | 28 | sqlite> select edit_distance('awesome', 'aewsme'); 29 | 215 30 | 31 | sqlite> select hamming('awesome', 'aewsome'); 32 | 2 33 | 34 | sqlite> select jaro_winkler('awesome', 'aewsme'); 35 | 0.907 36 | 37 | sqlite> select levenshtein('awesome', 'aewsme'); 38 | 3 39 | 40 | sqlite> select osa_distance('awesome', 'aewsme'); 41 | 3 42 | ``` 43 | 44 | Only ASCII strings are supported. 45 | 46 | ## Phonetic codes 47 | 48 | Compute phonetic string code: 49 | 50 | - `caverphone(x)` - Caverphone code, 51 | - `phonetic_hash(x)` - Spellcheck phonetic code, 52 | - `soundex(x)` - Soundex code, 53 | - `rsoundex(x)` - Refined Soundex code. 54 | 55 | ``` 56 | sqlite> select caverphone('awesome'); 57 | AWSM111111 58 | 59 | sqlite> select phonetic_hash('awesome'); 60 | ABACAMA 61 | 62 | sqlite> select soundex('awesome'); 63 | A250 64 | 65 | sqlite> select rsoundex('awesome'); 66 | A03080 67 | ``` 68 | 69 | Only ASCII strings are supported. 70 | 71 | ## Transliteration 72 | 73 | Transliteration converts the input string from UTF-8 into pure ASCII 74 | by converting all non-ASCII characters to some combination of characters 75 | in the ASCII subset. 76 | 77 | Distance and phonetics functions are ASCII-only, so to work 78 | with Unicode string one should transliterate it first. 
/// Computes and returns the Optimal String Alignment distance for two non NULL
/// strings. More information about the algorithm can be found here:
/// https://en.wikipedia.org/wiki/Damerau-Levenshtein_distance
///
/// The distance counts insertions, deletions, substitutions and swaps of two
/// adjacent bytes, with the restriction that no substring is edited twice.
/// Strings are compared byte by byte.
///
/// @param str1 first non NULL string
/// @param str2 second non NULL string
///
/// @returns optimal string alignment distance for str1 and str2, or
///          (unsigned)-1 if the distance matrix could not be allocated
unsigned optimal_string_alignment(const char* str1, const char* str2) {
    // strings cannot be NULL
    assert(str1 != NULL);
    assert(str2 != NULL);

    size_t str1_len = strlen(str1);
    size_t str2_len = strlen(str2);

    // handle cases where one or both strings are empty
    if (str1_len == 0) {
        return (unsigned)str2_len;
    }
    if (str2_len == 0) {
        return (unsigned)str1_len;
    }

    // a shared prefix never contributes to the distance, so skip it
    while (str1_len > 0 && str2_len > 0 && str1[0] == str2[0]) {
        str1++, str2++;
        str1_len--, str2_len--;
    }

    // the matrix is stored row-major in one allocation:
    // cell (row, col) lives at matrix[row * cols + col]
    const size_t rows = str1_len + 1;
    const size_t cols = str2_len + 1;
    if (rows > (size_t)-1 / cols) {
        return (unsigned)-1;  // rows * cols would overflow size_t
    }
    unsigned* matrix = calloc(rows * cols, sizeof *matrix);
    if (matrix == NULL) {
        return (unsigned)-1;
    }

    // distance from/to the empty prefix is the prefix length
    for (size_t row = 0; row < rows; row++) {
        matrix[row * cols] = (unsigned)row;
    }
    for (size_t col = 0; col < cols; col++) {
        matrix[col] = (unsigned)col;
    }

    // fill in the matrix
    for (size_t row = 1; row <= str1_len; row++) {
        unsigned* cur = matrix + row * cols;
        const unsigned* up = cur - cols;

        for (size_t col = 1; col <= str2_len; col++) {
            unsigned cost = (str1[row - 1] == str2[col - 1]) ? 0 : 1;

            unsigned best = up[col] + 1;                // deletion
            unsigned insertion = cur[col - 1] + 1;      // insertion
            unsigned substitution = up[col - 1] + cost; // substitution
            if (insertion < best) {
                best = insertion;
            }
            if (substitution < best) {
                best = substitution;
            }

            // transposition: the last two bytes of both prefixes are swapped.
            // Strings are 0-based, so the current bytes are [row - 1] and
            // [col - 1] and the previous ones are [row - 2] and [col - 2].
            if (row > 1 && col > 1 && str1[row - 1] == str2[col - 2] &&
                str1[row - 2] == str2[col - 1]) {
                unsigned transposition = (up - cols)[col - 2] + 1;
                if (transposition < best) {
                    best = transposition;
                }
            }

            cur[col] = best;
        }
    }

    unsigned result = matrix[str1_len * cols + str2_len];
    free(matrix);
    return result;
}
/*
** Generate a "phonetic hash" from a string of ASCII characters
** in zIn[0..nIn-1].
**
**   * Map characters by character class (CCLASS_* in common.h) using the
**     initClass table for the first classified character and the midClass
**     table for every character after it.
**   * Omit double-letters
**   * Omit vowels beside R and L
**   * Omit T when followed by CH
**   * Omit W when followed by R
**   * Omit D when followed by J or G
**   * Omit K in KN or G in GN at the beginning of a word
**     (only applied when the input is longer than two characters)
**
** The letter comparisons above are made against lowercase ASCII literals
** only; presumably the caller lowercases the input first - TODO confirm.
**
** Space to hold the result is obtained from malloc(). The result is
** NUL-terminated and never longer than nIn characters, because each input
** character appends at most one output character.
** NOTE(review): since the buffer comes from malloc(), the caller presumably
** releases it with free() rather than sqlite3_free() - confirm at call sites.
**
** Return NULL if memory allocation fails.
*/
unsigned char* phonetic_hash(const unsigned char* zIn, int nIn) {
    unsigned char* zOut = malloc(nIn + 1);
    int i;
    int nOut = 0;
    /* cPrev: class of the previous character that reached the class logic,
    ** silent letters included. cPrevX: same, but silent letters excluded.
    ** 0x77 is a placeholder that matches none of the CCLASS_* values (0..12). */
    char cPrev = 0x77;
    char cPrevX = 0x77;
    const unsigned char* aClass = initClass;

    if (zOut == 0)
        return 0;
    /* Drop a leading 'g' or 'k' when it is followed by 'n' (GN..., KN...) */
    if (nIn > 2) {
        switch (zIn[0]) {
            case 'g':
            case 'k': {
                if (zIn[1] == 'n') {
                    zIn++;
                    nIn--;
                }
                break;
            }
        }
    }
    for (i = 0; i < nIn; i++) {
        unsigned char c = zIn[i];
        /* Look-ahead rules: skip the first letter of WR, DJ, DG and TCH */
        if (i + 1 < nIn) {
            if (c == 'w' && zIn[i + 1] == 'r')
                continue;
            if (c == 'd' && (zIn[i + 1] == 'j' || zIn[i + 1] == 'g'))
                continue;
            if (i + 2 < nIn) {
                if (c == 't' && zIn[i + 1] == 'c' && zIn[i + 2] == 'h')
                    continue;
            }
        }
        /* Replace the character by its class; the mask keeps the table
        ** index within 0..127 */
        c = aClass[c & 0x7f];
        if (c == CCLASS_SPACE)
            continue;
        /* "Other" characters are kept only directly after a digit */
        if (c == CCLASS_OTHER && cPrev != CCLASS_DIGIT)
            continue;
        /* From here on the mid-word class table applies */
        aClass = midClass;
        if (c == CCLASS_VOWEL && (cPrevX == CCLASS_R || cPrevX == CCLASS_L)) {
            continue; /* No vowels beside L or R */
        }
        if ((c == CCLASS_R || c == CCLASS_L) && cPrevX == CCLASS_VOWEL) {
            nOut--; /* No vowels beside L or R: un-emit the preceding vowel */
        }
        cPrev = c;
        /* Silent letters update cPrev but produce no output */
        if (c == CCLASS_SILENT)
            continue;
        cPrevX = c;
        /* Translate the class number into its one-letter name */
        c = className[c];
        assert(nOut >= 0);
        /* Suppress a repeat of the letter that was emitted last */
        if (nOut == 0 || c != zOut[nOut - 1])
            zOut[nOut++] = c;
    }
    zOut[nOut] = 0;
    return zOut;
}
/// Helper function that returns the numeric code for a given char as specified
/// by the soundex algorithm.
///
/// The lookup is case-insensitive. Vowels, 'h', 'w', 'y' and every non-letter
/// byte map to '0'.
///
/// @param c char to encode
///
/// @returns char representation of the number associated with the given char
static char encode_char(const char c) {
    // <ctype.h> functions require a value representable as unsigned char
    switch (tolower((unsigned char)c)) {
        case 'b':
        case 'f':
        case 'p':
        case 'v':
            return '1';

        case 'c':
        case 'g':
        case 'j':
        case 'k':
        case 'q':
        case 's':
        case 'x':
        case 'z':
            return '2';

        case 'd':
        case 't':
            return '3';

        case 'l':
            return '4';

        case 'm':
        case 'n':
            return '5';

        case 'r':
            return '6';

        default:
            break;
    }

    return '0';
}

/// Computes and returns the soundex representation of a given non NULL string.
/// More information about the algorithm can be found here:
/// https://en.wikipedia.org/wiki/Soundex
///
/// The code is the upper-cased first byte of str followed by three digits,
/// padded with '0'. For an empty string the first byte is the terminator, so
/// the result reads as an empty C string.
///
/// @param str non NULL string to encode
///
/// @returns soundex representation of str in a buffer obtained from malloc()
///          (5 bytes), or NULL if the allocation fails
char* soundex(const char* str) {
    // string cannot be NULL
    assert(str != NULL);

    size_t str_len = strlen(str);

    // allocate space for final code and null terminator
    char* code = malloc(5 * sizeof(char));
    if (code == NULL) {
        return NULL;
    }

    // set first value to first char in str
    code[0] = (char)toupper((unsigned char)str[0]);

    // number of chars written to code
    unsigned d = 1;

    // add all viable digits to code; digits are computed on the fly so no
    // temporary buffer proportional to the input length is needed
    for (size_t i = 1; i < str_len && d < 4; i++) {
        char digit = encode_char(str[i]);

        // uncoded chars and repeats of the directly preceding code are skipped
        if (digit == '0' || digit == encode_char(str[i - 1])) {
            continue;
        }

        // if digits separated by an 'h' or 'w' (either case) are the same,
        // skip them
        if (i > 1 && digit == encode_char(str[i - 2])) {
            int separator = tolower((unsigned char)str[i - 1]);
            if (separator == 'h' || separator == 'w') {
                continue;
            }
        }

        code[d++] = digit;
    }

    // pad the end of code with '0' if too short
    while (d < 4) {
        code[d++] = '0';
    }

    // null terminate string
    code[d] = '\0';

    return code;
}
/// Helper function that returns the numeric code for a given char as specified
/// by the refined soundex algorithm.
///
/// The lookup is case-insensitive. Vowels, 'h', 'w', 'y' and every non-letter
/// byte map to '0'.
///
/// @param c char to encode
///
/// @returns char representation of the number associated with the given char
static char encode_char(const char c) {
    // <ctype.h> functions require a value representable as unsigned char
    switch (tolower((unsigned char)c)) {
        case 'b':
        case 'p':
            return '1';

        case 'f':
        case 'v':
            return '2';

        case 'c':
        case 'k':
        case 's':
            return '3';

        case 'g':
        case 'j':
            return '4';

        case 'q':
        case 'x':
        case 'z':
            return '5';

        case 'd':
        case 't':
            return '6';

        case 'l':
            return '7';

        case 'm':
        case 'n':
            return '8';

        case 'r':
            return '9';

        default:
            break;
    }

    return '0';
}

/// Computes and returns the refined soundex representation of a given non NULL
/// string. More information about the algorithm can be found here:
/// http://ntz-develop.blogspot.com/2011/03/phonetic-algorithms.html
///
/// The code is the upper-cased first byte of str followed by the digit of
/// every byte (the first one included), with runs of identical digits
/// collapsed to one. For an empty string the result is an empty C string.
///
/// @param str non NULL string to encode
///
/// @returns refined soundex representation of str in a buffer obtained from
///          malloc(), or NULL if the allocation fails
char* refined_soundex(const char* str) {
    // string cannot be NULL
    assert(str != NULL);

    size_t str_len = strlen(str);

    // worst case: leading letter + one digit per input byte + terminator
    char* result = malloc(str_len + 2);
    if (result == NULL) {
        return NULL;
    }

    // set first value to first char in str
    result[0] = (char)toupper((unsigned char)str[0]);

    // number of chars written to result
    size_t d = 1;

    // append the digit of each byte unless it repeats the previous digit
    char prev = '\0';
    for (size_t i = 0; i < str_len; i++) {
        char digit = encode_char(str[i]);
        if (digit != prev) {
            result[d++] = digit;
            prev = digit;
        }
    }

    // null terminate; d <= str_len + 1, so this stays inside the buffer
    result[d] = '\0';

    return result;
}
= 2; 36 | select '47', levenshtein('abc', 'ca') = 3; 37 | 38 | -- Optimal String Alignment distance 39 | select '51', osa_distance('abc', 'abc') = 0; 40 | select '52', osa_distance('abc', '') = 3; 41 | select '53', osa_distance('', 'abc') = 3; 42 | select '54', osa_distance('abc', 'ab') = 1; 43 | select '55', osa_distance('abc', 'abcd') = 1; 44 | select '56', osa_distance('abc', 'acb') = 2; 45 | select '57', osa_distance('abc', 'ca') = 3; 46 | 47 | -- Spellcheck edit distance 48 | select '61', edit_distance('abc', 'abc') = 0; 49 | select '62', edit_distance('abc', '') = 300; 50 | select '63', edit_distance('', 'abc') = 75; 51 | select '64', edit_distance('abc', 'ab') = 100; 52 | select '65', edit_distance('abc', 'abcd') = 25; 53 | select '66', edit_distance('abc', 'acb') = 110; 54 | select '67', edit_distance('abc', 'ca') = 225; 55 | 56 | -- Spellcheck phonetic code 57 | select '101', phonetic_hash(null) is null; 58 | select '102', phonetic_hash('') = ''; 59 | select '103', phonetic_hash('phonetics') = 'BAMADAC'; 60 | select '104', phonetic_hash('is') = 'AC'; 61 | select '105', phonetic_hash('awesome') = 'ABACAMA'; 62 | 63 | -- Soundex code 64 | select '111', soundex(null) is null; 65 | select '112', soundex('') = ''; 66 | select '113', soundex('phonetics') = 'P532'; 67 | select '114', soundex('is') = 'I200'; 68 | select '115', soundex('awesome') = 'A250'; 69 | 70 | -- Refined Soundex code 71 | select '121', rsoundex(null) is null; 72 | select '122', rsoundex('') = ''; 73 | select '123', rsoundex('phonetics') = 'P1080603'; 74 | select '124', rsoundex('is') = 'I03'; 75 | select '125', rsoundex('awesome') = 'A03080'; 76 | 77 | -- Caverphone phonetic code 78 | select '131', caverphone(null) is null; 79 | select '132', caverphone('') = ''; 80 | select '133', caverphone('phonetics') = 'FNTKS11111'; 81 | select '134', caverphone('is') = 'AS11111111'; 82 | select '135', caverphone('awesome') = 'AWSM111111'; 83 | 
/// Calculates and returns the Damerau-Levenshtein distance of two non NULL
/// strings. More information about the algorithm can be found here:
/// https://en.wikipedia.org/wiki/Damerau-Levenshtein_distance
///
/// Strings are compared byte by byte, so multi-byte (e.g. UTF-8) characters
/// count as several symbols.
///
/// @param str1 first non NULL string
/// @param str2 second non NULL string
///
/// @returns Damerau-Levenshtein distance of str1 and str2, or (unsigned)-1
///          if the working memory could not be allocated
unsigned damerau_levenshtein(const char* str1, const char* str2) {
    // strings cannot be NULL
    assert(str1 != NULL);
    assert(str2 != NULL);

    // size of the alphabet: one slot for every possible byte value
    const size_t alpha_size = 256;

    size_t str1_len = strlen(str1);
    size_t str2_len = strlen(str2);

    // handle cases where one or both strings are empty
    if (str1_len == 0) {
        return (unsigned)str2_len;
    }
    if (str2_len == 0) {
        return (unsigned)str1_len;
    }

    // remove common prefix, it does not contribute to the distance
    while (str1_len > 0 && str2_len > 0 && EQ(str1[0], str2[0])) {
        str1++, str2++;
        str1_len--, str2_len--;
    }

    // if one string was a prefix of the other, the rest is pure insertions
    if (str1_len == 0) {
        return (unsigned)str2_len;
    }
    if (str2_len == 0) {
        return (unsigned)str1_len;
    }

    // upper bound for any distance; used as the "infinite" border value
    const unsigned max_dist = (unsigned)(str1_len + str2_len);

    const size_t m_rows = str1_len + 2;  // matrix rows
    const size_t m_cols = str2_len + 2;  // matrix cols

    // value reported when allocation fails
    unsigned result = (unsigned)-1;

    // "dictionary": for every byte value, the last row of str1 where it was seen
    unsigned* dict = calloc(alpha_size, sizeof(unsigned));

    // matrix to hold computed values; row pointers start out NULL so that
    // the cleanup code can free a partially built matrix
    unsigned** matrix = calloc(m_rows, sizeof(unsigned*));
    if (dict == NULL || matrix == NULL) {
        goto cleanup;
    }
    for (size_t r = 0; r < m_rows; r++) {
        matrix[r] = calloc(m_cols, sizeof(unsigned));
        if (matrix[r] == NULL) {
            goto cleanup;
        }
    }

    // set all the starting values
    matrix[0][0] = max_dist;
    for (size_t r = 1; r < m_rows; r++) {
        matrix[r][0] = max_dist;
        matrix[r][1] = (unsigned)(r - 1);
    }
    for (size_t c = 1; c < m_cols; c++) {
        matrix[0][c] = max_dist;
        matrix[1][c] = (unsigned)(c - 1);
    }

    // fill in the matrix
    for (unsigned row = 1; row <= str1_len; row++) {
        // last column of this row where the characters matched
        unsigned db = 0;

        for (unsigned col = 1; col <= str2_len; col++) {
            // index through unsigned char: a plain (possibly signed) char with
            // the high bit set would otherwise become a huge out-of-range index
            unsigned i = dict[(unsigned char)str2[col - 1]];
            unsigned k = db;
            unsigned cost = EQ(str1[row - 1], str2[col - 1]) ? 0 : 1;

            if (cost == 0) {
                db = col;
            }

            // substitution, insertion, deletion, transposition
            matrix[row + 1][col + 1] =
                MIN4(matrix[row][col] + cost, matrix[row + 1][col] + 1, matrix[row][col + 1] + 1,
                     matrix[i][k] + (row - i - 1) + (col - k - 1) + 1);
        }

        dict[(unsigned char)str1[row - 1]] = row;
    }

    result = matrix[m_rows - 1][m_cols - 1];

cleanup:
    // free allocated memory (free(NULL) is a no-op)
    if (matrix != NULL) {
        for (size_t r = 0; r < m_rows; r++) {
            free(matrix[r]);
        }
    }
    free(matrix);
    free(dict);

    return result;
}
compile-linux: 20 | gcc -fPIC -shared src/sqlite3-crypto.c src/crypto/*.c -o dist/crypto.so 21 | gcc -fPIC -shared src/sqlite3-fileio.c -o dist/fileio.so 22 | gcc -fPIC -shared src/sqlite3-fuzzy.c src/fuzzy/*.c -o dist/fuzzy.so 23 | gcc -fPIC -shared src/sqlite3-ipaddr.c -o dist/ipaddr.so 24 | gcc -fPIC -shared src/sqlite3-json1.c -o dist/json1.so 25 | gcc -fPIC -shared src/sqlite3-math.c -o dist/math.so -lm 26 | gcc -fPIC -shared src/sqlite3-re.c src/re.c -o dist/re.so 27 | gcc -fPIC -shared src/sqlite3-stats.c -o dist/stats.so -lm 28 | gcc -fPIC -shared src/sqlite3-text.c -o dist/text.so 29 | gcc -fPIC -shared src/sqlite3-unicode.c -o dist/unicode.so 30 | gcc -fPIC -shared src/sqlite3-uuid.c -o dist/uuid.so 31 | gcc -fPIC -shared src/sqlite3-vsv.c -o dist/vsv.so -lm 32 | 33 | compile-windows: 34 | gcc -shared -I. src/sqlite3-crypto.c src/crypto/*.c -o dist/crypto.dll 35 | gcc -shared -I. src/sqlite3-fileio.c -o dist/fileio.dll 36 | gcc -shared -I. src/sqlite3-fuzzy.c src/fuzzy/*.c -o dist/fuzzy.dll 37 | gcc -shared -I. src/sqlite3-json1.c -o dist/json1.dll 38 | gcc -shared -I. src/sqlite3-math.c -o dist/math.dll -lm 39 | gcc -shared -I. src/sqlite3-re.c src/re.c -o dist/re.dll 40 | gcc -shared -I. src/sqlite3-stats.c -o dist/stats.dll -lm 41 | gcc -shared -I. src/sqlite3-text.c -o dist/text.dll 42 | gcc -shared -I. src/sqlite3-unicode.c -o dist/unicode.dll 43 | gcc -shared -I. src/sqlite3-uuid.c -o dist/uuid.dll 44 | gcc -shared -I. 
# Runs the test suite of every extension, one after another.
# $(MAKE) is used instead of a literal `make` so that the sub-makes run with
# the same make program and inherit command-line flags (-n, -k, -j, ...).
test-all:
	$(MAKE) test suite=crypto
	$(MAKE) test suite=fileio
	$(MAKE) test suite=fuzzy
	$(MAKE) test suite=ipaddr
	$(MAKE) test suite=json1
	$(MAKE) test suite=math
	$(MAKE) test suite=re
	$(MAKE) test suite=stats
	$(MAKE) test suite=text
	$(MAKE) test suite=unicode
	$(MAKE) test suite=uuid
	$(MAKE) test suite=vsv
#ifndef __SHA2_H__
#define __SHA2_H__

#define SHA2_USE_INTTYPES_H
#define SHA2_UNROLL_TRANSFORM
#define NOPROTO

/*
 * Import the size_t and fixed-width integer type definitions from system
 * headers. You may need to change this, or define these things yourself
 * in this file.
 */
#include <sys/types.h>

#ifdef SHA2_USE_INTTYPES_H

#include <inttypes.h>

#endif /* SHA2_USE_INTTYPES_H */

/*** SHA-256/384/512 Various Length Definitions ***********************/
/* All lengths are in bytes; the *_STRING_LENGTH values leave room for the
 * hex representation of the digest plus a terminating NUL. */
#define SHA256_BLOCK_LENGTH 64
#define SHA256_DIGEST_LENGTH 32
#define SHA256_DIGEST_STRING_LENGTH (SHA256_DIGEST_LENGTH * 2 + 1)
#define SHA384_BLOCK_LENGTH 128
#define SHA384_DIGEST_LENGTH 48
#define SHA384_DIGEST_STRING_LENGTH (SHA384_DIGEST_LENGTH * 2 + 1)
#define SHA512_BLOCK_LENGTH 128
#define SHA512_DIGEST_LENGTH 64
#define SHA512_DIGEST_STRING_LENGTH (SHA512_DIGEST_LENGTH * 2 + 1)

/*** SHA-256/384/512 Context Structures *******************************/

/* Working context of a SHA-256 computation; buffer holds one input block. */
typedef struct _SHA256_CTX {
    uint32_t state[8];
    uint64_t bitcount;
    uint8_t buffer[SHA256_BLOCK_LENGTH];
} SHA256_CTX;

/* Working context of a SHA-512 computation; buffer holds one input block. */
typedef struct _SHA512_CTX {
    uint64_t state[8];
    uint64_t bitcount[2];
    uint8_t buffer[SHA512_BLOCK_LENGTH];
} SHA512_CTX;

/* SHA-384 shares the SHA-512 context layout. */
typedef SHA512_CTX SHA384_CTX;

/*** SHA-256/384/512 Function Prototypes ******************************/

/*
 * For each algorithm:
 *   *_init()   returns a pointer to a new context;
 *   *_update() feeds the given number of bytes into the context;
 *   *_final()  writes the digest into the supplied buffer and returns an int
 *              that callers use as the digest length in bytes.
 * The init functions are declared with (void) so that they are real
 * prototypes rather than old-style declarations with unspecified parameters.
 */
void* sha256_init(void);
void sha256_update(SHA256_CTX*, const uint8_t*, size_t);
int sha256_final(SHA256_CTX*, uint8_t[SHA256_DIGEST_LENGTH]);

void* sha384_init(void);
void sha384_update(SHA384_CTX*, const uint8_t*, size_t);
int sha384_final(SHA384_CTX*, uint8_t[SHA384_DIGEST_LENGTH]);

void* sha512_init(void);
void sha512_update(SHA512_CTX*, const uint8_t*, size_t);
int sha512_final(SHA512_CTX*, uint8_t[SHA512_DIGEST_LENGTH]);

#endif  // __SHA2_H__
6 | */ 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "crypto/md5.h" 13 | #include "crypto/sha1.h" 14 | #include "crypto/sha2.h" 15 | #include "sqlite3ext.h" 16 | 17 | SQLITE_EXTENSION_INIT1 18 | 19 | /** 20 | * Generic compute hash function. Algorithm is encoded in the user data field. 21 | */ 22 | static void sqlite3_hash(sqlite3_context* context, int argc, sqlite3_value** argv) { 23 | assert(argc == 1); 24 | 25 | void* (*init_func)() = NULL; 26 | void (*update_func)(void*, void*, size_t) = NULL; 27 | int (*final_func)(void*, void*) = NULL; 28 | int algo = (intptr_t)sqlite3_user_data(context); 29 | 30 | switch (algo) { 31 | case 1: /* Hardened SHA1 */ 32 | init_func = (void*)sha1_init; 33 | update_func = (void*)sha1_update; 34 | final_func = (void*)sha1_final; 35 | algo = 1; 36 | break; 37 | case 5: /* MD5 */ 38 | init_func = (void*)md5_init; 39 | update_func = (void*)md5_update; 40 | final_func = (void*)md5_final; 41 | algo = 1; 42 | break; 43 | case 2256: /* SHA2-256 */ 44 | init_func = (void*)sha256_init; 45 | update_func = (void*)sha256_update; 46 | final_func = (void*)sha256_final; 47 | algo = 1; 48 | break; 49 | case 2384: /* SHA2-384 */ 50 | init_func = (void*)sha384_init; 51 | update_func = (void*)sha384_update; 52 | final_func = (void*)sha384_final; 53 | algo = 1; 54 | break; 55 | case 2512: /* SHA2-512 */ 56 | init_func = (void*)sha512_init; 57 | update_func = (void*)sha512_update; 58 | final_func = (void*)sha512_final; 59 | algo = 1; 60 | break; 61 | default: 62 | sqlite3_result_error(context, "Unknown Algorithm", -1); 63 | return; 64 | } 65 | 66 | void* ctx; 67 | if (algo) { 68 | ctx = init_func(); 69 | } 70 | if (!ctx) { 71 | sqlite3_result_error(context, "Algorithm could not allocate it's context", -1); 72 | return; 73 | } 74 | 75 | void* data = NULL; 76 | if (sqlite3_value_type(argv[0]) == SQLITE_NULL) { 77 | sqlite3_result_null(context); 78 | return; 79 | } else if (sqlite3_value_type(argv[0]) == SQLITE_BLOB) { 80 | data = 
(void*)sqlite3_value_blob(argv[0]); 81 | } else { 82 | data = (void*)sqlite3_value_text(argv[0]); 83 | } 84 | size_t datalen = sqlite3_value_bytes(argv[0]); 85 | if (datalen > 0) 86 | update_func(ctx, data, datalen); 87 | 88 | unsigned char hash[128] = {0}; 89 | int hashlen = final_func(ctx, hash); 90 | sqlite3_result_blob(context, hash, hashlen, SQLITE_TRANSIENT); 91 | } 92 | 93 | /* 94 | * Registers the extension. 95 | */ 96 | #ifdef _WIN32 97 | __declspec(dllexport) 98 | #endif 99 | 100 | int sqlite3_crypto_init(sqlite3* db, char** pzErrMsg, const sqlite3_api_routines* pApi) { 101 | SQLITE_EXTENSION_INIT2(pApi); 102 | static const int flags = SQLITE_UTF8 | SQLITE_INNOCUOUS | SQLITE_DETERMINISTIC; 103 | sqlite3_create_function(db, "md5", 1, flags, (void*)5, sqlite3_hash, 0, 0); 104 | sqlite3_create_function(db, "sha1", 1, flags, (void*)1, sqlite3_hash, 0, 0); 105 | sqlite3_create_function(db, "sha256", -1, flags, (void*)2256, sqlite3_hash, 0, 0); 106 | sqlite3_create_function(db, "sha384", -1, flags, (void*)2384, sqlite3_hash, 0, 0); 107 | sqlite3_create_function(db, "sha512", -1, flags, (void*)2512, sqlite3_hash, 0, 0); 108 | return SQLITE_OK; 109 | } -------------------------------------------------------------------------------- /src/fuzzy/jaro_winkler.c: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Ross Bayer, MIT License 2 | // https://github.com/Rostepher/libstrcmp 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "common.h" 10 | 11 | /// Calculates and returns the Jaro distance of two non NULL strings. 
/// Calculates and returns the Jaro distance of two non NULL strings.
/// More information about the algorithm can be found here:
/// http://en.wikipedia.org/wiki/Jaro-Winkler_distance
///
/// Strings are compared byte by byte.
///
/// @param str1 first non NULL string
/// @param str2 second non NULL string
///
/// @returns the jaro distance of str1 and str2 in the range [0, 1]
///          (1 for identical strings); 0 is also returned if the working
///          memory could not be allocated
double jaro(const char* str1, const char* str2) {
    // strings cannot be NULL
    assert(str1 != NULL);
    assert(str2 != NULL);

    int str1_len = strlen(str1);
    int str2_len = strlen(str2);

    // if both strings are empty return 1
    // if only one of the strings is empty return 0
    if (str1_len == 0) {
        return (str2_len == 0) ? 1.0 : 0.0;
    }
    if (str2_len == 0) {
        return 0.0;
    }

    // max distance between two chars to be considered matching
    // floor() is omitted due to integer division rules
    int match_dist = (int)MAX(str1_len, str2_len) / 2 - 1;
    // for one-character strings the formula yields -1, which would make the
    // search window empty and report 0 even for identical strings
    if (match_dist < 0) {
        match_dist = 0;
    }

    // arrays of flags that signify if that char in the matching string has a
    // match
    int* str1_matches = calloc(str1_len, sizeof(int));
    int* str2_matches = calloc(str2_len, sizeof(int));
    if (str1_matches == NULL || str2_matches == NULL) {
        free(str1_matches);
        free(str2_matches);
        return 0.0;
    }

    // number of matches and transpositions
    double matches = 0.0;
    double trans = 0.0;

    // find the matches
    for (int i = 0; i < str1_len; i++) {
        // start and end take into account the match distance
        int start = MAX(0, i - match_dist);
        int end = MIN(i + match_dist + 1, str2_len);

        for (int k = start; k < end; k++) {
            // if str2 already has a match or str1 and str2 are not equal
            // continue
            if (str2_matches[k] || NOT_EQ(str1[i], str2[k])) {
                continue;
            }

            // otherwise assume there is a match
            str1_matches[i] = 1;
            str2_matches[k] = 1;
            matches++;
            break;
        }
    }

    // if there are no matches return 0
    if (matches == 0) {
        free(str1_matches);
        free(str2_matches);
        return 0.0;
    }

    // count transpositions
    int k = 0;
    for (int i = 0; i < str1_len; i++) {
        // if there are no matches in str1 continue
        if (!str1_matches[i]) {
            continue;
        }

        // while there is no match in str2 increment k;
        // both arrays hold the same number of matches, so k stays in range
        while (!str2_matches[k]) {
            k++;
        }

        // increment trans
        if (NOT_EQ(str1[i], str2[k])) {
            trans++;
        }

        k++;
    }

    // divide the number of transpositions by two as per the algorithm specs
    // this division is valid because the counted transpositions include both
    // instances of the transposed characters.
    trans /= 2.0;

    // free allocated memory
    free(str1_matches);
    free(str2_matches);

    // return the jaro distance
    return ((matches / str1_len) + (matches / str2_len) + ((matches - trans) / matches)) / 3.0;
}

/// Calculates and returns the Jaro-Winkler distance of two non NULL strings.
/// More information about the algorithm can be found here:
/// http://en.wikipedia.org/wiki/Jaro-Winkler_distance
///
/// @param str1 first non NULL string
/// @param str2 second non NULL string
///
/// @returns the jaro-winkler distance of str1 and str2
double jaro_winkler(const char* str1, const char* str2) {
    // strings cannot be NULL
    assert(str1 != NULL);
    assert(str2 != NULL);

    // compute the jaro distance
    double dist = jaro(str1, str2);

    // finds the number of common characters at the start of both strings,
    // max 3. The terminators are checked explicitly: comparing them as
    // ordinary characters would count '\0' == '\0' as a match for short equal
    // strings and then read past the end of both buffers.
    int prefix_length = 0;
    while (prefix_length < 3 && str1[prefix_length] != '\0' && str2[prefix_length] != '\0' &&
           EQ(str1[prefix_length], str2[prefix_length])) {
        prefix_length++;
    }

    // 0.1 is the default scaling factor
    return dist + prefix_length * 0.1 * (1 - dist);
}
### mkdir(path[, perm])

Creates a directory named `path` with permission bits `perm` (octal `777` by default).

```
sqlite> select mkdir('hellodir');
```

### symlink(src, dst)

Creates a symbolic link named `dst`, pointing to `src`.

```
sqlite> select symlink('hello.txt', 'hello.lnk');
```
Use the `lsmode()` helper function to get a human-readable representation of the `mode`:

```
sqlite> select name, lsmode(mode) from lsdir('test');
┌─────────────────┬──────────────┐
│ name            │ lsmode(mode) │
├─────────────────┼──────────────┤
│ test            │ drwxr-xr-x   │
│ test/crypto.sql │ -rw-r--r--   │
│ test/fileio.sql │ -rw-r--r--   │
│ test/fuzzy.sql  │ -rw-r--r--   │
│ ...             │ ...          │
└─────────────────┴──────────────┘
```
/*
 * Decodes the UTF-8 character that starts at `str` and returns its code point.
 * A leading NUL byte yields 0, which callers can use as the end-of-string mark.
 * Sequences that READ_UTF8 rejects come back as 0xFFFD.
 */
static int read_char(const unsigned char* str) {
    int c;
    // no end pointer is passed (zTerm = 0), so decoding stops at the first
    // byte that is not a UTF-8 continuation byte
    READ_UTF8(str, 0, c);
    return c;
}
57 | * reverse("abcde") == "edcba" 58 | */ 59 | static char* reverse(const char* source) { 60 | int len = strlen(source); 61 | char* result = sqlite3_malloc(len + 1); 62 | char* rzt = result + len; 63 | *(rzt--) = '\0'; 64 | 65 | const char* zt = source; 66 | while (read_char((unsigned char*)zt) != 0) { 67 | source = zt; 68 | advance_char(zt); 69 | for (int i = 1; zt - i >= source; ++i) { 70 | *(rzt--) = *(zt - i); 71 | } 72 | } 73 | return result; 74 | } 75 | 76 | static void sqlite3_reverse(sqlite3_context* context, int argc, sqlite3_value** argv) { 77 | assert(argc == 1); 78 | if (sqlite3_value_type(argv[0]) == SQLITE_NULL) { 79 | sqlite3_result_null(context); 80 | return; 81 | } 82 | const char* source = (char*)sqlite3_value_text(argv[0]); 83 | char* result = reverse(source); 84 | sqlite3_result_text(context, result, -1, sqlite3_free); 85 | } 86 | 87 | /* 88 | * strsep() implementation, Windows doesn't have it 89 | * copied from https://unixpapa.com/incnote/string.html 90 | */ 91 | static char* str_sep(char** sp, const char* sep) { 92 | if (sp == NULL || *sp == NULL || **sp == '\0') { 93 | return NULL; 94 | } 95 | char* s = *sp; 96 | char* p = s + strcspn(s, sep); 97 | if (*p != '\0') 98 | *p++ = '\0'; 99 | *sp = p; 100 | return s; 101 | } 102 | 103 | /* 104 | * Splits `source` string on `sep` and returns the given `part` (counting from one) 105 | * split_part("one;two;three", ";", 2) == "two" 106 | */ 107 | static char* split_part(char* source, const char* sep, int64_t part) { 108 | char* token; 109 | int64_t index = 1; 110 | while ((token = str_sep(&source, sep)) != NULL) { 111 | if (index == part) { 112 | break; 113 | } 114 | index++; 115 | } 116 | return token; 117 | } 118 | 119 | static void sqlite3_split_part(sqlite3_context* context, int argc, sqlite3_value** argv) { 120 | assert(argc == 3); 121 | 122 | char* source = (char*)sqlite3_value_text(argv[0]); 123 | if (source == NULL) { 124 | sqlite3_result_null(context); 125 | return; 126 | } 127 | if 
(strcmp(source, "") == 0) { 128 | sqlite3_result_text(context, "", -1, SQLITE_TRANSIENT); 129 | return; 130 | } 131 | 132 | const char* sep = (const char*)sqlite3_value_text(argv[1]); 133 | if (!sep) { 134 | sqlite3_result_null(context); 135 | return; 136 | } 137 | 138 | if (sqlite3_value_type(argv[2]) != SQLITE_INTEGER) { 139 | sqlite3_result_error(context, "part parameter should be integer", -1); 140 | return; 141 | } 142 | int64_t part = sqlite3_value_int64(argv[2]); 143 | if (part <= 0) { 144 | sqlite3_result_error(context, "part parameter should be > 0", -1); 145 | return; 146 | } 147 | 148 | char* token = split_part(source, sep, part); 149 | 150 | if (token == NULL) { 151 | sqlite3_result_text(context, "", -1, SQLITE_TRANSIENT); 152 | return; 153 | } 154 | sqlite3_result_text(context, token, -1, SQLITE_TRANSIENT); 155 | } 156 | 157 | /* 158 | * Registers the extension. 159 | */ 160 | #ifdef _WIN32 161 | __declspec(dllexport) 162 | #endif 163 | int sqlite3_text_init(sqlite3* db, char** pzErrMsg, const sqlite3_api_routines* pApi) { 164 | SQLITE_EXTENSION_INIT2(pApi); 165 | static const int flags = SQLITE_UTF8 | SQLITE_INNOCUOUS | SQLITE_DETERMINISTIC; 166 | sqlite3_create_function(db, "reverse", 1, flags, 0, sqlite3_reverse, 0, 0); 167 | sqlite3_create_function(db, "split_part", 3, flags, 0, sqlite3_split_part, 0, 0); 168 | return SQLITE_OK; 169 | } -------------------------------------------------------------------------------- /docs/vsv.md: -------------------------------------------------------------------------------- 1 | # vsv: CSV files as virtual tables in SQLite 2 | 3 | Provides virtual table for working directly with CSV files, without importing data into the database. Useful for very large datasets. 4 | 5 | Adapted from [vsv.c](http://www.dessus.com/files/vsv.c) by Keith Medcalf. 
6 | 7 | ## Example 8 | 9 | For the `people.csv` file with the following data: 10 | 11 | ```csv 12 | 11,Diane,London 13 | 22,Grace,Berlin 14 | 33,Alice,Paris 15 | ``` 16 | 17 | The `vsv` virtual table could look like this: 18 | 19 | ``` 20 | .load ./vsv 21 | 22 | create virtual table people using vsv( 23 | filename=people.csv, 24 | schema="create table people(id integer, name text, city text)", 25 | columns=3, 26 | affinity=integer 27 | ); 28 | ``` 29 | 30 | ``` 31 | select * from people; 32 | ┌────┬───────┬────────┐ 33 | │ id │ name │ city │ 34 | ├────┼───────┼────────┤ 35 | │ 11 │ Diane │ London │ 36 | │ 22 │ Grace │ Berlin │ 37 | │ 33 │ Alice │ Paris │ 38 | └────┴───────┴────────┘ 39 | ``` 40 | 41 | ## Parameters 42 | 43 | The parameters to the vsv module (the vsv(...) part) are as follows: 44 | 45 | ``` 46 | filename=STRING the filename, passed to the Operating System 47 | data=STRING alternative data 48 | schema=STRING Alternate Schema to use 49 | columns=N columns parsed from the VSV file 50 | header=BOOL whether or not a header row is present 51 | skip=N number of leading data rows to skip 52 | rsep=STRING record separator 53 | fsep=STRING field separator 54 | validatetext=BOOL validate UTF-8 encoding of text fields 55 | affinity=AFFINITY affinity to apply to each returned value 56 | nulls=BOOL empty fields are returned as NULL 57 | ``` 58 | 59 | ### Defaults 60 | 61 | ``` 62 | filename / data nothing. You must provide one or the other 63 | it is an error to provide both or neither 64 | 65 | schema nothing. If not provided then one will be 66 | generated for you from the header, or if no 67 | header is available then autogenerated using 68 | field names manufactured as cX where X is the 69 | column number 70 | 71 | columns nothing. 
The `validatetext` setting will cause the validity of the field
encoding (not its contents) to be verified. It affects how
fields that are supposed to contain text will be returned to
the SQLite3 library in order to prevent invalid utf8 data from
being stored or processed as if it were valid utf8 text.

The `nulls` option will cause fields that do not contain anything
to return NULL rather than an empty result. Two separators
side by side with no intervening characters at all will be
returned as NULL if nulls is true. If nulls is false or
the contents are explicitly empty ("") then a 0 length blob
(if affinity=blob) or 0 length text string is returned.
if validatetext is in effect then 109 | an error will be thrown if the field does 110 | not contain validly encoded text or contains 111 | embedded nulls 112 | - `blob` all fields will be returned as blobs 113 | validatetext has no effect 114 | - `text` all fields will be returned as text just 115 | like in the original csv module, embedded 116 | nulls will terminate the text. 117 | if validatetext is in effect then a blob 118 | will be returned if the field does not 119 | contain validly encoded text or the field 120 | contains embedded nulls 121 | - `integer` if the field data looks like an integer, 122 | (regex `^ *(\+|-)?\d+ *$`), 123 | then an integer will be returned as 124 | provided by the compiler and platform 125 | runtime strtoll function 126 | otherwise the field will be processed as 127 | text as defined above 128 | - `real` if the field data looks like a number, 129 | (regex `^ *(\+|-)?(\d+\.?\d*|\d*\.?\d+)([eE](\+|-)?\d+)? *$`) 130 | then a double will be returned as 131 | provided by the compiler and platform 132 | runtime strtold function otherwise the 133 | field will be processed as text as 134 | defined above 135 | - `numeric` if the field looks like an integer 136 | (see integer above) that integer will be 137 | returned; if the field looks like a number 138 | (see real above) then the number will be 139 | returned as an integer if it has no 140 | fractional part; otherwise a double will be returned 141 | 142 | ### Parameter types 143 | 144 | - `STRING` means a quoted string 145 | - `N` means a whole number not containing a sign 146 | - `BOOL` means something that evaluates as true or false. Case insensitive: `yes`, `no`, `true`, `false`, `1`, `0`. Defaults to `true` 147 | - `AFFINITY` means an SQLite3 type specification. Case insensitive: `none`, `blob`, `text`, `integer`, `real`, `numeric` 148 | - STRING means a quoted string. The quote character may be either 149 | a single quote or a double quote.
Two quote characters in a row 150 | will be replaced with a single quote character. STRINGS do not 151 | need to be quoted if it is obvious where they begin and end 152 | (that is, they do not contain a comma). Leading and trailing 153 | spaces will be trimmed from unquoted strings. 154 | 155 | The separator strings (`fsep` and `rsep`) must contain exactly one character or a valid 156 | escape sequence. Recognized escape sequences are: 157 | 158 | ``` 159 | \t horizontal tab, ascii character 9 (0x09) 160 | \n linefeed, ascii character 10 (0x0a) 161 | \v vertical tab, ascii character 11 (0x0b) 162 | \f form feed, ascii character 12 (0x0c) 163 | \xhh specific byte where hh is hexadecimal 164 | ``` 165 | 166 | ## Usage 167 | 168 | ```sql 169 | .load ./vsv 170 | 171 | create virtual table temp.vsv using vsv(...); 172 | select * from vsv; 173 | ``` 174 | 175 | [Download](https://github.com/nalgeon/sqlean/releases/latest) 176 | -------------------------------------------------------------------------------- /src/fuzzy/common.c: -------------------------------------------------------------------------------- 1 | // Originally from the spellfix SQLite extension, Public Domain 2 | // https://www.sqlite.org/src/file/ext/misc/spellfix.c 3 | // Modified by Anton Zhiyanov, https://github.com/nalgeon/sqlean/, MIT License 4 | 5 | #include "common.h" 6 | 7 | /* 8 | ** The following table gives the character class for non-initial ASCII 9 | ** characters.
/*
** Character class of each 7-bit ASCII code when the character is NOT the
** first character of a word.
**
** The table has 128 entries and is indexed directly by the ASCII code; the
** comment in front of every entry names the character (or its hex code for
** non-printable characters) that the entry belongs to.
**
** The CCLASS_* constants are not defined in this file (presumably they come
** from common.h - verify there).
**
** NOTE(review): among the ASCII whitespace controls, TAB (0x09), FF (0x0c)
** and CR (0x0d) are CCLASS_SPACE while LF (0x0a) and VT (0x0b) are
** CCLASS_OTHER. This matches the data as written; confirm it is intended.
*/
const unsigned char midClass[] = {
    /* 0x00 */ CCLASS_OTHER,  /* 0x01 */ CCLASS_OTHER,  /* 0x02 */ CCLASS_OTHER,
    /* 0x03 */ CCLASS_OTHER,  /* 0x04 */ CCLASS_OTHER,  /* 0x05 */ CCLASS_OTHER,
    /* 0x06 */ CCLASS_OTHER,  /* 0x07 */ CCLASS_OTHER,  /* 0x08 */ CCLASS_OTHER,
    /* 0x09 */ CCLASS_SPACE,  /* 0x0a */ CCLASS_OTHER,  /* 0x0b */ CCLASS_OTHER,
    /* 0x0c */ CCLASS_SPACE,  /* 0x0d */ CCLASS_SPACE,  /* 0x0e */ CCLASS_OTHER,
    /* 0x0f */ CCLASS_OTHER,  /* 0x10 */ CCLASS_OTHER,  /* 0x11 */ CCLASS_OTHER,
    /* 0x12 */ CCLASS_OTHER,  /* 0x13 */ CCLASS_OTHER,  /* 0x14 */ CCLASS_OTHER,
    /* 0x15 */ CCLASS_OTHER,  /* 0x16 */ CCLASS_OTHER,  /* 0x17 */ CCLASS_OTHER,
    /* 0x18 */ CCLASS_OTHER,  /* 0x19 */ CCLASS_OTHER,  /* 0x1a */ CCLASS_OTHER,
    /* 0x1b */ CCLASS_OTHER,  /* 0x1c */ CCLASS_OTHER,  /* 0x1d */ CCLASS_OTHER,
    /* 0x1e */ CCLASS_OTHER,  /* 0x1f */ CCLASS_OTHER,  /* ' ' */ CCLASS_SPACE,
    /* ! */ CCLASS_OTHER,     /* " */ CCLASS_OTHER,     /* # */ CCLASS_OTHER,
    /* $ */ CCLASS_OTHER,     /* % */ CCLASS_OTHER,     /* & */ CCLASS_OTHER,
    /* ' */ CCLASS_SILENT,    /* ( */ CCLASS_OTHER,     /* ) */ CCLASS_OTHER,
    /* * */ CCLASS_OTHER,     /* + */ CCLASS_OTHER,     /* , */ CCLASS_OTHER,
    /* - */ CCLASS_OTHER,     /* . */ CCLASS_OTHER,     /* / */ CCLASS_OTHER,
    /* 0 */ CCLASS_DIGIT,     /* 1 */ CCLASS_DIGIT,     /* 2 */ CCLASS_DIGIT,
    /* 3 */ CCLASS_DIGIT,     /* 4 */ CCLASS_DIGIT,     /* 5 */ CCLASS_DIGIT,
    /* 6 */ CCLASS_DIGIT,     /* 7 */ CCLASS_DIGIT,     /* 8 */ CCLASS_DIGIT,
    /* 9 */ CCLASS_DIGIT,     /* : */ CCLASS_OTHER,     /* ; */ CCLASS_OTHER,
    /* < */ CCLASS_OTHER,     /* = */ CCLASS_OTHER,     /* > */ CCLASS_OTHER,
    /* ? */ CCLASS_OTHER,     /* @ */ CCLASS_OTHER,     /* A */ CCLASS_VOWEL,
    /* B */ CCLASS_B,         /* C */ CCLASS_C,         /* D */ CCLASS_D,
    /* E */ CCLASS_VOWEL,     /* F */ CCLASS_B,         /* G */ CCLASS_C,
    /* H */ CCLASS_SILENT,    /* I */ CCLASS_VOWEL,     /* J */ CCLASS_C,
    /* K */ CCLASS_C,         /* L */ CCLASS_L,         /* M */ CCLASS_M,
    /* N */ CCLASS_M,         /* O */ CCLASS_VOWEL,     /* P */ CCLASS_B,
    /* Q */ CCLASS_C,         /* R */ CCLASS_R,         /* S */ CCLASS_C,
    /* T */ CCLASS_D,         /* U */ CCLASS_VOWEL,     /* V */ CCLASS_B,
    /* W */ CCLASS_B,         /* X */ CCLASS_C,         /* Y */ CCLASS_VOWEL,
    /* Z */ CCLASS_C,         /* [ */ CCLASS_OTHER,     /* \ */ CCLASS_OTHER,
    /* ] */ CCLASS_OTHER,     /* ^ */ CCLASS_OTHER,     /* _ */ CCLASS_OTHER,
    /* ` */ CCLASS_OTHER,     /* a */ CCLASS_VOWEL,     /* b */ CCLASS_B,
    /* c */ CCLASS_C,         /* d */ CCLASS_D,         /* e */ CCLASS_VOWEL,
    /* f */ CCLASS_B,         /* g */ CCLASS_C,         /* h */ CCLASS_SILENT,
    /* i */ CCLASS_VOWEL,     /* j */ CCLASS_C,         /* k */ CCLASS_C,
    /* l */ CCLASS_L,         /* m */ CCLASS_M,         /* n */ CCLASS_M,
    /* o */ CCLASS_VOWEL,     /* p */ CCLASS_B,         /* q */ CCLASS_C,
    /* r */ CCLASS_R,         /* s */ CCLASS_C,         /* t */ CCLASS_D,
    /* u */ CCLASS_VOWEL,     /* v */ CCLASS_B,         /* w */ CCLASS_B,
    /* x */ CCLASS_C,         /* y */ CCLASS_VOWEL,     /* z */ CCLASS_C,
    /* { */ CCLASS_OTHER,     /* | */ CCLASS_OTHER,     /* } */ CCLASS_OTHER,
    /* ~ */ CCLASS_OTHER,     /* 0x7f */ CCLASS_OTHER,
};
/*
** Character class of each 7-bit ASCII code when the character IS the first
** character of a word. Same layout as midClass: 128 entries indexed by the
** ASCII code, each entry preceded by a comment naming its character.
**
** Comparing the two tables as written in this file, the entries that differ
** from midClass are:
**   - apostrophe ('):  CCLASS_OTHER here, CCLASS_SILENT in midClass
**   - Y and y:         CCLASS_Y here,     CCLASS_VOWEL  in midClass
**
** NOTE(review): the comment inherited from upstream said the difference is
** "with the letters H, W, and Y", but H/h and W/w carry the same class in
** both tables here. Confirm whether H and W were meant to differ.
*/
const unsigned char initClass[] = {
    /* 0x00 */ CCLASS_OTHER,  /* 0x01 */ CCLASS_OTHER,  /* 0x02 */ CCLASS_OTHER,
    /* 0x03 */ CCLASS_OTHER,  /* 0x04 */ CCLASS_OTHER,  /* 0x05 */ CCLASS_OTHER,
    /* 0x06 */ CCLASS_OTHER,  /* 0x07 */ CCLASS_OTHER,  /* 0x08 */ CCLASS_OTHER,
    /* 0x09 */ CCLASS_SPACE,  /* 0x0a */ CCLASS_OTHER,  /* 0x0b */ CCLASS_OTHER,
    /* 0x0c */ CCLASS_SPACE,  /* 0x0d */ CCLASS_SPACE,  /* 0x0e */ CCLASS_OTHER,
    /* 0x0f */ CCLASS_OTHER,  /* 0x10 */ CCLASS_OTHER,  /* 0x11 */ CCLASS_OTHER,
    /* 0x12 */ CCLASS_OTHER,  /* 0x13 */ CCLASS_OTHER,  /* 0x14 */ CCLASS_OTHER,
    /* 0x15 */ CCLASS_OTHER,  /* 0x16 */ CCLASS_OTHER,  /* 0x17 */ CCLASS_OTHER,
    /* 0x18 */ CCLASS_OTHER,  /* 0x19 */ CCLASS_OTHER,  /* 0x1a */ CCLASS_OTHER,
    /* 0x1b */ CCLASS_OTHER,  /* 0x1c */ CCLASS_OTHER,  /* 0x1d */ CCLASS_OTHER,
    /* 0x1e */ CCLASS_OTHER,  /* 0x1f */ CCLASS_OTHER,  /* ' ' */ CCLASS_SPACE,
    /* ! */ CCLASS_OTHER,     /* " */ CCLASS_OTHER,     /* # */ CCLASS_OTHER,
    /* $ */ CCLASS_OTHER,     /* % */ CCLASS_OTHER,     /* & */ CCLASS_OTHER,
    /* ' */ CCLASS_OTHER,     /* ( */ CCLASS_OTHER,     /* ) */ CCLASS_OTHER,
    /* * */ CCLASS_OTHER,     /* + */ CCLASS_OTHER,     /* , */ CCLASS_OTHER,
    /* - */ CCLASS_OTHER,     /* . */ CCLASS_OTHER,     /* / */ CCLASS_OTHER,
    /* 0 */ CCLASS_DIGIT,     /* 1 */ CCLASS_DIGIT,     /* 2 */ CCLASS_DIGIT,
    /* 3 */ CCLASS_DIGIT,     /* 4 */ CCLASS_DIGIT,     /* 5 */ CCLASS_DIGIT,
    /* 6 */ CCLASS_DIGIT,     /* 7 */ CCLASS_DIGIT,     /* 8 */ CCLASS_DIGIT,
    /* 9 */ CCLASS_DIGIT,     /* : */ CCLASS_OTHER,     /* ; */ CCLASS_OTHER,
    /* < */ CCLASS_OTHER,     /* = */ CCLASS_OTHER,     /* > */ CCLASS_OTHER,
    /* ? */ CCLASS_OTHER,     /* @ */ CCLASS_OTHER,     /* A */ CCLASS_VOWEL,
    /* B */ CCLASS_B,         /* C */ CCLASS_C,         /* D */ CCLASS_D,
    /* E */ CCLASS_VOWEL,     /* F */ CCLASS_B,         /* G */ CCLASS_C,
    /* H */ CCLASS_SILENT,    /* I */ CCLASS_VOWEL,     /* J */ CCLASS_C,
    /* K */ CCLASS_C,         /* L */ CCLASS_L,         /* M */ CCLASS_M,
    /* N */ CCLASS_M,         /* O */ CCLASS_VOWEL,     /* P */ CCLASS_B,
    /* Q */ CCLASS_C,         /* R */ CCLASS_R,         /* S */ CCLASS_C,
    /* T */ CCLASS_D,         /* U */ CCLASS_VOWEL,     /* V */ CCLASS_B,
    /* W */ CCLASS_B,         /* X */ CCLASS_C,         /* Y */ CCLASS_Y,
    /* Z */ CCLASS_C,         /* [ */ CCLASS_OTHER,     /* \ */ CCLASS_OTHER,
    /* ] */ CCLASS_OTHER,     /* ^ */ CCLASS_OTHER,     /* _ */ CCLASS_OTHER,
    /* ` */ CCLASS_OTHER,     /* a */ CCLASS_VOWEL,     /* b */ CCLASS_B,
    /* c */ CCLASS_C,         /* d */ CCLASS_D,         /* e */ CCLASS_VOWEL,
    /* f */ CCLASS_B,         /* g */ CCLASS_C,         /* h */ CCLASS_SILENT,
    /* i */ CCLASS_VOWEL,     /* j */ CCLASS_C,         /* k */ CCLASS_C,
    /* l */ CCLASS_L,         /* m */ CCLASS_M,         /* n */ CCLASS_M,
    /* o */ CCLASS_VOWEL,     /* p */ CCLASS_B,         /* q */ CCLASS_C,
    /* r */ CCLASS_R,         /* s */ CCLASS_C,         /* t */ CCLASS_D,
    /* u */ CCLASS_VOWEL,     /* v */ CCLASS_B,         /* w */ CCLASS_B,
    /* x */ CCLASS_C,         /* y */ CCLASS_Y,         /* z */ CCLASS_C,
    /* { */ CCLASS_OTHER,     /* | */ CCLASS_OTHER,     /* } */ CCLASS_OTHER,
    /* ~ */ CCLASS_OTHER,     /* 0x7f */ CCLASS_OTHER,
};
111 | */ 112 | const unsigned char className[] = ".ABCDHLRMY9 ?"; -------------------------------------------------------------------------------- /src/crypto/md5.c: -------------------------------------------------------------------------------- 1 | /********************************************************************* 2 | * Filename: md5.c 3 | * Author: Brad Conte (brad AT bradconte.com) 4 | * Source: https://github.com/B-Con/crypto-algorithms 5 | * License: Public Domain 6 | * Details: Implementation of the MD5 hashing algorithm. 7 | * Algorithm specification can be found here: 8 | * http://tools.ietf.org/html/rfc1321 9 | * This implementation uses little endian byte order. 10 | *********************************************************************/ 11 | 12 | /*************************** HEADER FILES ***************************/ 13 | #include "md5.h" 14 | 15 | #include 16 | #include 17 | 18 | /****************************** MACROS ******************************/ 19 | #define ROTLEFT(a, b) ((a << b) | (a >> (32 - b))) 20 | 21 | #define F(x, y, z) ((x & y) | (~x & z)) 22 | #define G(x, y, z) ((x & z) | (y & ~z)) 23 | #define H(x, y, z) (x ^ y ^ z) 24 | #define I(x, y, z) (y ^ (x | ~z)) 25 | 26 | #define FF(a, b, c, d, m, s, t) \ 27 | { \ 28 | a += F(b, c, d) + m + t; \ 29 | a = b + ROTLEFT(a, s); \ 30 | } 31 | #define GG(a, b, c, d, m, s, t) \ 32 | { \ 33 | a += G(b, c, d) + m + t; \ 34 | a = b + ROTLEFT(a, s); \ 35 | } 36 | #define HH(a, b, c, d, m, s, t) \ 37 | { \ 38 | a += H(b, c, d) + m + t; \ 39 | a = b + ROTLEFT(a, s); \ 40 | } 41 | #define II(a, b, c, d, m, s, t) \ 42 | { \ 43 | a += I(b, c, d) + m + t; \ 44 | a = b + ROTLEFT(a, s); \ 45 | } 46 | 47 | /*********************** FUNCTION DEFINITIONS ***********************/ 48 | void md5_transform(MD5_CTX* ctx, const BYTE data[]) { 49 | WORD a, b, c, d, m[16], i, j; 50 | 51 | // MD5 specifies big endian byte order, but this implementation assumes a little 52 | // endian byte order CPU. 
Reverse all the bytes upon input, and re-reverse them 53 | // on output (in md5_final()). 54 | for (i = 0, j = 0; i < 16; ++i, j += 4) 55 | m[i] = (data[j]) + (data[j + 1] << 8) + (data[j + 2] << 16) + (data[j + 3] << 24); 56 | 57 | a = ctx->state[0]; 58 | b = ctx->state[1]; 59 | c = ctx->state[2]; 60 | d = ctx->state[3]; 61 | 62 | FF(a, b, c, d, m[0], 7, 0xd76aa478); 63 | FF(d, a, b, c, m[1], 12, 0xe8c7b756); 64 | FF(c, d, a, b, m[2], 17, 0x242070db); 65 | FF(b, c, d, a, m[3], 22, 0xc1bdceee); 66 | FF(a, b, c, d, m[4], 7, 0xf57c0faf); 67 | FF(d, a, b, c, m[5], 12, 0x4787c62a); 68 | FF(c, d, a, b, m[6], 17, 0xa8304613); 69 | FF(b, c, d, a, m[7], 22, 0xfd469501); 70 | FF(a, b, c, d, m[8], 7, 0x698098d8); 71 | FF(d, a, b, c, m[9], 12, 0x8b44f7af); 72 | FF(c, d, a, b, m[10], 17, 0xffff5bb1); 73 | FF(b, c, d, a, m[11], 22, 0x895cd7be); 74 | FF(a, b, c, d, m[12], 7, 0x6b901122); 75 | FF(d, a, b, c, m[13], 12, 0xfd987193); 76 | FF(c, d, a, b, m[14], 17, 0xa679438e); 77 | FF(b, c, d, a, m[15], 22, 0x49b40821); 78 | 79 | GG(a, b, c, d, m[1], 5, 0xf61e2562); 80 | GG(d, a, b, c, m[6], 9, 0xc040b340); 81 | GG(c, d, a, b, m[11], 14, 0x265e5a51); 82 | GG(b, c, d, a, m[0], 20, 0xe9b6c7aa); 83 | GG(a, b, c, d, m[5], 5, 0xd62f105d); 84 | GG(d, a, b, c, m[10], 9, 0x02441453); 85 | GG(c, d, a, b, m[15], 14, 0xd8a1e681); 86 | GG(b, c, d, a, m[4], 20, 0xe7d3fbc8); 87 | GG(a, b, c, d, m[9], 5, 0x21e1cde6); 88 | GG(d, a, b, c, m[14], 9, 0xc33707d6); 89 | GG(c, d, a, b, m[3], 14, 0xf4d50d87); 90 | GG(b, c, d, a, m[8], 20, 0x455a14ed); 91 | GG(a, b, c, d, m[13], 5, 0xa9e3e905); 92 | GG(d, a, b, c, m[2], 9, 0xfcefa3f8); 93 | GG(c, d, a, b, m[7], 14, 0x676f02d9); 94 | GG(b, c, d, a, m[12], 20, 0x8d2a4c8a); 95 | 96 | HH(a, b, c, d, m[5], 4, 0xfffa3942); 97 | HH(d, a, b, c, m[8], 11, 0x8771f681); 98 | HH(c, d, a, b, m[11], 16, 0x6d9d6122); 99 | HH(b, c, d, a, m[14], 23, 0xfde5380c); 100 | HH(a, b, c, d, m[1], 4, 0xa4beea44); 101 | HH(d, a, b, c, m[4], 11, 0x4bdecfa9); 102 | HH(c, d, a, b, 
m[7], 16, 0xf6bb4b60); 103 | HH(b, c, d, a, m[10], 23, 0xbebfbc70); 104 | HH(a, b, c, d, m[13], 4, 0x289b7ec6); 105 | HH(d, a, b, c, m[0], 11, 0xeaa127fa); 106 | HH(c, d, a, b, m[3], 16, 0xd4ef3085); 107 | HH(b, c, d, a, m[6], 23, 0x04881d05); 108 | HH(a, b, c, d, m[9], 4, 0xd9d4d039); 109 | HH(d, a, b, c, m[12], 11, 0xe6db99e5); 110 | HH(c, d, a, b, m[15], 16, 0x1fa27cf8); 111 | HH(b, c, d, a, m[2], 23, 0xc4ac5665); 112 | 113 | II(a, b, c, d, m[0], 6, 0xf4292244); 114 | II(d, a, b, c, m[7], 10, 0x432aff97); 115 | II(c, d, a, b, m[14], 15, 0xab9423a7); 116 | II(b, c, d, a, m[5], 21, 0xfc93a039); 117 | II(a, b, c, d, m[12], 6, 0x655b59c3); 118 | II(d, a, b, c, m[3], 10, 0x8f0ccc92); 119 | II(c, d, a, b, m[10], 15, 0xffeff47d); 120 | II(b, c, d, a, m[1], 21, 0x85845dd1); 121 | II(a, b, c, d, m[8], 6, 0x6fa87e4f); 122 | II(d, a, b, c, m[15], 10, 0xfe2ce6e0); 123 | II(c, d, a, b, m[6], 15, 0xa3014314); 124 | II(b, c, d, a, m[13], 21, 0x4e0811a1); 125 | II(a, b, c, d, m[4], 6, 0xf7537e82); 126 | II(d, a, b, c, m[11], 10, 0xbd3af235); 127 | II(c, d, a, b, m[2], 15, 0x2ad7d2bb); 128 | II(b, c, d, a, m[9], 21, 0xeb86d391); 129 | 130 | ctx->state[0] += a; 131 | ctx->state[1] += b; 132 | ctx->state[2] += c; 133 | ctx->state[3] += d; 134 | } 135 | 136 | void* md5_init() { 137 | MD5_CTX* ctx; 138 | ctx = malloc(sizeof(MD5_CTX)); 139 | ctx->datalen = 0; 140 | ctx->bitlen = 0; 141 | ctx->state[0] = 0x67452301; 142 | ctx->state[1] = 0xEFCDAB89; 143 | ctx->state[2] = 0x98BADCFE; 144 | ctx->state[3] = 0x10325476; 145 | return ctx; 146 | } 147 | 148 | void md5_update(MD5_CTX* ctx, const BYTE data[], size_t len) { 149 | size_t i; 150 | 151 | for (i = 0; i < len; ++i) { 152 | ctx->data[ctx->datalen] = data[i]; 153 | ctx->datalen++; 154 | if (ctx->datalen == 64) { 155 | md5_transform(ctx, ctx->data); 156 | ctx->bitlen += 512; 157 | ctx->datalen = 0; 158 | } 159 | } 160 | } 161 | 162 | int md5_final(MD5_CTX* ctx, BYTE hash[]) { 163 | size_t i; 164 | 165 | i = ctx->datalen; 166 | 167 | 
// Pad whatever data is left in the buffer. 168 | if (ctx->datalen < 56) { 169 | ctx->data[i++] = 0x80; 170 | while (i < 56) 171 | ctx->data[i++] = 0x00; 172 | } else if (ctx->datalen >= 56) { 173 | ctx->data[i++] = 0x80; 174 | while (i < 64) 175 | ctx->data[i++] = 0x00; 176 | md5_transform(ctx, ctx->data); 177 | memset(ctx->data, 0, 56); 178 | } 179 | 180 | // Append to the padding the total message's length in bits and transform. 181 | ctx->bitlen += ctx->datalen * 8; 182 | ctx->data[56] = ctx->bitlen; 183 | ctx->data[57] = ctx->bitlen >> 8; 184 | ctx->data[58] = ctx->bitlen >> 16; 185 | ctx->data[59] = ctx->bitlen >> 24; 186 | ctx->data[60] = ctx->bitlen >> 32; 187 | ctx->data[61] = ctx->bitlen >> 40; 188 | ctx->data[62] = ctx->bitlen >> 48; 189 | ctx->data[63] = ctx->bitlen >> 56; 190 | md5_transform(ctx, ctx->data); 191 | 192 | // Since this implementation uses little endian byte ordering and MD uses big endian, 193 | // reverse all the bytes when copying the final state to the output hash. 
194 | for (i = 0; i < 4; ++i) { 195 | hash[i] = (ctx->state[0] >> (i * 8)) & 0x000000ff; 196 | hash[i + 4] = (ctx->state[1] >> (i * 8)) & 0x000000ff; 197 | hash[i + 8] = (ctx->state[2] >> (i * 8)) & 0x000000ff; 198 | hash[i + 12] = (ctx->state[3] >> (i * 8)) & 0x000000ff; 199 | } 200 | free(ctx); 201 | return MD5_BLOCK_SIZE; 202 | } 203 | -------------------------------------------------------------------------------- /src/crypto/sha1.c: -------------------------------------------------------------------------------- 1 | // Originally from the sha1 SQLite exension, Public Domain 2 | // https://sqlite.org/src/file/ext/misc/sha1.c 3 | // Modified by Anton Zhiyanov, https://github.com/nalgeon/sqlean/, MIT License 4 | 5 | #include "sha1.h" 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #define SHA_ROT(x, l, r) ((x) << (l) | (x) >> (r)) 13 | #define rol(x, k) SHA_ROT(x, k, 32 - (k)) 14 | #define ror(x, k) SHA_ROT(x, 32 - (k), k) 15 | 16 | #define blk0le(i) (block[i] = (ror(block[i], 8) & 0xFF00FF00) | (rol(block[i], 8) & 0x00FF00FF)) 17 | #define blk0be(i) block[i] 18 | #define blk(i) \ 19 | (block[i & 15] = \ 20 | rol(block[(i + 13) & 15] ^ block[(i + 8) & 15] ^ block[(i + 2) & 15] ^ block[i & 15], 1)) 21 | 22 | /* 23 | * (R0+R1), R2, R3, R4 are the different operations (rounds) used in SHA1 24 | * 25 | * Rl0() for little-endian and Rb0() for big-endian. Endianness is 26 | * determined at run-time. 
/* 32-bit rotations built from a pair of shifts. */
#define SHA_ROT(x, l, r) ((x) << (l) | (x) >> (r))
#define rol(x, k) SHA_ROT(x, k, 32 - (k))
#define ror(x, k) SHA_ROT(x, 32 - (k), k)

/*
 * Message-schedule access. All three macros operate on a local array named
 * `block` that must be in scope at the point of use.
 *   blk0le(i)  byte-swaps block[i] in place (little-endian host) and yields it
 *   blk0be(i)  yields block[i] unchanged (big-endian host)
 *   blk(i)     computes word i of the schedule in place, in a 16-word ring
 */
#define blk0le(i) (block[i] = (ror(block[i], 8) & 0xFF00FF00) | (rol(block[i], 8) & 0x00FF00FF))
#define blk0be(i) block[i]
#define blk(i)                                                                                   \
    (block[i & 15] =                                                                             \
         rol(block[(i + 13) & 15] ^ block[(i + 8) & 15] ^ block[(i + 2) & 15] ^ block[i & 15], 1))

/*
 * (R0+R1), R2, R3, R4 are the different operations (rounds) used in SHA1.
 *
 * Rl0() is the first-16-words variant for little-endian hosts and Rb0() the
 * one for big-endian hosts; which one runs is decided at run time.
 *
 * Every macro updates z and rotates w, so v, w, x, y, z must be plain
 * lvalues. The bodies are wrapped in do/while(0) so that each invocation is
 * exactly one statement.
 */
#define Rl0(v, w, x, y, z, i)                                                    \
    do {                                                                         \
        (z) += (((w) & ((x) ^ (y))) ^ (y)) + blk0le(i) + 0x5A827999 + rol(v, 5); \
        (w) = ror(w, 2);                                                         \
    } while (0)
#define Rb0(v, w, x, y, z, i)                                                    \
    do {                                                                         \
        (z) += (((w) & ((x) ^ (y))) ^ (y)) + blk0be(i) + 0x5A827999 + rol(v, 5); \
        (w) = ror(w, 2);                                                         \
    } while (0)
#define R1(v, w, x, y, z, i)                                                  \
    do {                                                                      \
        (z) += (((w) & ((x) ^ (y))) ^ (y)) + blk(i) + 0x5A827999 + rol(v, 5); \
        (w) = ror(w, 2);                                                      \
    } while (0)
#define R2(v, w, x, y, z, i)                                          \
    do {                                                              \
        (z) += ((w) ^ (x) ^ (y)) + blk(i) + 0x6ED9EBA1 + rol(v, 5);   \
        (w) = ror(w, 2);                                              \
    } while (0)
#define R3(v, w, x, y, z, i)                                                          \
    do {                                                                              \
        (z) += ((((w) | (x)) & (y)) | ((w) & (x))) + blk(i) + 0x8F1BBCDC + rol(v, 5); \
        (w) = ror(w, 2);                                                              \
    } while (0)
#define R4(v, w, x, y, z, i)                                          \
    do {                                                              \
        (z) += ((w) ^ (x) ^ (y)) + blk(i) + 0xCA62C1D6 + rol(v, 5);   \
        (w) = ror(w, 2);                                              \
    } while (0)

/*
 * Hash a single 512-bit block. This is the core of the algorithm.
 *
 * state  - the five 32-bit chaining words; updated in place.
 * buffer - the 64 input bytes; copied to a local array, never modified.
 *
 * The rotation macros assume unsigned int is 32 bits wide.
 */
void SHA1Transform(unsigned int state[5], const unsigned char buffer[64]) {
    static const int one = 1; /* first byte is 1 only on little-endian hosts */
    unsigned int block[16];
    unsigned int a, b, c, d, e;

    memcpy(block, buffer, 64);

    /* Copy state[] to working vars */
    a = state[0];
    b = state[1];
    c = state[2];
    d = state[3];
    e = state[4];

    /* 4 rounds of 20 operations each. Loop unrolled. */
    if (1 == *(const unsigned char*)&one) {
        Rl0(a, b, c, d, e, 0);
        Rl0(e, a, b, c, d, 1);
        Rl0(d, e, a, b, c, 2);
        Rl0(c, d, e, a, b, 3);
        Rl0(b, c, d, e, a, 4);
        Rl0(a, b, c, d, e, 5);
        Rl0(e, a, b, c, d, 6);
        Rl0(d, e, a, b, c, 7);
        Rl0(c, d, e, a, b, 8);
        Rl0(b, c, d, e, a, 9);
        Rl0(a, b, c, d, e, 10);
        Rl0(e, a, b, c, d, 11);
        Rl0(d, e, a, b, c, 12);
        Rl0(c, d, e, a, b, 13);
        Rl0(b, c, d, e, a, 14);
        Rl0(a, b, c, d, e, 15);
    } else {
        Rb0(a, b, c, d, e, 0);
        Rb0(e, a, b, c, d, 1);
        Rb0(d, e, a, b, c, 2);
        Rb0(c, d, e, a, b, 3);
        Rb0(b, c, d, e, a, 4);
        Rb0(a, b, c, d, e, 5);
        Rb0(e, a, b, c, d, 6);
        Rb0(d, e, a, b, c, 7);
        Rb0(c, d, e, a, b, 8);
        Rb0(b, c, d, e, a, 9);
        Rb0(a, b, c, d, e, 10);
        Rb0(e, a, b, c, d, 11);
        Rb0(d, e, a, b, c, 12);
        Rb0(c, d, e, a, b, 13);
        Rb0(b, c, d, e, a, 14);
        Rb0(a, b, c, d, e, 15);
    }
    R1(e, a, b, c, d, 16);
    R1(d, e, a, b, c, 17);
    R1(c, d, e, a, b, 18);
    R1(b, c, d, e, a, 19);
    R2(a, b, c, d, e, 20);
    R2(e, a, b, c, d, 21);
    R2(d, e, a, b, c, 22);
    R2(c, d, e, a, b, 23);
    R2(b, c, d, e, a, 24);
    R2(a, b, c, d, e, 25);
    R2(e, a, b, c, d, 26);
    R2(d, e, a, b, c, 27);
    R2(c, d, e, a, b, 28);
    R2(b, c, d, e, a, 29);
    R2(a, b, c, d, e, 30);
    R2(e, a, b, c, d, 31);
    R2(d, e, a, b, c, 32);
    R2(c, d, e, a, b, 33);
    R2(b, c, d, e, a, 34);
    R2(a, b, c, d, e, 35);
    R2(e, a, b, c, d, 36);
    R2(d, e, a, b, c, 37);
    R2(c, d, e, a, b, 38);
    R2(b, c, d, e, a, 39);
    R3(a, b, c, d, e, 40);
    R3(e, a, b, c, d, 41);
    R3(d, e, a, b, c, 42);
    R3(c, d, e, a, b, 43);
    R3(b, c, d, e, a, 44);
    R3(a, b, c, d, e, 45);
    R3(e, a, b, c, d, 46);
    R3(d, e, a, b, c, 47);
    R3(c, d, e, a, b, 48);
    R3(b, c, d, e, a, 49);
    R3(a, b, c, d, e, 50);
    R3(e, a, b, c, d, 51);
    R3(d, e, a, b, c, 52);
    R3(c, d, e, a, b, 53);
    R3(b, c, d, e, a, 54);
    R3(a, b, c, d, e, 55);
    R3(e, a, b, c, d, 56);
    R3(d, e, a, b, c, 57);
    R3(c, d, e, a, b, 58);
    R3(b, c, d, e, a, 59);
    R4(a, b, c, d, e, 60);
    R4(e, a, b, c, d, 61);
    R4(d, e, a, b, c, 62);
    R4(c, d, e, a, b, 63);
    R4(b, c, d, e, a, 64);
    R4(a, b, c, d, e, 65);
    R4(e, a, b, c, d, 66);
    R4(d, e, a, b, c, 67);
    R4(c, d, e, a, b, 68);
    R4(b, c, d, e, a, 69);
    R4(a, b, c, d, e, 70);
    R4(e, a, b, c, d, 71);
    R4(d, e, a, b, c, 72);
    R4(c, d, e, a, b, 73);
    R4(b, c, d, e, a, 74);
    R4(a, b, c, d, e, 75);
    R4(e, a, b, c, d, 76);
    R4(d, e, a, b, c, 77);
    R4(c, d, e, a, b, 78);
    R4(b, c, d, e, a, 79);

    /* Add the working vars back into state[] */
    state[0] += a;
    state[1] += b;
    state[2] += c;
    state[3] += d;
    state[4] += e;
}
R3(e, a, b, c, d, 51); 144 | R3(d, e, a, b, c, 52); 145 | R3(c, d, e, a, b, 53); 146 | R3(b, c, d, e, a, 54); 147 | R3(a, b, c, d, e, 55); 148 | R3(e, a, b, c, d, 56); 149 | R3(d, e, a, b, c, 57); 150 | R3(c, d, e, a, b, 58); 151 | R3(b, c, d, e, a, 59); 152 | R4(a, b, c, d, e, 60); 153 | R4(e, a, b, c, d, 61); 154 | R4(d, e, a, b, c, 62); 155 | R4(c, d, e, a, b, 63); 156 | R4(b, c, d, e, a, 64); 157 | R4(a, b, c, d, e, 65); 158 | R4(e, a, b, c, d, 66); 159 | R4(d, e, a, b, c, 67); 160 | R4(c, d, e, a, b, 68); 161 | R4(b, c, d, e, a, 69); 162 | R4(a, b, c, d, e, 70); 163 | R4(e, a, b, c, d, 71); 164 | R4(d, e, a, b, c, 72); 165 | R4(c, d, e, a, b, 73); 166 | R4(b, c, d, e, a, 74); 167 | R4(a, b, c, d, e, 75); 168 | R4(e, a, b, c, d, 76); 169 | R4(d, e, a, b, c, 77); 170 | R4(c, d, e, a, b, 78); 171 | R4(b, c, d, e, a, 79); 172 | 173 | /* Add the working vars back into context.state[] */ 174 | state[0] += a; 175 | state[1] += b; 176 | state[2] += c; 177 | state[3] += d; 178 | state[4] += e; 179 | 180 | #undef a 181 | #undef b 182 | #undef c 183 | #undef d 184 | #undef e 185 | } 186 | 187 | /* Initialize a SHA1 context */ 188 | void* sha1_init() { 189 | /* SHA1 initialization constants */ 190 | SHA1Context* ctx; 191 | ctx = malloc(sizeof(SHA1Context)); 192 | ctx->state[0] = 0x67452301; 193 | ctx->state[1] = 0xEFCDAB89; 194 | ctx->state[2] = 0x98BADCFE; 195 | ctx->state[3] = 0x10325476; 196 | ctx->state[4] = 0xC3D2E1F0; 197 | ctx->count[0] = ctx->count[1] = 0; 198 | return ctx; 199 | } 200 | 201 | /* Add new content to the SHA1 hash */ 202 | void sha1_update(SHA1Context* ctx, const unsigned char* data, size_t len) { 203 | unsigned int i, j; 204 | 205 | j = ctx->count[0]; 206 | if ((ctx->count[0] += len << 3) < j) { 207 | ctx->count[1] += (len >> 29) + 1; 208 | } 209 | j = (j >> 3) & 63; 210 | if ((j + len) > 63) { 211 | (void)memcpy(&ctx->buffer[j], data, (i = 64 - j)); 212 | SHA1Transform(ctx->state, ctx->buffer); 213 | for (; i + 63 < len; i += 64) { 214 | 
SHA1Transform(ctx->state, &data[i]); 215 | } 216 | j = 0; 217 | } else { 218 | i = 0; 219 | } 220 | (void)memcpy(&ctx->buffer[j], &data[i], len - i); 221 | } 222 | 223 | int sha1_final(SHA1Context* ctx, unsigned char hash[]) { 224 | unsigned int i; 225 | unsigned char finalcount[8]; 226 | static const char zEncode[] = "0123456789abcdef"; 227 | 228 | for (i = 0; i < 8; i++) { 229 | finalcount[i] = (unsigned char)((ctx->count[(i >= 4 ? 0 : 1)] >> ((3 - (i & 3)) * 8)) & 230 | 255); /* Endian independent */ 231 | } 232 | sha1_update(ctx, (const unsigned char*)"\200", 1); 233 | while ((ctx->count[0] & 504) != 448) { 234 | sha1_update(ctx, (const unsigned char*)"\0", 1); 235 | } 236 | sha1_update(ctx, finalcount, 8); /* Should cause a SHA1Transform() */ 237 | for (i = 0; i < 20; i++) { 238 | hash[i] = (unsigned char)((ctx->state[i >> 2] >> ((3 - (i & 3)) * 8)) & 255); 239 | } 240 | free(ctx); 241 | return SHA1_BLOCK_SIZE; 242 | } -------------------------------------------------------------------------------- /src/sqlite3-ipaddr.c: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2021 Vincent Bernat, MIT License 2 | // https://github.com/nalgeon/sqlean 3 | 4 | /* 5 | * SQLite IP address functions. 
6 | */ 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #include "sqlite3ext.h" 17 | 18 | SQLITE_EXTENSION_INIT1 19 | 20 | struct ipaddress { 21 | int af; 22 | union { 23 | struct in6_addr ipv6; 24 | struct in_addr ipv4; 25 | }; 26 | unsigned masklen; 27 | }; 28 | 29 | static struct ipaddress* parse_ipaddress(const char* address) { 30 | struct ipaddress* ip = NULL; 31 | unsigned char buf[sizeof(struct in6_addr)]; 32 | char* sep = strchr(address, '/'); 33 | unsigned long masklen; 34 | if (sep) { 35 | char* end; 36 | errno = 0; 37 | masklen = strtoul(sep + 1, &end, 10); 38 | if (errno != 0 || sep + 1 == end || *end != '\0') 39 | return NULL; 40 | *sep = '\0'; 41 | } 42 | if (inet_pton(AF_INET, address, buf)) { 43 | if (sep && masklen > 32) 44 | goto end; 45 | 46 | ip = sqlite3_malloc(sizeof(struct ipaddress)); 47 | memcpy(&ip->ipv4, buf, sizeof(struct in_addr)); 48 | ip->af = AF_INET; 49 | ip->masklen = sep ? masklen : 32; 50 | } else if (inet_pton(AF_INET6, address, buf)) { 51 | if (sep && masklen > 128) 52 | goto end; 53 | 54 | ip = sqlite3_malloc(sizeof(struct ipaddress)); 55 | memcpy(&ip->ipv6, buf, sizeof(struct in6_addr)); 56 | ip->af = AF_INET6; 57 | ip->masklen = sep ? masklen : 128; 58 | } 59 | end: 60 | if (sep) 61 | *sep = '/'; 62 | return ip; 63 | } 64 | 65 | static void sqlite3_ipfamily(sqlite3_context* context, int argc, sqlite3_value** argv) { 66 | assert(argc == 1); 67 | if (sqlite3_value_type(argv[0]) == SQLITE_NULL) { 68 | sqlite3_result_null(context); 69 | return; 70 | } 71 | const char* address = (char*)sqlite3_value_text(argv[0]); 72 | struct ipaddress* ip = parse_ipaddress(address); 73 | if (ip == NULL) { 74 | sqlite3_result_null(context); 75 | return; 76 | } 77 | sqlite3_result_int(context, ip->af == AF_INET ? 
4 : 6); 78 | sqlite3_free(ip); 79 | } 80 | 81 | static void sqlite3_iphost(sqlite3_context* context, int argc, sqlite3_value** argv) { 82 | assert(argc == 1); 83 | if (sqlite3_value_type(argv[0]) == SQLITE_NULL) { 84 | sqlite3_result_null(context); 85 | return; 86 | } 87 | const char* address = (char*)sqlite3_value_text(argv[0]); 88 | struct ipaddress* ip = parse_ipaddress(address); 89 | if (ip == NULL) { 90 | sqlite3_result_null(context); 91 | return; 92 | } 93 | if (ip->af == AF_INET) { 94 | char* result = sqlite3_malloc(INET_ADDRSTRLEN); 95 | inet_ntop(AF_INET, &ip->ipv4, result, INET_ADDRSTRLEN); 96 | sqlite3_result_text(context, result, -1, sqlite3_free); 97 | } else if (ip->af == AF_INET6) { 98 | char* result = sqlite3_malloc(INET6_ADDRSTRLEN); 99 | inet_ntop(AF_INET6, &ip->ipv6, result, INET6_ADDRSTRLEN); 100 | sqlite3_result_text(context, result, -1, sqlite3_free); 101 | } 102 | sqlite3_free(ip); 103 | } 104 | 105 | static void sqlite3_ipmasklen(sqlite3_context* context, int argc, sqlite3_value** argv) { 106 | assert(argc == 1); 107 | if (sqlite3_value_type(argv[0]) == SQLITE_NULL) { 108 | sqlite3_result_null(context); 109 | return; 110 | } 111 | const char* address = (char*)sqlite3_value_text(argv[0]); 112 | struct ipaddress* ip = parse_ipaddress(address); 113 | if (ip == NULL) { 114 | sqlite3_result_null(context); 115 | return; 116 | } 117 | sqlite3_result_int(context, ip->masklen); 118 | return; 119 | } 120 | 121 | static void sqlite3_ipnetwork(sqlite3_context* context, int argc, sqlite3_value** argv) { 122 | assert(argc == 1); 123 | if (sqlite3_value_type(argv[0]) == SQLITE_NULL) { 124 | sqlite3_result_null(context); 125 | return; 126 | } 127 | const char* address = (char*)sqlite3_value_text(argv[0]); 128 | struct ipaddress* ip = parse_ipaddress(address); 129 | if (ip == NULL) { 130 | sqlite3_result_null(context); 131 | return; 132 | } 133 | if (ip->af == AF_INET) { 134 | char buf[INET_ADDRSTRLEN]; 135 | ip->ipv4.s_addr = 136 | 
htonl(ntohl(ip->ipv4.s_addr) & ~(uint32_t)((1ULL << (32 - ip->masklen)) - 1)); 137 | inet_ntop(AF_INET, &ip->ipv4, buf, INET_ADDRSTRLEN); 138 | char* result = sqlite3_malloc(INET_ADDRSTRLEN + 3); 139 | sprintf(result, "%s/%u", buf, ip->masklen); 140 | sqlite3_result_text(context, result, -1, sqlite3_free); 141 | } else if (ip->af == AF_INET6) { 142 | char buf[INET6_ADDRSTRLEN]; 143 | for (unsigned i = 0; i < 16; i++) { 144 | if (ip->masklen / 8 < i) 145 | ip->ipv6.s6_addr[i] = 0; 146 | else if (ip->masklen / 8 == i) 147 | ip->ipv6.s6_addr[i] &= ~(ip->masklen % 8); 148 | } 149 | inet_ntop(AF_INET6, &ip->ipv6, buf, INET6_ADDRSTRLEN); 150 | char* result = sqlite3_malloc(INET6_ADDRSTRLEN + 4); 151 | sprintf(result, "%s/%u", buf, ip->masklen); 152 | sqlite3_result_text(context, result, -1, sqlite3_free); 153 | } 154 | sqlite3_free(ip); 155 | } 156 | 157 | static void sqlite3_ipcontains(sqlite3_context* context, int argc, sqlite3_value** argv) { 158 | assert(argc == 2); 159 | if (sqlite3_value_type(argv[0]) == SQLITE_NULL || sqlite3_value_type(argv[1]) == SQLITE_NULL) { 160 | sqlite3_result_null(context); 161 | return; 162 | } 163 | 164 | const char* address1 = (char*)sqlite3_value_text(argv[0]); 165 | struct ipaddress* ip1 = parse_ipaddress(address1); 166 | const char* address2 = (char*)sqlite3_value_text(argv[1]); 167 | struct ipaddress* ip2 = parse_ipaddress(address2); 168 | if (ip1 == NULL || ip2 == NULL) { 169 | sqlite3_result_null(context); 170 | goto end; 171 | } 172 | if (ip1->af != ip2->af || ip1->masklen > ip2->masklen) { 173 | sqlite3_result_int(context, 0); 174 | goto end; 175 | } 176 | 177 | if (ip1->af == AF_INET) { 178 | ip1->ipv4.s_addr = 179 | htonl(ntohl(ip1->ipv4.s_addr) & ~(uint32_t)((1ULL << (32 - ip1->masklen)) - 1)); 180 | ip2->ipv4.s_addr = 181 | htonl(ntohl(ip2->ipv4.s_addr) & ~(uint32_t)((1ULL << (32 - ip1->masklen)) - 1)); 182 | sqlite3_result_int(context, ip1->ipv4.s_addr == ip2->ipv4.s_addr); 183 | goto end; 184 | } 185 | if (ip1->af == 
AF_INET6) { 186 | for (unsigned i = 0; i < 16; i++) { 187 | if (ip1->masklen / 8 < i) { 188 | ip1->ipv6.s6_addr[i] = 0; 189 | ip2->ipv6.s6_addr[i] = 0; 190 | } else if (ip1->masklen / 8 == i) { 191 | ip1->ipv6.s6_addr[i] &= ~(ip1->masklen % 8); 192 | ip2->ipv6.s6_addr[i] &= ~(ip1->masklen % 8); 193 | } 194 | if (ip1->ipv6.s6_addr[i] != ip2->ipv6.s6_addr[i]) { 195 | sqlite3_result_int(context, 0); 196 | goto end; 197 | } 198 | } 199 | sqlite3_result_int(context, 1); 200 | } 201 | end: 202 | sqlite3_free(ip1); 203 | sqlite3_free(ip2); 204 | } 205 | 206 | /* 207 | * Registers the extension. 208 | */ 209 | #ifdef _WIN32 210 | __declspec(dllexport) 211 | #endif 212 | int sqlite3_ipaddr_init(sqlite3* db, char** pzErrMsg, const sqlite3_api_routines* pApi) { 213 | SQLITE_EXTENSION_INIT2(pApi); 214 | static const int flags = SQLITE_UTF8 | SQLITE_INNOCUOUS | SQLITE_DETERMINISTIC; 215 | sqlite3_create_function(db, "ipfamily", 1, flags, 0, sqlite3_ipfamily, 0, 0); 216 | sqlite3_create_function(db, "iphost", 1, flags, 0, sqlite3_iphost, 0, 0); 217 | sqlite3_create_function(db, "ipmasklen", 1, flags, 0, sqlite3_ipmasklen, 0, 0); 218 | sqlite3_create_function(db, "ipnetwork", 1, flags, 0, sqlite3_ipnetwork, 0, 0); 219 | sqlite3_create_function(db, "ipcontains", 2, flags, 0, sqlite3_ipcontains, 0, 0); 220 | 221 | return SQLITE_OK; 222 | } 223 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # All the missing SQLite functions 2 | 3 | SQLite has few functions compared to other database management systems. SQLite authors see this as a feature rather than a problem, because SQLite has an extension mechanism in place. 4 | 5 | There are a lot of SQLite extensions out there, but they are incomplete, inconsistent and scattered across the internet. 
`sqlean` brings them together, neatly packaged into domain modules, documented, tested, and built for Linux, Windows and macOS. 6 | 7 | We do not try to gather all the existing extensions into one giant pile — that would not be very useful. The goal is to create a well-thought set of domain modules with a convenient API. A kind of standard library for SQLite. 8 | 9 | To achieve it, we split extensions that are too broad, merge the ones that are too narrow, refactor, add missing features, test, document, and do a ton of other small things. 10 | 11 | ## The main set 12 | 13 | These are the most popular functions. They are tested, documented and organized into the domain modules with clear API. 14 | 15 | Think of them as the extended standard library for SQLite: 16 | 17 | - [crypto](docs/crypto.md): secure hashes 18 | - [fileio](docs/fileio.md): read and write files 19 | - [fuzzy](docs/fuzzy.md): fuzzy string matching and phonetics 20 | - [ipaddr](docs/ipaddr.md): IP address manipulation 21 | - [json1](docs/json1.md): JSON functions 22 | - [math](docs/math.md): math functions 23 | - [re](docs/re.md): regular expressions 24 | - [stats](docs/stats.md): math statistics 25 | - [text](docs/text.md): string functions 26 | - [unicode](docs/unicode.md): Unicode support 27 | - [uuid](docs/uuid.md): Universally Unique IDentifiers 28 | - [vsv](docs/vsv.md): CSV files as virtual tables 29 | 30 | ## The incubator 31 | 32 | These extensions haven't yet made their way to the main set. They may be untested, poorly documented, too broad, too narrow, or without a well-thought API. 
33 | 34 | Think of them as candidates for the standard library: 35 | 36 | - [array](https://github.com/nalgeon/sqlean/issues/27#issuecomment-1004109889): one-dimensional arrays 37 | - [besttype](https://github.com/nalgeon/sqlean/issues/27#issuecomment-999732640): convert string value to numeric 38 | - [bloom](https://github.com/nalgeon/sqlean/issues/27#issuecomment-1002267134): a fast way to tell if a value is already in a table 39 | - [btreeinfo](https://github.com/nalgeon/sqlean/issues/27#issuecomment-1004896027), [memstat](https://github.com/nalgeon/sqlean/issues/27#issuecomment-1007421989), [recsize](https://github.com/nalgeon/sqlean/issues/27#issuecomment-999732907) and [stmt](https://github.com/nalgeon/sqlean/issues/27#issuecomment-1007654407): various database introspection features 40 | - [cbrt](https://github.com/nalgeon/sqlean/issues/27#issuecomment-996605444) and [math2](https://github.com/nalgeon/sqlean/issues/27#issuecomment-999128539): additional math functions and bit arithmetics 41 | - [classifier](https://github.com/nalgeon/sqlean/issues/27#issuecomment-1001239676): binary classifier via logistic regression 42 | - [closure](https://github.com/nalgeon/sqlean/issues/27#issuecomment-1004931771): navigate hierarchic tables with parent/child relationships 43 | - [compress](https://github.com/nalgeon/sqlean/issues/27#issuecomment-1000937999) and [sqlar](https://github.com/nalgeon/sqlean/issues/27#issuecomment-1000938046): compress / uncompress data 44 | - [cron](https://github.com/nalgeon/sqlean/issues/27#issuecomment-997427979): match dates against cron patterns 45 | - [dbdump](https://github.com/nalgeon/sqlean/issues/27#issuecomment-1006791300): export database as SQL 46 | - [decimal](https://github.com/nalgeon/sqlean/issues/27#issuecomment-1007348326), [fcmp](https://github.com/nalgeon/sqlean/issues/27#issuecomment-997482625) and [ieee754](https://github.com/nalgeon/sqlean/issues/27#issuecomment-1007375162): decimal and floating-point arithmetic 47 | 
- [define](https://github.com/nalgeon/sqlean/issues/27#issuecomment-1004347222): create scalar and table-valued functions from SQL 48 | - [envfuncs](https://github.com/nalgeon/sqlean/issues/27#issuecomment-997423609): read environment variables 49 | - [eval](https://github.com/nalgeon/sqlean/issues/27#issuecomment-996432840): run arbitrary SQL statements 50 | - [isodate](https://github.com/nalgeon/sqlean/issues/27#issuecomment-998138191): additional date and time functions 51 | - [pearson](https://github.com/nalgeon/sqlean/issues/27#issuecomment-997417836): Pearson correlation coefficient between two data sets 52 | - [pivotvtab](https://github.com/nalgeon/sqlean/issues/27#issuecomment-997052157): pivot tables 53 | - [prefixes](https://github.com/nalgeon/sqlean/issues/27#issuecomment-1007464840): generate string prefixes 54 | - [rotate](https://github.com/nalgeon/sqlean/issues/27#issuecomment-1007500659): string obfuscation 55 | - [spellfix](https://github.com/nalgeon/sqlean/issues/27#issuecomment-1002297477): search a large vocabulary for close matches 56 | - [stats2](https://github.com/nalgeon/sqlean/issues/27#issuecomment-1000902666) and [stats3](https://github.com/nalgeon/sqlean/issues/27#issuecomment-1002703581): additional math statistics functions 57 | - [text2](https://github.com/nalgeon/sqlean/issues/27#issuecomment-1003105288): additional string functions 58 | - [uint](https://github.com/nalgeon/sqlean/issues/27#issuecomment-1001232670): natural string sorting and comparison 59 | - [unhex](https://github.com/nalgeon/sqlean/issues/27#issuecomment-997432989): reverse for `hex()` 60 | - [unionvtab](https://github.com/nalgeon/sqlean/issues/27#issuecomment-1007687162): union similar tables into one 61 | - [xmltojson](https://github.com/nalgeon/sqlean/issues/27#issuecomment-997018486): convert XML to JSON string 62 | - [zipfile](https://github.com/nalgeon/sqlean/issues/27#issuecomment-1001190336): read and write zip files 63 | - 
[zorder](https://github.com/nalgeon/sqlean/issues/27#issuecomment-1007733209): map multidimensional data to a single dimension 64 | 65 | [Vote for your favorites](https://github.com/nalgeon/sqlean/issues/27)! We'll refactor and merge popular ones into the main set. 66 | 67 | ## Download 68 | 69 | There are [precompiled binaries](https://github.com/nalgeon/sqlean/releases/latest) for every OS: 70 | 71 | - `*.dll` - for Windows 72 | - `*.so` - for Linux 73 | - `*.dylib` - for macOS 74 | 75 | Binaries are 64-bit and require a 64-bit SQLite version. If you are using SQLite shell on Windows (`sqlite.exe`), its 64-bit version is available at https://github.com/nalgeon/sqlite. 76 | 77 | Incubator extensions are [also available](https://github.com/nalgeon/sqlean/releases/tag/incubator). 78 | 79 | ## Usage 80 | 81 | CLI usage: 82 | 83 | ``` 84 | sqlite> .load ./stats 85 | sqlite> select median(value) from generate_series(1, 99); 86 | ``` 87 | 88 | IDE usage: 89 | 90 | ``` 91 | select load_extension('c:\Users\anton\sqlite\stats.dll'); 92 | select median(value) from generate_series(1, 99); 93 | ``` 94 | 95 | In-app usage: 96 | 97 | ```python 98 | import sqlite3 99 | 100 | connection = sqlite3.connect(":memory:") 101 | connection.enable_load_extension(True) 102 | connection.load_extension("./stats.so") 103 | connection.execute("select median(value) from generate_series(1, 99)") 104 | connection.close() 105 | ``` 106 | 107 | You can specify any other supported extension instead of `stats`. 108 | 109 | ## Contributing 110 | 111 | Contributions are welcome! Submit your own or third-party extension to the incubator: 112 | 113 | - [How to submit your extension](https://github.com/nalgeon/sqlean/blob/incubator/docs/submit.md) 114 | - [How to submit a third-party extension](https://github.com/nalgeon/sqlean/blob/incubator/docs/external.md) 115 | 116 | We want every extension to be self-contained. 
So we limit the project scope to extensions without external dependencies (other than the C standard library and SQLite itself).

Please note that we only accept extensions with permissive licenses (MIT License, Apache License etc.) or public domain. Copyleft licenses like GPL won't do.

## License

Copyright 2021+ [Anton Zhiyanov](https://antonz.org/), [Contributors](https://github.com/nalgeon/sqlean/graphs/contributors) and [Third-party Authors](docs/third-party.md).

The software is available under the MIT License.

## Stay tuned

Follow [**@ohmypy**](https://twitter.com/ohmypy) on Twitter to keep up with new features 🚀

--------------------------------------------------------------------------------
/src/sqlite3-uuid.c:
--------------------------------------------------------------------------------
// Originally from the uuid SQLite extension, Public Domain
// https://www.sqlite.org/src/file/ext/misc/uuid.c
// Modified by Anton Zhiyanov, https://github.com/nalgeon/sqlean/, MIT License

/*
 * This SQLite extension implements functions for handling RFC-4122 UUIDs.
 * Three SQL functions are implemented:
 *
 *     uuid4()        - generate a version 4 UUID as a string
 *     uuid_str(X)    - convert a UUID X into a well-formed UUID string
 *     uuid_blob(X)   - convert a UUID X into a 16-byte blob
 *
 * The output from uuid4() and uuid_str(X) are always well-formed RFC-4122
 * UUID strings in this format:
 *
 *        xxxxxxxx-xxxx-Mxxx-Nxxx-xxxxxxxxxxxx
 *
 * All of the 'x', 'M', and 'N' values are lower-case hexadecimal digits.
 * The M digit indicates the "version".  For uuid4()-generated UUIDs, the
 * version is always "4" (a random UUID).  The upper three bits of N digit
 * are the "variant".  This library only supports variant 1 (indicated
 * by values of N between '8' and 'b') as those are overwhelmingly the most
 * common.  Other variants are for legacy compatibility only.
 *
 * The output of uuid_blob(X) is always a 16-byte blob. The UUID input
 * string is converted in network byte order (big-endian) in accordance
 * with RFC-4122 specifications for variant-1 UUIDs.  Note that network
 * byte order is *always* used, even if the input self-identifies as a
 * variant-2 UUID.
 *
 * The input X to the uuid_str() and uuid_blob() functions can be either
 * a string or a BLOB. If it is a BLOB it must be exactly 16 bytes in
 * length or else a NULL is returned.  If the input is a string it must
 * consist of 32 hexadecimal digits, upper or lower case, optionally
 * surrounded by {...} and with optional "-" characters interposed in the
 * middle.  The flexibility of input is inspired by the PostgreSQL
 * implementation of UUID functions that accept all of the following
 * formats:
 *
 *     A0EEBC99-9C0B-4EF8-BB6D-6BB9BD380A11
 *     {a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11}
 *     a0eebc999c0b4ef8bb6d6bb9bd380a11
 *     a0ee-bc99-9c0b-4ef8-bb6d-6bb9-bd38-0a11
 *     {a0eebc99-9c0b4ef8-bb6d6bb9-bd380a11}
 *
 * If any of the above inputs are passed into uuid_str(), the output will
 * always be in the canonical RFC-4122 format:
 *
 *     a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11
 *
 * If the X input string has too few or too many digits or contains
 * stray characters other than {, }, or -, then NULL is returned.
 */
#include "sqlite3ext.h"
SQLITE_EXTENSION_INIT1
#include <assert.h>
#include <ctype.h>
#include <string.h>

#if !defined(SQLITE_ASCII) && !defined(SQLITE_EBCDIC)
#define SQLITE_ASCII 1
#endif

/*
 * Translate a single byte of Hex into an integer.
 */
/*
 * This routine only works if h really is a valid hexadecimal
 * character: 0..9a..fA..F
 *
 * The character-set adjustment is selected at compile time through
 * SQLITE_ASCII / SQLITE_EBCDIC; the low nibble of the adjusted code is
 * the digit value.
 */
static unsigned char sqlite3UuidHexToInt(int h) {
    int code = h;
    assert((h >= '0' && h <= '9') || (h >= 'a' && h <= 'f') || (h >= 'A' && h <= 'F'));
#ifdef SQLITE_ASCII
    /* letters have bit 6 set in ASCII: add 9 so 'a'/'A' land on 10 */
    code += 9 * (1 & (code >> 6));
#endif
#ifdef SQLITE_EBCDIC
    code += 9 * (1 & ~(code >> 4));
#endif
    return (unsigned char)(code & 0xf);
}

/*
 * Convert a 16-byte BLOB into a well-formed RFC-4122 UUID.  The output
 * buffer zStr should be at least 37 bytes in length.  The output will
 * be zero-terminated.
 */
static void sqlite3_uuid_blob_to_str(const unsigned char* aBlob, /* Input blob */
                                     unsigned char* zStr         /* Write the answer here */
) {
    static const char zDigits[] = "0123456789abcdef";
    unsigned char* out = zStr;
    int idx;
    for (idx = 0; idx < 16; idx++) {
        const unsigned char byte = aBlob[idx];
        /* dashes separate the 4-2-2-2-6 byte groups */
        if (idx == 4 || idx == 6 || idx == 8 || idx == 10) {
            *out++ = '-';
        }
        *out++ = zDigits[byte >> 4];
        *out++ = zDigits[byte & 0xf];
    }
    *out = 0;
}

/*
 * Attempt to parse a zero-terminated input string zStr into a binary
 * UUID.  Return 0 on success, or non-zero if the input string is not
 * parsable.
 */
static int sqlite3_uuid_str_to_blob(const unsigned char* zStr, /* Input string */
                                    unsigned char* aBlob       /* Write results here */
) {
    const unsigned char* p = zStr;
    int idx;
    if (*p == '{') {
        p++;
    }
    for (idx = 0; idx < 16; idx++) {
        /* at most one optional dash in front of each byte */
        if (*p == '-') {
            p++;
        }
        if (!isxdigit(p[0]) || !isxdigit(p[1])) {
            return 1;
        }
        aBlob[idx] = (sqlite3UuidHexToInt(p[0]) << 4) + sqlite3UuidHexToInt(p[1]);
        p += 2;
    }
    if (*p == '}') {
        p++;
    }
    /* anything left over means the string was too long */
    return *p != 0;
}

/*
 * Render sqlite3_value pIn as a 16-byte UUID blob.
 */
Return a pointer 133 | * to the blob, or NULL if the input is not well-formed. 134 | */ 135 | static const unsigned char* sqlite3_uuid_input_to_blob(sqlite3_value* pIn, /* Input text */ 136 | unsigned char* pBuf /* output buffer */ 137 | ) { 138 | switch (sqlite3_value_type(pIn)) { 139 | case SQLITE_TEXT: { 140 | const unsigned char* z = sqlite3_value_text(pIn); 141 | if (sqlite3_uuid_str_to_blob(z, pBuf)) 142 | return 0; 143 | return pBuf; 144 | } 145 | case SQLITE_BLOB: { 146 | int n = sqlite3_value_bytes(pIn); 147 | return n == 16 ? sqlite3_value_blob(pIn) : 0; 148 | } 149 | default: { 150 | return 0; 151 | } 152 | } 153 | } 154 | 155 | /* 156 | * sqlite3_uuid generates a version 4 UUID as a string 157 | */ 158 | static void sqlite3_uuid(sqlite3_context* context, int argc, sqlite3_value** argv) { 159 | unsigned char aBlob[16]; 160 | unsigned char zStr[37]; 161 | (void)argc; 162 | (void)argv; 163 | sqlite3_randomness(16, aBlob); 164 | aBlob[6] = (aBlob[6] & 0x0f) + 0x40; 165 | aBlob[8] = (aBlob[8] & 0x3f) + 0x80; 166 | sqlite3_uuid_blob_to_str(aBlob, zStr); 167 | sqlite3_result_text(context, (char*)zStr, 36, SQLITE_TRANSIENT); 168 | } 169 | 170 | /* 171 | * sqlite3_uuid_str converts a UUID X into a well-formed UUID string. 172 | * X can be either a string or a blob. 173 | */ 174 | static void sqlite3_uuid_str(sqlite3_context* context, int argc, sqlite3_value** argv) { 175 | unsigned char aBlob[16]; 176 | unsigned char zStr[37]; 177 | const unsigned char* pBlob; 178 | (void)argc; 179 | pBlob = sqlite3_uuid_input_to_blob(argv[0], aBlob); 180 | if (pBlob == 0) 181 | return; 182 | sqlite3_uuid_blob_to_str(pBlob, zStr); 183 | sqlite3_result_text(context, (char*)zStr, 36, SQLITE_TRANSIENT); 184 | } 185 | 186 | /* 187 | * sqlite3_uuid_blob converts a UUID X into a 16-byte blob. 188 | * X can be either a string or a blob. 
189 | */ 190 | static void sqlite3_uuid_blob(sqlite3_context* context, int argc, sqlite3_value** argv) { 191 | unsigned char aBlob[16]; 192 | const unsigned char* pBlob; 193 | (void)argc; 194 | pBlob = sqlite3_uuid_input_to_blob(argv[0], aBlob); 195 | if (pBlob == 0) 196 | return; 197 | sqlite3_result_blob(context, pBlob, 16, SQLITE_TRANSIENT); 198 | } 199 | 200 | #ifdef _WIN32 201 | __declspec(dllexport) 202 | #endif 203 | int sqlite3_uuid_init(sqlite3* db, char** pzErrMsg, const sqlite3_api_routines* pApi) { 204 | SQLITE_EXTENSION_INIT2(pApi); 205 | (void)pzErrMsg; /* Unused parameter */ 206 | sqlite3_create_function(db, "uuid4", 0, SQLITE_UTF8 | SQLITE_INNOCUOUS, 0, sqlite3_uuid, 0, 0); 207 | /* for postgresql compatibility */ 208 | sqlite3_create_function(db, "gen_random_uuid", 0, SQLITE_UTF8 | SQLITE_INNOCUOUS, 0, 209 | sqlite3_uuid, 0, 0); 210 | static const int flags = SQLITE_UTF8 | SQLITE_INNOCUOUS | SQLITE_DETERMINISTIC; 211 | sqlite3_create_function(db, "uuid_str", 1, flags, 0, sqlite3_uuid_str, 0, 0); 212 | sqlite3_create_function(db, "uuid_blob", 1, flags, 0, sqlite3_uuid_blob, 0, 0); 213 | return SQLITE_OK; 214 | } 215 | -------------------------------------------------------------------------------- /src/fuzzy/editdist.c: -------------------------------------------------------------------------------- 1 | // Originally from the spellfix SQLite exension, Public Domain 2 | // https://www.sqlite.org/src/file/ext/misc/spellfix.c 3 | // Modified by Anton Zhiyanov, https://github.com/nalgeon/sqlean/, MIT License 4 | 5 | #include 6 | #include 7 | 8 | #include "common.h" 9 | 10 | extern const unsigned char midClass[]; 11 | extern const unsigned char initClass[]; 12 | extern const unsigned char className[]; 13 | 14 | /* 15 | ** Return the character class number for a character given its 16 | ** context. 17 | */ 18 | static char characterClass(char cPrev, char c) { 19 | return cPrev == 0 ? 
initClass[c & 0x7f] : midClass[c & 0x7f]; 20 | } 21 | 22 | /* 23 | ** Return the cost of inserting or deleting character c immediately 24 | ** following character cPrev. If cPrev==0, that means c is the first 25 | ** character of the word. 26 | */ 27 | static int insertOrDeleteCost(char cPrev, char c, char cNext) { 28 | char classC = characterClass(cPrev, c); 29 | char classCprev; 30 | 31 | if (classC == CCLASS_SILENT) { 32 | /* Insert or delete "silent" characters such as H or W */ 33 | return 1; 34 | } 35 | if (cPrev == c) { 36 | /* Repeated characters, or miss a repeat */ 37 | return 10; 38 | } 39 | if (classC == CCLASS_VOWEL && (cPrev == 'r' || cNext == 'r')) { 40 | return 20; /* Insert a vowel before or after 'r' */ 41 | } 42 | classCprev = characterClass(cPrev, cPrev); 43 | if (classC == classCprev) { 44 | if (classC == CCLASS_VOWEL) { 45 | /* Remove or add a new vowel to a vowel cluster */ 46 | return 15; 47 | } else { 48 | /* Remove or add a consonant not in the same class */ 49 | return 50; 50 | } 51 | } 52 | 53 | /* any other character insertion or deletion */ 54 | return 100; 55 | } 56 | 57 | /* 58 | ** Divide the insertion cost by this factor when appending to the 59 | ** end of the word. 60 | */ 61 | #define FINAL_INS_COST_DIV 4 62 | 63 | /* 64 | ** Return the cost of substituting cTo in place of cFrom assuming 65 | ** the previous character is cPrev. If cPrev==0 then cTo is the first 66 | ** character of the word. 
67 | */ 68 | static int substituteCost(char cPrev, char cFrom, char cTo) { 69 | char classFrom, classTo; 70 | if (cFrom == cTo) { 71 | /* Exact match */ 72 | return 0; 73 | } 74 | if (cFrom == (cTo ^ 0x20) && ((cTo >= 'A' && cTo <= 'Z') || (cTo >= 'a' && cTo <= 'z'))) { 75 | /* differ only in case */ 76 | return 0; 77 | } 78 | classFrom = characterClass(cPrev, cFrom); 79 | classTo = characterClass(cPrev, cTo); 80 | if (classFrom == classTo) { 81 | /* Same character class */ 82 | return 40; 83 | } 84 | if (classFrom >= CCLASS_B && classFrom <= CCLASS_Y && classTo >= CCLASS_B && 85 | classTo <= CCLASS_Y) { 86 | /* Convert from one consonant to another, but in a different class */ 87 | return 75; 88 | } 89 | /* Any other subsitution */ 90 | return 100; 91 | } 92 | 93 | /* 94 | ** Given two strings zA and zB which are pure ASCII, return the cost 95 | ** of transforming zA into zB. If zA ends with '*' assume that it is 96 | ** a prefix of zB and give only minimal penalty for extra characters 97 | ** on the end of zB. 98 | ** 99 | ** Smaller numbers mean a closer match. 100 | ** 101 | ** Negative values indicate an error: 102 | ** -1 One of the inputs is NULL 103 | ** -2 Non-ASCII characters on input 104 | ** -3 Unable to allocate memory 105 | ** 106 | ** If pnMatch is not NULL, then *pnMatch is set to the number of bytes 107 | ** of zB that matched the pattern in zA. If zA does not end with a '*', 108 | ** then this value is always the number of bytes in zB (i.e. strlen(zB)). 109 | ** If zA does end in a '*', then it is the number of bytes in the prefix 110 | ** of zB that was deemed to match zA. 
111 | */ 112 | int edit_distance(const char* zA, const char* zB, int* pnMatch) { 113 | int nA, nB; /* Number of characters in zA[] and zB[] */ 114 | int xA, xB; /* Loop counters for zA[] and zB[] */ 115 | char cA = 0, cB; /* Current character of zA and zB */ 116 | char cAprev, cBprev; /* Previous character of zA and zB */ 117 | char cAnext, cBnext; /* Next character in zA and zB */ 118 | int d; /* North-west cost value */ 119 | int dc = 0; /* North-west character value */ 120 | int res; /* Final result */ 121 | int* m; /* The cost matrix */ 122 | char* cx; /* Corresponding character values */ 123 | int* toFree = 0; /* Malloced space */ 124 | int nMatch = 0; 125 | int mStack[60 + 15]; /* Stack space to use if not too much is needed */ 126 | 127 | /* Early out if either input is NULL */ 128 | if (zA == 0 || zB == 0) 129 | return -1; 130 | 131 | /* Skip any common prefix */ 132 | while (zA[0] && zA[0] == zB[0]) { 133 | dc = zA[0]; 134 | zA++; 135 | zB++; 136 | nMatch++; 137 | } 138 | if (pnMatch) 139 | *pnMatch = nMatch; 140 | if (zA[0] == 0 && zB[0] == 0) 141 | return 0; 142 | 143 | #if 0 144 | printf("A=\"%s\" B=\"%s\" dc=%c\n", zA, zB, dc?dc:' '); 145 | #endif 146 | 147 | /* Verify input strings and measure their lengths */ 148 | for (nA = 0; zA[nA]; nA++) { 149 | if (zA[nA] & 0x80) 150 | return -2; 151 | } 152 | for (nB = 0; zB[nB]; nB++) { 153 | if (zB[nB] & 0x80) 154 | return -2; 155 | } 156 | 157 | /* Special processing if either string is empty */ 158 | if (nA == 0) { 159 | cBprev = (char)dc; 160 | for (xB = res = 0; (cB = zB[xB]) != 0; xB++) { 161 | res += insertOrDeleteCost(cBprev, cB, zB[xB + 1]) / FINAL_INS_COST_DIV; 162 | cBprev = cB; 163 | } 164 | return res; 165 | } 166 | if (nB == 0) { 167 | cAprev = (char)dc; 168 | for (xA = res = 0; (cA = zA[xA]) != 0; xA++) { 169 | res += insertOrDeleteCost(cAprev, cA, zA[xA + 1]); 170 | cAprev = cA; 171 | } 172 | return res; 173 | } 174 | 175 | /* A is a prefix of B */ 176 | if (zA[0] == '*' && zA[1] == 0) 177 | 
return 0; 178 | 179 | /* Allocate and initialize the Wagner matrix */ 180 | if (nB < (sizeof(mStack) * 4) / (sizeof(mStack[0]) * 5)) { 181 | m = mStack; 182 | } else { 183 | m = toFree = malloc((nB + 1) * 5 * sizeof(m[0]) / 4); 184 | if (m == 0) 185 | return -3; 186 | } 187 | cx = (char*)&m[nB + 1]; 188 | 189 | /* Compute the Wagner edit distance */ 190 | m[0] = 0; 191 | cx[0] = (char)dc; 192 | cBprev = (char)dc; 193 | for (xB = 1; xB <= nB; xB++) { 194 | cBnext = zB[xB]; 195 | cB = zB[xB - 1]; 196 | cx[xB] = cB; 197 | m[xB] = m[xB - 1] + insertOrDeleteCost(cBprev, cB, cBnext); 198 | cBprev = cB; 199 | } 200 | cAprev = (char)dc; 201 | for (xA = 1; xA <= nA; xA++) { 202 | int lastA = (xA == nA); 203 | cA = zA[xA - 1]; 204 | cAnext = zA[xA]; 205 | if (cA == '*' && lastA) 206 | break; 207 | d = m[0]; 208 | dc = cx[0]; 209 | m[0] = d + insertOrDeleteCost(cAprev, cA, cAnext); 210 | cBprev = 0; 211 | for (xB = 1; xB <= nB; xB++) { 212 | int totalCost, insCost, delCost, subCost, ncx; 213 | cB = zB[xB - 1]; 214 | cBnext = zB[xB]; 215 | 216 | /* Cost to insert cB */ 217 | insCost = insertOrDeleteCost(cx[xB - 1], cB, cBnext); 218 | if (lastA) 219 | insCost /= FINAL_INS_COST_DIV; 220 | 221 | /* Cost to delete cA */ 222 | delCost = insertOrDeleteCost(cx[xB], cA, cBnext); 223 | 224 | /* Cost to substitute cA->cB */ 225 | subCost = substituteCost(cx[xB - 1], cA, cB); 226 | 227 | /* Best cost */ 228 | totalCost = insCost + m[xB - 1]; 229 | ncx = cB; 230 | if ((delCost + m[xB]) < totalCost) { 231 | totalCost = delCost + m[xB]; 232 | ncx = cA; 233 | } 234 | if ((subCost + d) < totalCost) { 235 | totalCost = subCost + d; 236 | } 237 | 238 | #if 0 239 | printf("%d,%d d=%4d u=%4d r=%4d dc=%c cA=%c cB=%c" 240 | " ins=%4d del=%4d sub=%4d t=%4d ncx=%c\n", 241 | xA, xB, d, m[xB], m[xB-1], dc?dc:' ', cA, cB, 242 | insCost, delCost, subCost, totalCost, ncx?ncx:' '); 243 | #endif 244 | 245 | /* Update the matrix */ 246 | d = m[xB]; 247 | dc = cx[xB]; 248 | m[xB] = totalCost; 249 | cx[xB] = 
(char)ncx; 250 | cBprev = cB; 251 | } 252 | cAprev = cA; 253 | } 254 | 255 | /* Free the wagner matrix and return the result */ 256 | if (cA == '*') { 257 | res = m[1]; 258 | for (xB = 1; xB <= nB; xB++) { 259 | if (m[xB] < res) { 260 | res = m[xB]; 261 | if (pnMatch) 262 | *pnMatch = xB + nMatch; 263 | } 264 | } 265 | } else { 266 | res = m[nB]; 267 | /* In the current implementation, pnMatch is always NULL if zA does 268 | ** not end in "*" */ 269 | assert(pnMatch == 0); 270 | } 271 | free(toFree); 272 | return res; 273 | } -------------------------------------------------------------------------------- /src/sqlite3-math.c: -------------------------------------------------------------------------------- 1 | // Originally from SQLite 3.35.4 source code (func.c), Public Domain 2 | // Modified by Anton Zhiyanov, https://github.com/nalgeon/sqlean/, MIT License 3 | 4 | /* 5 | * SQLite math functions. 6 | */ 7 | #include 8 | #include 9 | 10 | #include "sqlite3ext.h" 11 | SQLITE_EXTENSION_INIT1 12 | 13 | #if defined(HAVE_STDINT_H) /* Use this case if we have ANSI headers */ 14 | #define SQLITE_INT_TO_PTR(X) ((void*)(intptr_t)(X)) 15 | #define SQLITE_PTR_TO_INT(X) ((int)(intptr_t)(X)) 16 | #elif defined(__PTRDIFF_TYPE__) /* This case should work for GCC */ 17 | #define SQLITE_INT_TO_PTR(X) ((void*)(__PTRDIFF_TYPE__)(X)) 18 | #define SQLITE_PTR_TO_INT(X) ((int)(__PTRDIFF_TYPE__)(X)) 19 | #elif !defined(__GNUC__) /* Works for compilers other than LLVM */ 20 | #define SQLITE_INT_TO_PTR(X) ((void*)&((char*)0)[X]) 21 | #define SQLITE_PTR_TO_INT(X) ((int)(((char*)X) - (char*)0)) 22 | #else /* Generates a warning - but it always works */ 23 | #define SQLITE_INT_TO_PTR(X) ((void*)(X)) 24 | #define SQLITE_PTR_TO_INT(X) ((int)(X)) 25 | #endif 26 | 27 | /* Mathematical Constants */ 28 | #ifndef M_PI 29 | #define M_PI 3.141592653589793238462643383279502884 30 | #endif 31 | #ifndef M_LN10 32 | #define M_LN10 2.302585092994045684017991454684364208 33 | #endif 34 | #ifndef M_LN2 35 | 
#define M_LN2 0.693147180559945309417232121458176568 36 | #endif 37 | 38 | /* 39 | ** Implementation SQL functions: 40 | ** 41 | ** ceil(X) 42 | ** ceiling(X) 43 | ** floor(X) 44 | ** 45 | ** The sqlite3_user_data() pointer is a pointer to the libm implementation 46 | ** of the underlying C function. 47 | */ 48 | static void ceilingFunc(sqlite3_context* context, int argc, sqlite3_value** argv) { 49 | assert(argc == 1); 50 | switch (sqlite3_value_numeric_type(argv[0])) { 51 | case SQLITE_INTEGER: { 52 | sqlite3_result_int64(context, sqlite3_value_int64(argv[0])); 53 | break; 54 | } 55 | case SQLITE_FLOAT: { 56 | double (*x)(double) = (double (*)(double))sqlite3_user_data(context); 57 | sqlite3_result_double(context, x(sqlite3_value_double(argv[0]))); 58 | break; 59 | } 60 | default: { 61 | break; 62 | } 63 | } 64 | } 65 | 66 | /* 67 | ** On some systems, ceil() and floor() are intrinsic function. You are 68 | ** unable to take a pointer to these functions. Hence, we here wrap them 69 | ** in our own actual functions. 
70 | */ 71 | static double xCeil(double x) { 72 | return ceil(x); 73 | } 74 | static double xFloor(double x) { 75 | return floor(x); 76 | } 77 | 78 | /* 79 | ** Implementation of SQL functions: 80 | ** 81 | ** ln(X) - natural logarithm 82 | ** log(X) - log X base 10 83 | ** log10(X) - log X base 10 84 | */ 85 | static void log1Func(sqlite3_context* context, int argc, sqlite3_value** argv) { 86 | double x, b, ans; 87 | assert(argc == 1); 88 | switch (sqlite3_value_numeric_type(argv[0])) { 89 | case SQLITE_INTEGER: 90 | case SQLITE_FLOAT: 91 | x = sqlite3_value_double(argv[0]); 92 | if (x <= 0.0) 93 | return; 94 | break; 95 | default: 96 | return; 97 | } 98 | ans = log(x); 99 | switch (SQLITE_PTR_TO_INT(sqlite3_user_data(context))) { 100 | case 1: 101 | /* Convert from natural logarithm to log base 10 */ 102 | ans *= 1.0 / M_LN10; 103 | break; 104 | case 2: 105 | /* Convert from natural logarithm to log base 2 */ 106 | ans *= 1.0 / M_LN2; 107 | break; 108 | default: 109 | break; 110 | } 111 | sqlite3_result_double(context, ans); 112 | } 113 | 114 | /* 115 | ** Implementation of SQL functions: 116 | ** 117 | ** log(B,X) - log X base B 118 | */ 119 | static void log2Func(sqlite3_context* context, int argc, sqlite3_value** argv) { 120 | double x, b, ans; 121 | assert(argc == 2); 122 | switch (sqlite3_value_numeric_type(argv[0])) { 123 | case SQLITE_INTEGER: 124 | case SQLITE_FLOAT: 125 | x = sqlite3_value_double(argv[0]); 126 | if (x <= 0.0) 127 | return; 128 | break; 129 | default: 130 | return; 131 | } 132 | switch (sqlite3_value_numeric_type(argv[0])) { 133 | case SQLITE_INTEGER: 134 | case SQLITE_FLOAT: 135 | b = log(x); 136 | if (b <= 0.0) 137 | return; 138 | x = sqlite3_value_double(argv[1]); 139 | if (x <= 0.0) 140 | return; 141 | break; 142 | default: 143 | return; 144 | } 145 | ans = log(x) / b; 146 | sqlite3_result_double(context, ans); 147 | } 148 | 149 | /* 150 | ** Functions to converts degrees to radians and radians to degrees. 
151 | */ 152 | static double degToRad(double x) { 153 | return x * (M_PI / 180.0); 154 | } 155 | static double radToDeg(double x) { 156 | return x * (180.0 / M_PI); 157 | } 158 | 159 | /* 160 | ** Implementation of 1-argument SQL math functions: 161 | ** 162 | ** exp(X) - Compute e to the X-th power 163 | */ 164 | static void math1Func(sqlite3_context* context, int argc, sqlite3_value** argv) { 165 | int type0; 166 | double v0, ans; 167 | double (*x)(double); 168 | assert(argc == 1); 169 | type0 = sqlite3_value_numeric_type(argv[0]); 170 | if (type0 != SQLITE_INTEGER && type0 != SQLITE_FLOAT) 171 | return; 172 | v0 = sqlite3_value_double(argv[0]); 173 | x = (double (*)(double))sqlite3_user_data(context); 174 | ans = x(v0); 175 | sqlite3_result_double(context, ans); 176 | } 177 | 178 | /* 179 | ** Implementation of 2-argument SQL math functions: 180 | ** 181 | ** power(X,Y) - Compute X to the Y-th power 182 | */ 183 | static void math2Func(sqlite3_context* context, int argc, sqlite3_value** argv) { 184 | int type0, type1; 185 | double v0, v1, ans; 186 | double (*x)(double, double); 187 | assert(argc == 2); 188 | type0 = sqlite3_value_numeric_type(argv[0]); 189 | if (type0 != SQLITE_INTEGER && type0 != SQLITE_FLOAT) 190 | return; 191 | type1 = sqlite3_value_numeric_type(argv[1]); 192 | if (type1 != SQLITE_INTEGER && type1 != SQLITE_FLOAT) 193 | return; 194 | v0 = sqlite3_value_double(argv[0]); 195 | v1 = sqlite3_value_double(argv[1]); 196 | x = (double (*)(double, double))sqlite3_user_data(context); 197 | ans = x(v0, v1); 198 | sqlite3_result_double(context, ans); 199 | } 200 | 201 | /* 202 | ** Implementation of pi() SQL math function 203 | */ 204 | static void piFunc(sqlite3_context* context, int argc, sqlite3_value** argv) { 205 | assert(argc == 0); 206 | sqlite3_result_double(context, M_PI); 207 | } 208 | 209 | /* 210 | * Registers the extension. 
211 | */ 212 | #ifdef _WIN32 213 | __declspec(dllexport) 214 | #endif 215 | int sqlite3_math_init(sqlite3* db, char** pzErrMsg, const sqlite3_api_routines* pApi) { 216 | static const int flags = SQLITE_UTF8 | SQLITE_INNOCUOUS | SQLITE_DETERMINISTIC; 217 | SQLITE_EXTENSION_INIT2(pApi); 218 | sqlite3_create_function(db, "ceil", 1, flags, xCeil, ceilingFunc, 0, 0); 219 | sqlite3_create_function(db, "ceiling", 1, flags, xCeil, ceilingFunc, 0, 0); 220 | sqlite3_create_function(db, "floor", 1, flags, xFloor, ceilingFunc, 0, 0); 221 | sqlite3_create_function(db, "trunc", 1, flags, trunc, ceilingFunc, 0, 0); 222 | sqlite3_create_function(db, "ln", 1, flags, 0, log1Func, 0, 0); 223 | sqlite3_create_function(db, "log", 1, flags, (void*)(1), log1Func, 0, 0); 224 | sqlite3_create_function(db, "log10", 1, flags, (void*)(1), log1Func, 0, 0); 225 | sqlite3_create_function(db, "log2", 1, flags, (void*)(2), log1Func, 0, 0); 226 | sqlite3_create_function(db, "log", 2, flags, 0, log2Func, 0, 0); 227 | sqlite3_create_function(db, "exp", 1, flags, exp, math1Func, 0, 0); 228 | sqlite3_create_function(db, "pow", 2, flags, pow, math2Func, 0, 0); 229 | sqlite3_create_function(db, "power", 2, flags, pow, math2Func, 0, 0); 230 | sqlite3_create_function(db, "mod", 2, flags, fmod, math2Func, 0, 0); 231 | sqlite3_create_function(db, "acos", 1, flags, acos, math1Func, 0, 0); 232 | sqlite3_create_function(db, "asin", 1, flags, asin, math1Func, 0, 0); 233 | sqlite3_create_function(db, "atan", 1, flags, atan, math1Func, 0, 0); 234 | sqlite3_create_function(db, "atan2", 2, flags, atan2, math2Func, 0, 0); 235 | sqlite3_create_function(db, "cos", 1, flags, cos, math1Func, 0, 0); 236 | sqlite3_create_function(db, "sin", 1, flags, sin, math1Func, 0, 0); 237 | sqlite3_create_function(db, "tan", 1, flags, tan, math1Func, 0, 0); 238 | sqlite3_create_function(db, "cosh", 1, flags, cosh, math1Func, 0, 0); 239 | sqlite3_create_function(db, "sinh", 1, flags, sinh, math1Func, 0, 0); 240 | 
sqlite3_create_function(db, "tanh", 1, flags, tanh, math1Func, 0, 0); 241 | sqlite3_create_function(db, "acosh", 1, flags, acosh, math1Func, 0, 0); 242 | sqlite3_create_function(db, "asinh", 1, flags, asinh, math1Func, 0, 0); 243 | sqlite3_create_function(db, "atanh", 1, flags, atanh, math1Func, 0, 0); 244 | sqlite3_create_function(db, "sqrt", 1, flags, sqrt, math1Func, 0, 0); 245 | sqlite3_create_function(db, "radians", 1, flags, degToRad, math1Func, 0, 0); 246 | sqlite3_create_function(db, "degrees", 1, flags, radToDeg, math1Func, 0, 0); 247 | sqlite3_create_function(db, "pi", 0, flags, 0, piFunc, 0, 0); 248 | return SQLITE_OK; 249 | } -------------------------------------------------------------------------------- /src/sqlite3-re.c: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2021 Anton Zhiyanov, MIT License 2 | // https://github.com/nalgeon/sqlean 3 | 4 | /* 5 | * SQLite extension for working with regular expressions. 6 | * 7 | * regexp_like(source, pattern) 8 | * - checks if source string matches pattern 9 | * regexp_substr(source, pattern) 10 | * - returns source substring matching pattern 11 | * regexp_replace(source, pattern, replacement) 12 | * - replaces matching substring with replacement string 13 | * 14 | * The following regular expression syntax is supported: 15 | * X* zero or more occurrences of X 16 | * X+ one or more occurrences of X 17 | * X? zero or one occurrences of X 18 | * (X) match X 19 | * X|Y X or Y 20 | * ^X X occurring at the beginning of the string 21 | * X$ X occurring at the end of the string 22 | * . Match any single character 23 | * \c Character c where c is one of \{}()[]|*+?. 24 | * \c C-language escapes for c in afnrtv. 
ex: \t or \n 25 | * [abc] Any single character from the set abc 26 | * [^abc] Any single character not in the set abc 27 | * [a-z] Any single character in the range a-z 28 | * [^a-z] Any single character not in the range a-z 29 | ** 30 | */ 31 | #include 32 | #include 33 | #include 34 | #include 35 | 36 | #include "re.h" 37 | #include "sqlite3ext.h" 38 | SQLITE_EXTENSION_INIT1 39 | 40 | /* 41 | * Replaces `rep` substring of the `orig` string with `with` substring. 42 | */ 43 | static char* str_replace(char* orig, char* rep, char* with) { 44 | char* result; // the return string 45 | char* ins; // the next insert point 46 | char* tmp; // varies 47 | int len_rep; // length of rep (the string to remove) 48 | int len_with; // length of with (the string to replace rep with) 49 | int len_front; // distance between rep and end of last rep 50 | int count; // number of replacements 51 | 52 | // sanity checks and initialization 53 | if (!orig || !rep) 54 | return NULL; 55 | len_rep = strlen(rep); 56 | if (len_rep == 0) 57 | return NULL; // empty rep causes infinite loop during count 58 | if (!with) 59 | with = ""; 60 | len_with = strlen(with); 61 | 62 | // count the number of replacements needed 63 | ins = orig; 64 | for (count = 0; (tmp = strstr(ins, rep)); ++count) { 65 | ins = tmp + len_rep; 66 | } 67 | 68 | tmp = result = sqlite3_malloc(strlen(orig) + (len_with - len_rep) * count + 1); 69 | 70 | if (!result) 71 | return NULL; 72 | 73 | // first time through the loop, all the variable are set correctly 74 | // from here on, 75 | // tmp points to the end of the result string 76 | // ins points to the next occurrence of rep in orig 77 | // orig points to the remainder of orig after "end of rep" 78 | while (count--) { 79 | ins = strstr(orig, rep); 80 | len_front = ins - orig; 81 | tmp = strncpy(tmp, orig, len_front) + len_front; 82 | tmp = strcpy(tmp, with) + len_with; 83 | orig += len_front + len_rep; // move to next "end of rep" 84 | } 85 | strcpy(tmp, orig); 86 | return 
result; 87 | } 88 | 89 | /* 90 | * Checks if source string matches pattern. 91 | * regexp_statement(pattern, source) 92 | * E.g.: 93 | * select true where 'abc' regexp 'a.c'; 94 | */ 95 | static void regexp_statement(sqlite3_context* context, int argc, sqlite3_value** argv) { 96 | regexp* r; 97 | const char* source; 98 | const char* pattern; 99 | int is_match = 0; 100 | 101 | assert(argc == 2); 102 | 103 | source = (const char*)sqlite3_value_text(argv[1]); 104 | #ifdef DEBUG 105 | fprintf(stderr, "source = %s\n", source); 106 | #endif 107 | if (!source) { 108 | sqlite3_result_int(context, is_match); 109 | return; 110 | } 111 | 112 | pattern = (const char*)sqlite3_value_text(argv[0]); 113 | #ifdef DEBUG 114 | fprintf(stderr, "pattern = %s\n", pattern); 115 | #endif 116 | if (!pattern) { 117 | sqlite3_result_error(context, "missing regexp pattern", -1); 118 | return; 119 | } 120 | 121 | r = re_compile(pattern); 122 | if (r == NULL) { 123 | sqlite3_result_error(context, "invalid regexp pattern", -1); 124 | return; 125 | } 126 | 127 | is_match = re_execute(r, source); 128 | sqlite3_result_int(context, is_match); 129 | free((char*)r); 130 | } 131 | 132 | /* 133 | * Checks if source string matches pattern. 
134 | * regexp_like(source, pattern) 135 | * E.g.: 136 | * select regexp_like('abc', 'a.c'); 137 | */ 138 | static void regexp_like(sqlite3_context* context, int argc, sqlite3_value** argv) { 139 | regexp* r; 140 | const char* source; 141 | const char* pattern; 142 | int is_match = 0; 143 | 144 | assert(argc == 2); 145 | 146 | source = (const char*)sqlite3_value_text(argv[0]); 147 | #ifdef DEBUG 148 | fprintf(stderr, "source = %s\n", source); 149 | #endif 150 | if (!source) { 151 | sqlite3_result_int(context, is_match); 152 | return; 153 | } 154 | 155 | pattern = (const char*)sqlite3_value_text(argv[1]); 156 | #ifdef DEBUG 157 | fprintf(stderr, "pattern = %s\n", pattern); 158 | #endif 159 | if (!pattern) { 160 | sqlite3_result_error(context, "missing regexp pattern", -1); 161 | return; 162 | } 163 | 164 | r = re_compile(pattern); 165 | if (r == NULL) { 166 | sqlite3_result_error(context, "invalid regexp pattern", -1); 167 | return; 168 | } 169 | 170 | is_match = re_execute(r, source); 171 | sqlite3_result_int(context, is_match); 172 | free((char*)r); 173 | } 174 | 175 | /* 176 | * Returns source substring matching pattern. 
177 | * regexp_substr(source, pattern) 178 | * E.g.: select regexp_substr('abcdef', 'b.d') = 'bcd'; 179 | */ 180 | static void regexp_substr(sqlite3_context* context, int argc, sqlite3_value** argv) { 181 | regexp* r; 182 | const char* source; 183 | const char* pattern; 184 | int is_match = 0; 185 | 186 | assert(argc == 2); 187 | 188 | source = (const char*)sqlite3_value_text(argv[0]); 189 | if (!source) { 190 | return; 191 | } 192 | 193 | pattern = (const char*)sqlite3_value_text(argv[1]); 194 | if (!pattern) { 195 | sqlite3_result_error(context, "missing regexp pattern", -1); 196 | return; 197 | } 198 | 199 | r = re_compile(pattern); 200 | if (r == NULL) { 201 | sqlite3_result_error(context, "invalid regexp pattern", -1); 202 | return; 203 | } 204 | 205 | is_match = re_execute(r, source); 206 | if (!is_match) { 207 | return; 208 | } 209 | 210 | int len = r->endp[0] - r->startp[0]; 211 | char* matched_str = sqlite3_malloc(len + 1); 212 | (void)strncpy(matched_str, r->startp[0], len); 213 | matched_str[len] = '\0'; 214 | #ifdef DEBUG 215 | fprintf(stderr, "matched_str = '%s'\n", matched_str); 216 | #endif 217 | 218 | sqlite3_result_text(context, (char*)matched_str, -1, sqlite3_free); 219 | free((char*)r); 220 | } 221 | 222 | /* 223 | * Returns source substring matching pattern. 
224 | * regexp_replace(source, pattern, replacement) 225 | * E.g.: select regexp_replace('abcdef', 'b.d', '...') = 'a...ef'; 226 | */ 227 | static void regexp_replace(sqlite3_context* context, int argc, sqlite3_value** argv) { 228 | regexp* r; 229 | char* source; 230 | char* pattern; 231 | char* replacement; 232 | char* result; 233 | 234 | int is_match = 0; 235 | 236 | assert(argc == 3); 237 | 238 | source = (char*)sqlite3_value_text(argv[0]); 239 | if (!source) { 240 | return; 241 | } 242 | 243 | pattern = (char*)sqlite3_value_text(argv[1]); 244 | if (!pattern) { 245 | sqlite3_result_error(context, "missing regexp pattern", -1); 246 | return; 247 | } 248 | 249 | r = re_compile(pattern); 250 | if (r == NULL) { 251 | sqlite3_result_error(context, "invalid regexp pattern", -1); 252 | return; 253 | } 254 | 255 | replacement = (char*)sqlite3_value_text(argv[2]); 256 | if (!replacement) { 257 | sqlite3_result_value(context, argv[0]); 258 | return; 259 | } 260 | 261 | is_match = re_execute(r, source); 262 | if (!is_match) { 263 | sqlite3_result_value(context, argv[0]); 264 | return; 265 | } 266 | 267 | int matched_len = r->endp[0] - r->startp[0]; 268 | char* matched_str = sqlite3_malloc(matched_len + 1); 269 | (void)strncpy(matched_str, r->startp[0], matched_len); 270 | matched_str[matched_len] = '\0'; 271 | 272 | char replacement_str[BUFSIZ]; 273 | int err = re_substitute(r, replacement, replacement_str); 274 | if (err) { 275 | sqlite3_result_error(context, "invalid replacement pattern", -1); 276 | return; 277 | } 278 | 279 | int head_len = r->startp[0] - source; 280 | char* head_str = sqlite3_malloc(head_len + 1); 281 | (void)strncpy(head_str, source, head_len); 282 | head_str[head_len] = '\0'; 283 | 284 | int tail_len = source + strlen(source) - r->endp[0]; 285 | char* tail_str = sqlite3_malloc(tail_len + 1); 286 | (void)strncpy(tail_str, r->endp[0], tail_len); 287 | tail_str[tail_len] = '\0'; 288 | 289 | int replacement_len = strlen(replacement_str); 290 | 291 | int 
result_len = head_len + replacement_len + tail_len; 292 | result = sqlite3_malloc(result_len + 1); 293 | strcat(result, head_str); 294 | strcat(result, replacement_str); 295 | strcat(result, tail_str); 296 | result[result_len] = '\0'; 297 | 298 | #ifdef DEBUG 299 | fprintf(stderr, "head string (%d) = '%s'\n", head_len, head_str); 300 | fprintf(stderr, "matched string (%d) = '%s'\n", matched_len, matched_str); 301 | fprintf(stderr, "repl string (%d) = '%s'\n", replacement_len, replacement_str); 302 | fprintf(stderr, "tail string (%d) = '%s'\n", tail_len, tail_str); 303 | fprintf(stderr, "result string (%d) = '%s'\n", result_len, result); 304 | fprintf(stderr, "replace('%s', '%s', '%s') = '%s'\n", source, matched_str, replacement_str, 305 | result); 306 | #endif 307 | 308 | sqlite3_result_text(context, (char*)result, -1, sqlite3_free); 309 | sqlite3_free(head_str); 310 | sqlite3_free(matched_str); 311 | sqlite3_free(tail_str); 312 | free((char*)r); 313 | } 314 | 315 | /* 316 | * Registers the extension. 317 | */ 318 | #ifdef _WIN32 319 | __declspec(dllexport) 320 | #endif 321 | int sqlite3_re_init(sqlite3* db, char** pzErrMsg, const sqlite3_api_routines* pApi) { 322 | SQLITE_EXTENSION_INIT2(pApi); 323 | sqlite3_create_function(db, "regexp", 2, SQLITE_UTF8, 0, regexp_statement, 0, 0); 324 | sqlite3_create_function(db, "regexp_like", 2, SQLITE_UTF8, 0, regexp_like, 0, 0); 325 | sqlite3_create_function(db, "regexp_substr", 2, SQLITE_UTF8, 0, regexp_substr, 0, 0); 326 | sqlite3_create_function(db, "regexp_replace", 3, SQLITE_UTF8, 0, regexp_replace, 0, 0); 327 | return SQLITE_OK; 328 | } -------------------------------------------------------------------------------- /src/sqlite3-fuzzy.c: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2021 Anton Zhiyanov, MIT License 2 | // https://github.com/nalgeon/sqlean 3 | 4 | /* 5 | * Fuzzy string matching and phonetics. 
6 | */ 7 | #include 8 | #include 9 | #include 10 | 11 | #include "sqlite3ext.h" 12 | SQLITE_EXTENSION_INIT1 13 | 14 | #include "fuzzy/fuzzy.h" 15 | 16 | // is_ascii checks if the string consists of ASCII symbols only 17 | static bool is_ascii(const unsigned char* str) { 18 | for (int idx = 0; str[idx]; idx++) { 19 | if (str[idx] & 0x80) { 20 | return false; 21 | } 22 | } 23 | return true; 24 | } 25 | 26 | // Below are functions extracted from the 27 | // https://github.com/Rostepher/libstrcmp/ 28 | 29 | // sqlite3_dlevenshtein implements Damerau-Levenshtein distance 30 | static void sqlite3_dlevenshtein(sqlite3_context* context, int argc, sqlite3_value** argv) { 31 | assert(argc == 2); 32 | const unsigned char* str1 = sqlite3_value_text(argv[0]); 33 | const unsigned char* str2 = sqlite3_value_text(argv[1]); 34 | if (str1 == 0 || str2 == 0) { 35 | sqlite3_result_error(context, "arguments should not be NULL", -1); 36 | return; 37 | } 38 | if (!is_ascii(str1) || !is_ascii(str2)) { 39 | sqlite3_result_error(context, "arguments should be ASCII strings", -1); 40 | return; 41 | } 42 | unsigned distance = damerau_levenshtein((const char*)str1, (const char*)str2); 43 | sqlite3_result_int(context, distance); 44 | } 45 | 46 | // sqlite3_hamming implements Hamming distance 47 | static void sqlite3_hamming(sqlite3_context* context, int argc, sqlite3_value** argv) { 48 | assert(argc == 2); 49 | const unsigned char* str1 = sqlite3_value_text(argv[0]); 50 | const unsigned char* str2 = sqlite3_value_text(argv[1]); 51 | if (str1 == 0 || str2 == 0) { 52 | sqlite3_result_error(context, "arguments should not be NULL", -1); 53 | return; 54 | } 55 | if (!is_ascii(str1) || !is_ascii(str2)) { 56 | sqlite3_result_error(context, "arguments should be ASCII strings", -1); 57 | return; 58 | } 59 | int distance = hamming((const char*)str1, (const char*)str2); 60 | sqlite3_result_int(context, distance); 61 | } 62 | 63 | // sqlite3_jaro_winkler implements Jaro-Winkler distance 64 | static void 
sqlite3_jaro_winkler(sqlite3_context* context, int argc, sqlite3_value** argv) { 65 | assert(argc == 2); 66 | const unsigned char* str1 = sqlite3_value_text(argv[0]); 67 | const unsigned char* str2 = sqlite3_value_text(argv[1]); 68 | if (str1 == 0 || str2 == 0) { 69 | sqlite3_result_error(context, "arguments should not be NULL", -1); 70 | return; 71 | } 72 | if (!is_ascii(str1) || !is_ascii(str2)) { 73 | sqlite3_result_error(context, "arguments should be ASCII strings", -1); 74 | return; 75 | } 76 | double distance = jaro_winkler((const char*)str1, (const char*)str2); 77 | sqlite3_result_double(context, distance); 78 | } 79 | 80 | // sqlite3_levenshtein implements Levenshtein distance 81 | static void sqlite3_levenshtein(sqlite3_context* context, int argc, sqlite3_value** argv) { 82 | assert(argc == 2); 83 | const unsigned char* str1 = sqlite3_value_text(argv[0]); 84 | const unsigned char* str2 = sqlite3_value_text(argv[1]); 85 | if (str1 == 0 || str2 == 0) { 86 | sqlite3_result_error(context, "arguments should not be NULL", -1); 87 | return; 88 | } 89 | if (!is_ascii(str1) || !is_ascii(str2)) { 90 | sqlite3_result_error(context, "arguments should be ASCII strings", -1); 91 | return; 92 | } 93 | unsigned distance = levenshtein((const char*)str1, (const char*)str2); 94 | sqlite3_result_int(context, distance); 95 | } 96 | 97 | // sqlite3_osa_distance implements Optimal String Alignment distance 98 | static void sqlite3_osa_distance(sqlite3_context* context, int argc, sqlite3_value** argv) { 99 | assert(argc == 2); 100 | const unsigned char* str1 = sqlite3_value_text(argv[0]); 101 | const unsigned char* str2 = sqlite3_value_text(argv[1]); 102 | if (str1 == 0 || str2 == 0) { 103 | sqlite3_result_error(context, "arguments should not be NULL", -1); 104 | return; 105 | } 106 | if (!is_ascii(str1) || !is_ascii(str2)) { 107 | sqlite3_result_error(context, "arguments should be ASCII strings", -1); 108 | return; 109 | } 110 | unsigned distance = 
optimal_string_alignment((const char*)str1, (const char*)str2); 111 | sqlite3_result_int(context, distance); 112 | } 113 | 114 | // sqlite3_soundex implements Soundex coding 115 | static void sqlite3_soundex(sqlite3_context* context, int argc, sqlite3_value** argv) { 116 | assert(argc == 1); 117 | const unsigned char* source = sqlite3_value_text(argv[0]); 118 | if (source == 0) { 119 | return; 120 | } 121 | if (!is_ascii(source)) { 122 | sqlite3_result_error(context, "argument should be ASCII string", -1); 123 | return; 124 | } 125 | char* result = soundex((const char*)source); 126 | sqlite3_result_text(context, result, -1, free); 127 | } 128 | 129 | // sqlite3_rsoundex implements Refined Soundex coding 130 | static void sqlite3_rsoundex(sqlite3_context* context, int argc, sqlite3_value** argv) { 131 | assert(argc == 1); 132 | const unsigned char* source = sqlite3_value_text(argv[0]); 133 | if (source == 0) { 134 | return; 135 | } 136 | if (!is_ascii(source)) { 137 | sqlite3_result_error(context, "argument should be ASCII string", -1); 138 | return; 139 | } 140 | char* result = refined_soundex((const char*)source); 141 | sqlite3_result_text(context, result, -1, free); 142 | } 143 | 144 | // Below are functions extracted from the spellfix SQLite exension 145 | // https://www.sqlite.org/src/file/ext/misc/spellfix.c 146 | 147 | /* 148 | ** phonetic_hash(X) 149 | ** 150 | ** Generate a "phonetic hash" from a string of ASCII characters in X. 151 | ** 152 | ** * Map characters by character class as defined above. 153 | ** * Omit double-letters 154 | ** * Omit vowels beside R and L 155 | ** * Omit T when followed by CH 156 | ** * Omit W when followed by R 157 | ** * Omit D when followed by J or G 158 | ** * Omit K in KN or G in GN at the beginning of a word 159 | ** 160 | ** Space to hold the result is obtained from sqlite3_malloc() 161 | ** 162 | ** Return NULL if memory allocation fails. 
163 | */ 164 | static void sqlite3_phonetic_hash(sqlite3_context* context, int argc, sqlite3_value** argv) { 165 | const unsigned char* zIn; 166 | unsigned char* zOut; 167 | 168 | zIn = sqlite3_value_text(argv[0]); 169 | if (zIn == 0) 170 | return; 171 | zOut = phonetic_hash(zIn, sqlite3_value_bytes(argv[0])); 172 | if (zOut == 0) { 173 | sqlite3_result_error_nomem(context); 174 | } else { 175 | sqlite3_result_text(context, (char*)zOut, -1, free); 176 | } 177 | } 178 | 179 | /* 180 | ** edit_distance(A,B) 181 | ** 182 | ** Return the cost of transforming string A into string B. Both strings 183 | ** must be pure ASCII text. If A ends with '*' then it is assumed to be 184 | ** a prefix of B and extra characters on the end of B have minimal additional 185 | ** cost. 186 | */ 187 | static void sqlite3_edit_distance(sqlite3_context* context, int argc, sqlite3_value** argv) { 188 | int res = edit_distance((const char*)sqlite3_value_text(argv[0]), 189 | (const char*)sqlite3_value_text(argv[1]), 0); 190 | if (res < 0) { 191 | if (res == (-3)) { 192 | sqlite3_result_error_nomem(context); 193 | } else if (res == (-2)) { 194 | sqlite3_result_error(context, "non-ASCII input to editdist()", -1); 195 | } else { 196 | sqlite3_result_error(context, "NULL input to editdist()", -1); 197 | } 198 | } else { 199 | sqlite3_result_int(context, res); 200 | } 201 | } 202 | 203 | /* 204 | ** translit(X) 205 | ** 206 | ** Convert a string that contains non-ASCII Roman characters into 207 | ** pure ASCII. 
208 | */ 209 | static void sqlite3_transliterate(sqlite3_context* context, int argc, sqlite3_value** argv) { 210 | const unsigned char* zIn = sqlite3_value_text(argv[0]); 211 | int nIn = sqlite3_value_bytes(argv[0]); 212 | unsigned char* zOut = transliterate(zIn, nIn); 213 | if (zOut == 0) { 214 | sqlite3_result_error_nomem(context); 215 | } else { 216 | sqlite3_result_text(context, (char*)zOut, -1, free); 217 | } 218 | } 219 | 220 | /* 221 | ** script_code(X) 222 | ** 223 | ** Try to determine the dominant script used by the word X and return 224 | ** its ISO 15924 numeric code. 225 | ** 226 | ** The current implementation only understands the following scripts: 227 | ** 228 | ** 215 (Latin) 229 | ** 220 (Cyrillic) 230 | ** 200 (Greek) 231 | ** 232 | ** This routine will return 998 if the input X contains characters from 233 | ** two or more of the above scripts or 999 if X contains no characters 234 | ** from any of the above scripts. 235 | */ 236 | static void sqlite3_script_code(sqlite3_context* context, int argc, sqlite3_value** argv) { 237 | const unsigned char* zIn = sqlite3_value_text(argv[0]); 238 | int nIn = sqlite3_value_bytes(argv[0]); 239 | int res = script_code(zIn, nIn); 240 | sqlite3_result_int(context, res); 241 | } 242 | 243 | // Below are custom functions 244 | 245 | // sqlite3_caverphone implements Caverphone coding 246 | static void sqlite3_caverphone(sqlite3_context* context, int argc, sqlite3_value** argv) { 247 | assert(argc == 1); 248 | const unsigned char* source = sqlite3_value_text(argv[0]); 249 | if (source == 0) { 250 | return; 251 | } 252 | if (!is_ascii(source)) { 253 | sqlite3_result_error(context, "argument should be ASCII string", -1); 254 | return; 255 | } 256 | char* result = caverphone((const char*)source); 257 | sqlite3_result_text(context, result, -1, free); 258 | } 259 | 260 | /* 261 | * Registers the extension. 
262 | */ 263 | #ifdef _WIN32 264 | __declspec(dllexport) 265 | #endif 266 | int sqlite3_fuzzy_init(sqlite3* db, char** pzErrMsg, const sqlite3_api_routines* pApi) { 267 | SQLITE_EXTENSION_INIT2(pApi); 268 | static const int flags = SQLITE_UTF8 | SQLITE_INNOCUOUS | SQLITE_DETERMINISTIC; 269 | // libstrcmp 270 | sqlite3_create_function(db, "dlevenshtein", 2, flags, 0, sqlite3_dlevenshtein, 0, 0); 271 | sqlite3_create_function(db, "hamming", 2, flags, 0, sqlite3_hamming, 0, 0); 272 | sqlite3_create_function(db, "jaro_winkler", 2, flags, 0, sqlite3_jaro_winkler, 0, 0); 273 | sqlite3_create_function(db, "levenshtein", 2, flags, 0, sqlite3_levenshtein, 0, 0); 274 | sqlite3_create_function(db, "osa_distance", 2, flags, 0, sqlite3_osa_distance, 0, 0); 275 | sqlite3_create_function(db, "soundex", 1, flags, 0, sqlite3_soundex, 0, 0); 276 | sqlite3_create_function(db, "rsoundex", 1, flags, 0, sqlite3_rsoundex, 0, 0); 277 | // spellfix 278 | sqlite3_create_function(db, "edit_distance", 2, flags, 0, sqlite3_edit_distance, 0, 0); 279 | sqlite3_create_function(db, "phonetic_hash", 1, flags, 0, sqlite3_phonetic_hash, 0, 0); 280 | sqlite3_create_function(db, "script_code", 1, flags, 0, sqlite3_script_code, 0, 0); 281 | sqlite3_create_function(db, "translit", 1, flags, 0, sqlite3_transliterate, 0, 0); 282 | // custom 283 | sqlite3_create_function(db, "caverphone", 1, flags, 0, sqlite3_caverphone, 0, 0); 284 | return SQLITE_OK; 285 | } -------------------------------------------------------------------------------- /src/fuzzy/caverphone.c: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2021 Anton Zhiyanov, MIT License 2 | // https://github.com/nalgeon/sqlean 3 | 4 | // Caverphone phonetic coding algorithm. 
5 | // https://en.wikipedia.org/wiki/Caverphone 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | // remove_non_letters deletes everything from the source string, 12 | // except lowercased letters a-z 13 | static char* remove_non_letters(const char* src) { 14 | size_t src_len = strlen(src); 15 | char* res = malloc((src_len + 1) * sizeof(char)); 16 | const char* src_it; 17 | char* res_it = res; 18 | for (size_t idx = 0; idx < src_len; idx++) { 19 | src_it = src + idx; 20 | if (*src_it < 97 || *src_it > 122) { 21 | continue; 22 | } 23 | *res_it = *src_it; 24 | res_it++; 25 | } 26 | *res_it = '\0'; 27 | return res; 28 | } 29 | 30 | // replace_start replaces the `old` substring with the `new` one 31 | // if it matches at the beginning of the `src` string 32 | static char* replace_start(const char* src, const char* old, const char* new) { 33 | size_t src_len = strlen(src); 34 | size_t old_len = strlen(old); 35 | size_t new_len = strlen(new); 36 | assert(new_len <= old_len); 37 | 38 | char* res = malloc((src_len + 1) * sizeof(char)); 39 | 40 | if (src_len < old_len) { 41 | // source string is shorter than the substring to replace, 42 | // so there is definitely no match 43 | strcpy(res, src); 44 | return res; 45 | } 46 | 47 | if (strncmp(src, old, old_len) == 0) { 48 | strncpy(res, new, new_len); 49 | strncpy(res + new_len, src + old_len, src_len - old_len); 50 | *(res + src_len - old_len + new_len) = '\0'; 51 | } else { 52 | strcpy(res, src); 53 | } 54 | return res; 55 | } 56 | 57 | // replace_end replaces the `old` substring with the `new` one 58 | // if it matches at the end of the `src` string 59 | static char* replace_end(const char* src, const char* old, const char* new) { 60 | size_t src_len = strlen(src); 61 | size_t old_len = strlen(old); 62 | size_t new_len = strlen(new); 63 | assert(new_len <= old_len); 64 | 65 | char* res = malloc((src_len + 1) * sizeof(char)); 66 | 67 | if (src_len < old_len) { 68 | // source string is shorter than the substring to 
replace, 69 | // so there is definitely no match 70 | strcpy(res, src); 71 | return res; 72 | } 73 | 74 | strncpy(res, src, src_len - old_len); 75 | if (strncmp(src + src_len - old_len, old, old_len) == 0) { 76 | strncpy(res + src_len - old_len, new, new_len); 77 | *(res + src_len - old_len + new_len) = '\0'; 78 | } else { 79 | strncpy(res + src_len - old_len, src + src_len - old_len, old_len); 80 | *(res + src_len) = '\0'; 81 | } 82 | return res; 83 | } 84 | 85 | // replace replaces all `old` substrings with `new` ones 86 | // in the the `src` string 87 | static char* replace(const char* src, const char* old, const char* new) { 88 | size_t src_len = strlen(src); 89 | size_t old_len = strlen(old); 90 | size_t new_len = strlen(new); 91 | assert(new_len <= old_len); 92 | 93 | char* res = malloc((src_len + 1) * sizeof(char)); 94 | 95 | if (src_len < old_len) { 96 | // source string is shorter than the substring to replace, 97 | // so there is definitely no match 98 | strcpy(res, src); 99 | return res; 100 | } 101 | 102 | const char* src_it; 103 | char* res_it = res; 104 | for (size_t idx = 0; idx < src_len;) { 105 | src_it = src + idx; 106 | if (strncmp(src_it, old, old_len) == 0) { 107 | strncpy(res_it, new, new_len); 108 | res_it += new_len; 109 | idx += old_len; 110 | } else { 111 | *res_it = *src_it; 112 | res_it++; 113 | idx++; 114 | } 115 | } 116 | *res_it = '\0'; 117 | return res; 118 | } 119 | 120 | // replace_seq replaces all sequences of the `old` character 121 | // with the `new` substring in the the `src` string 122 | static char* replace_seq(const char* src, const char old, const char* new) { 123 | size_t src_len = strlen(src); 124 | size_t new_len = strlen(new); 125 | char* res = malloc((src_len + 1) * sizeof(char)); 126 | const char* src_it; 127 | char* res_it = res; 128 | size_t match_len = 0; 129 | for (size_t idx = 0; idx < src_len;) { 130 | src_it = src + idx; 131 | if (*src_it == old) { 132 | match_len++; 133 | idx++; 134 | } else { 135 | if 
(match_len > 0) { 136 | strncpy(res_it, new, new_len); 137 | res_it += new_len; 138 | match_len = 0; 139 | } 140 | *res_it = *src_it; 141 | res_it++; 142 | idx++; 143 | } 144 | } 145 | if (match_len > 0) { 146 | strncpy(res_it, new, new_len); 147 | res_it += new_len; 148 | } 149 | *res_it = '\0'; 150 | return res; 151 | } 152 | 153 | // pad pads `src` string with trailing 1s 154 | // up to the length of 10 characters 155 | static char* pad(const char* src) { 156 | size_t src_len = strlen(src); 157 | size_t max_len = 10; 158 | 159 | char* res = malloc((max_len + 1) * sizeof(char)); 160 | strncpy(res, src, max_len); 161 | if (src_len < max_len) { 162 | for (size_t idx = src_len; idx < max_len; idx++) { 163 | *(res + idx) = '1'; 164 | } 165 | } 166 | *(res + max_len) = '\0'; 167 | return res; 168 | } 169 | 170 | // step frees the source string and returns the result one 171 | static char* step(char* res, char* src) { 172 | free(src); 173 | return res; 174 | } 175 | 176 | // caverphone implements the Caverphone phonetic hashing algorithm 177 | // as described in https://caversham.otago.ac.nz/files/working/ctp150804.pdf 178 | char* caverphone(const char* src) { 179 | assert(src != NULL); 180 | 181 | char* res = malloc((strlen(src) + 1) * sizeof(char)); 182 | 183 | if (src == 0 || *src == '\0') { 184 | res[0] = '\0'; 185 | return res; 186 | } 187 | 188 | strcpy(res, src); 189 | 190 | // Remove anything not in the standard alphabet 191 | res = step(remove_non_letters((const char*)res), res); 192 | 193 | // Remove final e 194 | res = step(replace_end((const char*)res, "e", ""), res); 195 | 196 | // If the name starts with *gh make it *2f 197 | res = step(replace_start((const char*)res, "cough", "cou2f"), res); 198 | res = step(replace_start((const char*)res, "rough", "rou2f"), res); 199 | res = step(replace_start((const char*)res, "tough", "tou2f"), res); 200 | res = step(replace_start((const char*)res, "enough", "enou2f"), res); 201 | res = step(replace_start((const 
char*)res, "trough", "trou2f"), res); 202 | 203 | // If the name starts with gn make it 2n 204 | res = step(replace_start((const char*)res, "gn", "2n"), res); 205 | // If the name ends with mb make it m2 206 | res = step(replace_end((const char*)res, "mb", "m2"), res); 207 | // replace cq with 2q 208 | res = step(replace((const char*)res, "cq", "2q"), res); 209 | 210 | // replace c[iey] with s[iey] 211 | res = step(replace((const char*)res, "ci", "si"), res); 212 | res = step(replace((const char*)res, "ce", "se"), res); 213 | res = step(replace((const char*)res, "cy", "sy"), res); 214 | 215 | // replace tch with 2ch 216 | res = step(replace((const char*)res, "tch", "2ch"), res); 217 | 218 | // replace [cqx] with k 219 | res = step(replace((const char*)res, "c", "k"), res); 220 | res = step(replace((const char*)res, "q", "k"), res); 221 | res = step(replace((const char*)res, "x", "k"), res); 222 | 223 | // replace v with f 224 | res = step(replace((const char*)res, "v", "f"), res); 225 | // replace dg with 2g 226 | res = step(replace((const char*)res, "dg", "2g"), res); 227 | 228 | // replace ti[oa] with si[oa] 229 | res = step(replace((const char*)res, "tio", "sio"), res); 230 | res = step(replace((const char*)res, "tia", "sia"), res); 231 | 232 | // replace d with t 233 | res = step(replace((const char*)res, "d", "t"), res); 234 | // replace ph with fh 235 | res = step(replace((const char*)res, "ph", "fh"), res); 236 | // replace b with p 237 | res = step(replace((const char*)res, "b", "p"), res); 238 | // replace sh with s2 239 | res = step(replace((const char*)res, "sh", "s2"), res); 240 | // replace z with s 241 | res = step(replace((const char*)res, "z", "s"), res); 242 | 243 | // replace an initial vowel [aeiou] with an A 244 | res = step(replace_start((const char*)res, "a", "A"), res); 245 | res = step(replace_start((const char*)res, "e", "A"), res); 246 | res = step(replace_start((const char*)res, "i", "A"), res); 247 | res = step(replace_start((const 
char*)res, "o", "A"), res); 248 | res = step(replace_start((const char*)res, "u", "A"), res); 249 | 250 | // replace all other vowels with a 3 251 | res = step(replace((const char*)res, "a", "3"), res); 252 | res = step(replace((const char*)res, "e", "3"), res); 253 | res = step(replace((const char*)res, "i", "3"), res); 254 | res = step(replace((const char*)res, "o", "3"), res); 255 | res = step(replace((const char*)res, "u", "3"), res); 256 | 257 | // replace j with y 258 | res = step(replace((const char*)res, "j", "y"), res); 259 | 260 | // replace an initial y3 with Y3 261 | res = step(replace_start((const char*)res, "y3", "Y3"), res); 262 | // replace an initial y with A 263 | res = step(replace_start((const char*)res, "y", "A"), res); 264 | // replace y with 3 265 | res = step(replace((const char*)res, "y", "3"), res); 266 | 267 | // replace 3gh3 with 3kh3 268 | res = step(replace((const char*)res, "3gh3", "3kh3"), res); 269 | // replace gh with 22 270 | res = step(replace((const char*)res, "gh", "22"), res); 271 | // replace g with k 272 | res = step(replace((const char*)res, "g", "k"), res); 273 | 274 | // replace sequence of the letter [stpkfmn] with an uppercased letter 275 | res = step(replace_seq((const char*)res, 's', "S"), res); 276 | res = step(replace_seq((const char*)res, 't', "T"), res); 277 | res = step(replace_seq((const char*)res, 'p', "P"), res); 278 | res = step(replace_seq((const char*)res, 'k', "K"), res); 279 | res = step(replace_seq((const char*)res, 'f', "F"), res); 280 | res = step(replace_seq((const char*)res, 'm', "M"), res); 281 | res = step(replace_seq((const char*)res, 'n', "N"), res); 282 | 283 | // replace w3 with W3 284 | res = step(replace((const char*)res, "w3", "W3"), res); 285 | // replace wh3 with Wh3 286 | res = step(replace((const char*)res, "wh3", "Wh3"), res); 287 | // replace the final w with 3 288 | res = step(replace_end((const char*)res, "w", "3"), res); 289 | // replace w with 2 290 | res = step(replace((const 
char*)res, "w", "2"), res); 291 | 292 | // replace an initial h with an A 293 | res = step(replace_start((const char*)res, "h", "A"), res); 294 | // replace all other occurrences of h with a 2 295 | res = step(replace((const char*)res, "h", "2"), res); 296 | 297 | // replace r3 with R3 298 | res = step(replace((const char*)res, "r3", "R3"), res); 299 | // replace the final r with 3 300 | res = step(replace_end((const char*)res, "r", "3"), res); 301 | // replace r with 2 302 | res = step(replace((const char*)res, "r", "2"), res); 303 | 304 | // replace l3 with L3 305 | res = step(replace((const char*)res, "l3", "L3"), res); 306 | // replace the final l with 3 307 | res = step(replace_end((const char*)res, "l", "3"), res); 308 | // replace l with 2 309 | res = step(replace((const char*)res, "l", "2"), res); 310 | 311 | // remove all 2s 312 | res = step(replace((const char*)res, "2", ""), res); 313 | // replace the final 3 with A 314 | res = step(replace_end((const char*)res, "3", "A"), res); 315 | // remove all 3s 316 | res = step(replace((const char*)res, "3", ""), res); 317 | 318 | // put ten 1s on the end 319 | // take the first ten characters as the code 320 | res = step(pad((const char*)res), res); 321 | 322 | return res; 323 | } 324 | -------------------------------------------------------------------------------- /src/sqlite3-stats.c: -------------------------------------------------------------------------------- 1 | // Standard deviation and variance by Liam Healy, Public Domain 2 | // extension-functions.c at https://sqlite.org/contrib/ 3 | 4 | // Percentile and generate series by D. Richard Hipp, Public Domain 5 | // https://sqlite.org/src/file/ext/misc/percentile.c 6 | // https://sqlite.org/src/file/ext/misc/series.c 7 | 8 | // Refactored by Anton Zhiyanov, MIT License 9 | // https://github.com/nalgeon/sqlean 10 | 11 | // Statistical functions for SQLite. 
12 | 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | #include "sqlite3ext.h" 23 | SQLITE_EXTENSION_INIT1 24 | 25 | #pragma region Standard deviation and variance 26 | 27 | /* 28 | ** An instance of the following structure holds the context of a 29 | ** stddev() or variance() aggregate computation. 30 | ** implementaion of http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Algorithm_II 31 | ** less prone to rounding errors 32 | */ 33 | typedef struct StddevCtx StddevCtx; 34 | struct StddevCtx { 35 | double rM; 36 | double rS; 37 | int64_t cnt; /* number of elements */ 38 | }; 39 | 40 | /* 41 | ** called for each value received during a calculation of stddev or variance 42 | */ 43 | static void varianceStep(sqlite3_context* context, int argc, sqlite3_value** argv) { 44 | StddevCtx* p; 45 | 46 | double delta; 47 | double x; 48 | 49 | assert(argc == 1); 50 | p = sqlite3_aggregate_context(context, sizeof(*p)); 51 | /* only consider non-null values */ 52 | if (SQLITE_NULL != sqlite3_value_numeric_type(argv[0])) { 53 | p->cnt++; 54 | x = sqlite3_value_double(argv[0]); 55 | delta = (x - p->rM); 56 | p->rM += delta / p->cnt; 57 | p->rS += delta * (x - p->rM); 58 | } 59 | } 60 | 61 | /* 62 | ** Returns the sample standard deviation value 63 | */ 64 | static void stddevFinalize(sqlite3_context* context) { 65 | StddevCtx* p; 66 | p = sqlite3_aggregate_context(context, 0); 67 | if (p && p->cnt > 1) { 68 | sqlite3_result_double(context, sqrt(p->rS / (p->cnt - 1))); 69 | } else { 70 | sqlite3_result_double(context, 0.0); 71 | } 72 | } 73 | 74 | /* 75 | ** Returns the population standard deviation value 76 | */ 77 | static void stddevpopFinalize(sqlite3_context* context) { 78 | StddevCtx* p; 79 | p = sqlite3_aggregate_context(context, 0); 80 | if (p && p->cnt > 1) { 81 | sqlite3_result_double(context, sqrt(p->rS / p->cnt)); 82 | } else { 83 | sqlite3_result_double(context, 0.0); 84 | } 85 | } 
86 | 87 | /* 88 | ** Returns the sample variance value 89 | */ 90 | static void varianceFinalize(sqlite3_context* context) { 91 | StddevCtx* p; 92 | p = sqlite3_aggregate_context(context, 0); 93 | if (p && p->cnt > 1) { 94 | sqlite3_result_double(context, p->rS / (p->cnt - 1)); 95 | } else { 96 | sqlite3_result_double(context, 0.0); 97 | } 98 | } 99 | 100 | /* 101 | ** Returns the population variance value 102 | */ 103 | static void variancepopFinalize(sqlite3_context* context) { 104 | StddevCtx* p; 105 | p = sqlite3_aggregate_context(context, 0); 106 | if (p && p->cnt > 1) { 107 | sqlite3_result_double(context, p->rS / p->cnt); 108 | } else { 109 | sqlite3_result_double(context, 0.0); 110 | } 111 | } 112 | 113 | #pragma endregion 114 | 115 | #pragma region Percentile 116 | 117 | /* The following object is the session context for a single percentile() 118 | ** function. We have to remember all input Y values until the very end. 119 | ** Those values are accumulated in the Percentile.a[] array. 120 | */ 121 | typedef struct Percentile Percentile; 122 | struct Percentile { 123 | unsigned nAlloc; /* Number of slots allocated for a[] */ 124 | unsigned nUsed; /* Number of slots actually used in a[] */ 125 | double rPct; /* 1.0 more than the value for P */ 126 | double* a; /* Array of Y values */ 127 | }; 128 | 129 | /* 130 | ** Return TRUE if the input floating-point number is an infinity. 131 | */ 132 | static int isInfinity(double r) { 133 | sqlite3_uint64 u; 134 | assert(sizeof(u) == sizeof(r)); 135 | memcpy(&u, &r, sizeof(u)); 136 | return ((u >> 52) & 0x7ff) == 0x7ff; 137 | } 138 | 139 | /* 140 | ** Return TRUE if two doubles differ by 0.001 or less 141 | */ 142 | static int sameValue(double a, double b) { 143 | a -= b; 144 | return a >= -0.001 && a <= 0.001; 145 | } 146 | 147 | /* 148 | ** The "step" function for percentile(Y,P) is called once for each 149 | ** input row. 
150 | */ 151 | static void percentStep(sqlite3_context* pCtx, double rPct, int argc, sqlite3_value** argv) { 152 | Percentile* p; 153 | int eType; 154 | double y; 155 | 156 | /* Allocate the session context. */ 157 | p = (Percentile*)sqlite3_aggregate_context(pCtx, sizeof(*p)); 158 | if (p == 0) 159 | return; 160 | 161 | /* Remember the P value. Throw an error if the P value is different 162 | ** from any prior row, per Requirement (2). */ 163 | if (p->rPct == 0.0) { 164 | p->rPct = rPct + 1.0; 165 | } else if (!sameValue(p->rPct, rPct + 1.0)) { 166 | sqlite3_result_error(pCtx, 167 | "2nd argument to percentile() is not the " 168 | "same for all input rows", 169 | -1); 170 | return; 171 | } 172 | 173 | /* Ignore rows for which Y is NULL */ 174 | eType = sqlite3_value_type(argv[0]); 175 | if (eType == SQLITE_NULL) 176 | return; 177 | 178 | /* If not NULL, then Y must be numeric. Otherwise throw an error. 179 | ** Requirement 4 */ 180 | if (eType != SQLITE_INTEGER && eType != SQLITE_FLOAT) { 181 | sqlite3_result_error(pCtx, 182 | "1st argument to percentile() is not " 183 | "numeric", 184 | -1); 185 | return; 186 | } 187 | 188 | /* Throw an error if the Y value is infinity or NaN */ 189 | y = sqlite3_value_double(argv[0]); 190 | if (isInfinity(y)) { 191 | sqlite3_result_error(pCtx, "Inf input to percentile()", -1); 192 | return; 193 | } 194 | 195 | /* Allocate and store the Y */ 196 | if (p->nUsed >= p->nAlloc) { 197 | unsigned n = p->nAlloc * 2 + 250; 198 | double* a = sqlite3_realloc64(p->a, sizeof(double) * n); 199 | if (a == 0) { 200 | sqlite3_free(p->a); 201 | memset(p, 0, sizeof(*p)); 202 | sqlite3_result_error_nomem(pCtx); 203 | return; 204 | } 205 | p->nAlloc = n; 206 | p->a = a; 207 | } 208 | p->a[p->nUsed++] = y; 209 | } 210 | 211 | static void percentStepCustom(sqlite3_context* pCtx, int argc, sqlite3_value** argv) { 212 | assert(argc == 2); 213 | /* Requirement 3: P must be a number between 0 and 100 */ 214 | int eType = 
sqlite3_value_numeric_type(argv[1]); 215 | double rPct = sqlite3_value_double(argv[1]); 216 | if ((eType != SQLITE_INTEGER && eType != SQLITE_FLOAT) || rPct < 0.0 || rPct > 100.0) { 217 | sqlite3_result_error(pCtx, 218 | "2nd argument to percentile() should be " 219 | "a number between 0.0 and 100.0", 220 | -1); 221 | return; 222 | } 223 | percentStep(pCtx, rPct, argc, argv); 224 | } 225 | 226 | static void percentStep25(sqlite3_context* pCtx, int argc, sqlite3_value** argv) { 227 | assert(argc == 1); 228 | percentStep(pCtx, 25, argc, argv); 229 | } 230 | 231 | static void percentStep50(sqlite3_context* pCtx, int argc, sqlite3_value** argv) { 232 | assert(argc == 1); 233 | percentStep(pCtx, 50, argc, argv); 234 | } 235 | 236 | static void percentStep75(sqlite3_context* pCtx, int argc, sqlite3_value** argv) { 237 | assert(argc == 1); 238 | percentStep(pCtx, 75, argc, argv); 239 | } 240 | 241 | static void percentStep90(sqlite3_context* pCtx, int argc, sqlite3_value** argv) { 242 | assert(argc == 1); 243 | percentStep(pCtx, 90, argc, argv); 244 | } 245 | 246 | static void percentStep95(sqlite3_context* pCtx, int argc, sqlite3_value** argv) { 247 | assert(argc == 1); 248 | percentStep(pCtx, 95, argc, argv); 249 | } 250 | 251 | static void percentStep99(sqlite3_context* pCtx, int argc, sqlite3_value** argv) { 252 | assert(argc == 1); 253 | percentStep(pCtx, 99, argc, argv); 254 | } 255 | 256 | /* 257 | ** Compare to doubles for sorting using qsort() 258 | */ 259 | static int SQLITE_CDECL doubleCmp(const void* pA, const void* pB) { 260 | double a = *(double*)pA; 261 | double b = *(double*)pB; 262 | if (a == b) 263 | return 0; 264 | if (a < b) 265 | return -1; 266 | return +1; 267 | } 268 | 269 | /* 270 | ** Called to compute the final output of percentile() and to clean 271 | ** up all allocated memory. 
272 | */ 273 | static void percentFinal(sqlite3_context* pCtx) { 274 | Percentile* p; 275 | unsigned i1, i2; 276 | double v1, v2; 277 | double ix, vx; 278 | p = (Percentile*)sqlite3_aggregate_context(pCtx, 0); 279 | if (p == 0) 280 | return; 281 | if (p->a == 0) 282 | return; 283 | if (p->nUsed) { 284 | qsort(p->a, p->nUsed, sizeof(double), doubleCmp); 285 | ix = (p->rPct - 1.0) * (p->nUsed - 1) * 0.01; 286 | i1 = (unsigned)ix; 287 | i2 = ix == (double)i1 || i1 == p->nUsed - 1 ? i1 : i1 + 1; 288 | v1 = p->a[i1]; 289 | v2 = p->a[i2]; 290 | vx = v1 + (v2 - v1) * (ix - i1); 291 | sqlite3_result_double(pCtx, vx); 292 | } 293 | sqlite3_free(p->a); 294 | memset(p, 0, sizeof(*p)); 295 | } 296 | 297 | #pragma endregion 298 | 299 | #pragma region Generate series 300 | 301 | #ifndef SQLITE_OMIT_VIRTUALTABLE 302 | 303 | /* series_cursor is a subclass of sqlite3_vtab_cursor which will 304 | ** serve as the underlying representation of a cursor that scans 305 | ** over rows of the result 306 | */ 307 | typedef struct series_cursor series_cursor; 308 | struct series_cursor { 309 | sqlite3_vtab_cursor base; /* Base class - must be first */ 310 | int isDesc; /* True to count down rather than up */ 311 | sqlite3_int64 iRowid; /* The rowid */ 312 | sqlite3_int64 iValue; /* Current value ("value") */ 313 | sqlite3_int64 mnValue; /* Mimimum value ("start") */ 314 | sqlite3_int64 mxValue; /* Maximum value ("stop") */ 315 | sqlite3_int64 iStep; /* Increment ("step") */ 316 | }; 317 | 318 | /* 319 | ** The seriesConnect() method is invoked to create a new 320 | ** series_vtab that describes the generate_series virtual table. 321 | ** 322 | ** Think of this routine as the constructor for series_vtab objects. 323 | ** 324 | ** All this routine needs to do is: 325 | ** 326 | ** (1) Allocate the series_vtab object and initialize all fields. 
327 | ** 328 | ** (2) Tell SQLite (via the sqlite3_declare_vtab() interface) what the 329 | ** result set of queries against generate_series will look like. 330 | */ 331 | static int seriesConnect(sqlite3* db, 332 | void* pUnused, 333 | int argcUnused, 334 | const char* const* argvUnused, 335 | sqlite3_vtab** ppVtab, 336 | char** pzErrUnused) { 337 | sqlite3_vtab* pNew; 338 | int rc; 339 | 340 | /* Column numbers */ 341 | #define SERIES_COLUMN_VALUE 0 342 | #define SERIES_COLUMN_START 1 343 | #define SERIES_COLUMN_STOP 2 344 | #define SERIES_COLUMN_STEP 3 345 | 346 | (void)pUnused; 347 | (void)argcUnused; 348 | (void)argvUnused; 349 | (void)pzErrUnused; 350 | rc = sqlite3_declare_vtab(db, "CREATE TABLE x(value,start hidden,stop hidden,step hidden)"); 351 | if (rc == SQLITE_OK) { 352 | pNew = *ppVtab = sqlite3_malloc(sizeof(*pNew)); 353 | if (pNew == 0) 354 | return SQLITE_NOMEM; 355 | memset(pNew, 0, sizeof(*pNew)); 356 | sqlite3_vtab_config(db, SQLITE_VTAB_INNOCUOUS); 357 | } 358 | return rc; 359 | } 360 | 361 | /* 362 | ** This method is the destructor for series_cursor objects. 363 | */ 364 | static int seriesDisconnect(sqlite3_vtab* pVtab) { 365 | sqlite3_free(pVtab); 366 | return SQLITE_OK; 367 | } 368 | 369 | /* 370 | ** Constructor for a new series_cursor object. 371 | */ 372 | static int seriesOpen(sqlite3_vtab* pUnused, sqlite3_vtab_cursor** ppCursor) { 373 | series_cursor* pCur; 374 | (void)pUnused; 375 | pCur = sqlite3_malloc(sizeof(*pCur)); 376 | if (pCur == 0) 377 | return SQLITE_NOMEM; 378 | memset(pCur, 0, sizeof(*pCur)); 379 | *ppCursor = &pCur->base; 380 | return SQLITE_OK; 381 | } 382 | 383 | /* 384 | ** Destructor for a series_cursor. 385 | */ 386 | static int seriesClose(sqlite3_vtab_cursor* cur) { 387 | sqlite3_free(cur); 388 | return SQLITE_OK; 389 | } 390 | 391 | /* 392 | ** Advance a series_cursor to its next row of output. 
393 | */ 394 | static int seriesNext(sqlite3_vtab_cursor* cur) { 395 | series_cursor* pCur = (series_cursor*)cur; 396 | if (pCur->isDesc) { 397 | pCur->iValue -= pCur->iStep; 398 | } else { 399 | pCur->iValue += pCur->iStep; 400 | } 401 | pCur->iRowid++; 402 | return SQLITE_OK; 403 | } 404 | 405 | /* 406 | ** Return values of columns for the row at which the series_cursor 407 | ** is currently pointing. 408 | */ 409 | static int seriesColumn(sqlite3_vtab_cursor* cur, /* The cursor */ 410 | sqlite3_context* ctx, /* First argument to sqlite3_result_...() */ 411 | int i /* Which column to return */ 412 | ) { 413 | series_cursor* pCur = (series_cursor*)cur; 414 | sqlite3_int64 x = 0; 415 | switch (i) { 416 | case SERIES_COLUMN_START: 417 | x = pCur->mnValue; 418 | break; 419 | case SERIES_COLUMN_STOP: 420 | x = pCur->mxValue; 421 | break; 422 | case SERIES_COLUMN_STEP: 423 | x = pCur->iStep; 424 | break; 425 | default: 426 | x = pCur->iValue; 427 | break; 428 | } 429 | sqlite3_result_int64(ctx, x); 430 | return SQLITE_OK; 431 | } 432 | 433 | /* 434 | ** Return the rowid for the current row. In this implementation, the 435 | ** first row returned is assigned rowid value 1, and each subsequent 436 | ** row a value 1 more than that of the previous. 437 | */ 438 | static int seriesRowid(sqlite3_vtab_cursor* cur, sqlite_int64* pRowid) { 439 | series_cursor* pCur = (series_cursor*)cur; 440 | *pRowid = pCur->iRowid; 441 | return SQLITE_OK; 442 | } 443 | 444 | /* 445 | ** Return TRUE if the cursor has been moved off of the last 446 | ** row of output. 447 | */ 448 | static int seriesEof(sqlite3_vtab_cursor* cur) { 449 | series_cursor* pCur = (series_cursor*)cur; 450 | if (pCur->isDesc) { 451 | return pCur->iValue < pCur->mnValue; 452 | } else { 453 | return pCur->iValue > pCur->mxValue; 454 | } 455 | } 456 | 457 | /* True to cause run-time checking of the start=, stop=, and/or step= 458 | ** parameters. 
The only reason to do this is for testing the 459 | ** constraint checking logic for virtual tables in the SQLite core. 460 | */ 461 | #ifndef SQLITE_SERIES_CONSTRAINT_VERIFY 462 | #define SQLITE_SERIES_CONSTRAINT_VERIFY 0 463 | #endif 464 | 465 | /* 466 | ** This method is called to "rewind" the series_cursor object back 467 | ** to the first row of output. This method is always called at least 468 | ** once prior to any call to seriesColumn() or seriesRowid() or 469 | ** seriesEof(). 470 | ** 471 | ** The query plan selected by seriesBestIndex is passed in the idxNum 472 | ** parameter. (idxStr is not used in this implementation.) idxNum 473 | ** is a bitmask showing which constraints are available: 474 | ** 475 | ** 1: start=VALUE 476 | ** 2: stop=VALUE 477 | ** 4: step=VALUE 478 | ** 479 | ** Also, if bit 8 is set, that means that the series should be output 480 | ** in descending order rather than in ascending order. If bit 16 is 481 | ** set, then output must appear in ascending order. 482 | ** 483 | ** This routine should initialize the cursor and position it so that it 484 | ** is pointing at the first row, or pointing off the end of the table 485 | ** (so that seriesEof() will return true) if the table is empty. 
486 | */ 487 | static int seriesFilter(sqlite3_vtab_cursor* pVtabCursor, 488 | int idxNum, 489 | const char* idxStrUnused, 490 | int argc, 491 | sqlite3_value** argv) { 492 | series_cursor* pCur = (series_cursor*)pVtabCursor; 493 | int i = 0; 494 | (void)idxStrUnused; 495 | if (idxNum & 1) { 496 | pCur->mnValue = sqlite3_value_int64(argv[i++]); 497 | } else { 498 | pCur->mnValue = 0; 499 | } 500 | if (idxNum & 2) { 501 | pCur->mxValue = sqlite3_value_int64(argv[i++]); 502 | } else { 503 | pCur->mxValue = 0xffffffff; 504 | } 505 | if (idxNum & 4) { 506 | pCur->iStep = sqlite3_value_int64(argv[i++]); 507 | if (pCur->iStep == 0) { 508 | pCur->iStep = 1; 509 | } else if (pCur->iStep < 0) { 510 | pCur->iStep = -pCur->iStep; 511 | if ((idxNum & 16) == 0) 512 | idxNum |= 8; 513 | } 514 | } else { 515 | pCur->iStep = 1; 516 | } 517 | for (i = 0; i < argc; i++) { 518 | if (sqlite3_value_type(argv[i]) == SQLITE_NULL) { 519 | /* If any of the constraints have a NULL value, then return no rows. 520 | ** See ticket https://www.sqlite.org/src/info/fac496b61722daf2 */ 521 | pCur->mnValue = 1; 522 | pCur->mxValue = 0; 523 | break; 524 | } 525 | } 526 | if (idxNum & 8) { 527 | pCur->isDesc = 1; 528 | pCur->iValue = pCur->mxValue; 529 | if (pCur->iStep > 0) { 530 | pCur->iValue -= (pCur->mxValue - pCur->mnValue) % pCur->iStep; 531 | } 532 | } else { 533 | pCur->isDesc = 0; 534 | pCur->iValue = pCur->mnValue; 535 | } 536 | pCur->iRowid = 1; 537 | return SQLITE_OK; 538 | } 539 | 540 | /* 541 | ** SQLite will invoke this method one or more times while planning a query 542 | ** that uses the generate_series virtual table. This routine needs to create 543 | ** a query plan for each invocation and compute an estimated cost for that 544 | ** plan. 545 | ** 546 | ** In this implementation idxNum is used to represent the 547 | ** query plan. idxStr is unused. 
548 | ** 549 | ** The query plan is represented by bits in idxNum: 550 | ** 551 | ** (1) start = $value -- constraint exists 552 | ** (2) stop = $value -- constraint exists 553 | ** (4) step = $value -- constraint exists 554 | ** (8) output in descending order 555 | */ 556 | static int seriesBestIndex(sqlite3_vtab* pVTab, sqlite3_index_info* pIdxInfo) { 557 | int i, j; /* Loop over constraints */ 558 | int idxNum = 0; /* The query plan bitmask */ 559 | int bStartSeen = 0; /* EQ constraint seen on the START column */ 560 | int unusableMask = 0; /* Mask of unusable constraints */ 561 | int nArg = 0; /* Number of arguments that seriesFilter() expects */ 562 | int aIdx[3]; /* Constraints on start, stop, and step */ 563 | const struct sqlite3_index_constraint* pConstraint; 564 | 565 | /* This implementation assumes that the start, stop, and step columns 566 | ** are the last three columns in the virtual table. */ 567 | assert(SERIES_COLUMN_STOP == SERIES_COLUMN_START + 1); 568 | assert(SERIES_COLUMN_STEP == SERIES_COLUMN_START + 2); 569 | 570 | aIdx[0] = aIdx[1] = aIdx[2] = -1; 571 | pConstraint = pIdxInfo->aConstraint; 572 | for (i = 0; i < pIdxInfo->nConstraint; i++, pConstraint++) { 573 | int iCol; /* 0 for start, 1 for stop, 2 for step */ 574 | int iMask; /* bitmask for those column */ 575 | if (pConstraint->iColumn < SERIES_COLUMN_START) 576 | continue; 577 | iCol = pConstraint->iColumn - SERIES_COLUMN_START; 578 | assert(iCol >= 0 && iCol <= 2); 579 | iMask = 1 << iCol; 580 | if (iCol == 0) 581 | bStartSeen = 1; 582 | if (pConstraint->usable == 0) { 583 | unusableMask |= iMask; 584 | continue; 585 | } else if (pConstraint->op == SQLITE_INDEX_CONSTRAINT_EQ) { 586 | idxNum |= iMask; 587 | aIdx[iCol] = i; 588 | } 589 | } 590 | for (i = 0; i < 3; i++) { 591 | if ((j = aIdx[i]) >= 0) { 592 | pIdxInfo->aConstraintUsage[j].argvIndex = ++nArg; 593 | pIdxInfo->aConstraintUsage[j].omit = !SQLITE_SERIES_CONSTRAINT_VERIFY; 594 | } 595 | } 596 | /* The current 
generate_column() implementation requires at least one 597 | ** argument (the START value). Legacy versions assumed START=0 if the 598 | ** first argument was omitted. Compile with -DZERO_ARGUMENT_GENERATE_SERIES 599 | ** to obtain the legacy behavior */ 600 | #ifndef ZERO_ARGUMENT_GENERATE_SERIES 601 | if (!bStartSeen) { 602 | sqlite3_free(pVTab->zErrMsg); 603 | pVTab->zErrMsg = 604 | sqlite3_mprintf("first argument to \"generate_series()\" missing or unusable"); 605 | return SQLITE_ERROR; 606 | } 607 | #endif 608 | if ((unusableMask & ~idxNum) != 0) { 609 | /* The start, stop, and step columns are inputs. Therefore if there 610 | ** are unusable constraints on any of start, stop, or step then 611 | ** this plan is unusable */ 612 | return SQLITE_CONSTRAINT; 613 | } 614 | if ((idxNum & 3) == 3) { 615 | /* Both start= and stop= boundaries are available. This is the 616 | ** the preferred case */ 617 | pIdxInfo->estimatedCost = (double)(2 - ((idxNum & 4) != 0)); 618 | pIdxInfo->estimatedRows = 1000; 619 | if (pIdxInfo->nOrderBy == 1) { 620 | if (pIdxInfo->aOrderBy[0].desc) { 621 | idxNum |= 8; 622 | } else { 623 | idxNum |= 16; 624 | } 625 | pIdxInfo->orderByConsumed = 1; 626 | } 627 | } else { 628 | /* If either boundary is missing, we have to generate a huge span 629 | ** of numbers. Make this case very expensive so that the query 630 | ** planner will work hard to avoid it. */ 631 | pIdxInfo->estimatedRows = 2147483647; 632 | } 633 | pIdxInfo->idxNum = idxNum; 634 | return SQLITE_OK; 635 | } 636 | 637 | /* 638 | ** This following structure defines all the methods for the 639 | ** generate_series virtual table. 
640 | */ 641 | static sqlite3_module seriesModule = { 642 | 0, /* iVersion */ 643 | 0, /* xCreate */ 644 | seriesConnect, /* xConnect */ 645 | seriesBestIndex, /* xBestIndex */ 646 | seriesDisconnect, /* xDisconnect */ 647 | 0, /* xDestroy */ 648 | seriesOpen, /* xOpen - open a cursor */ 649 | seriesClose, /* xClose - close a cursor */ 650 | seriesFilter, /* xFilter - configure scan constraints */ 651 | seriesNext, /* xNext - advance a cursor */ 652 | seriesEof, /* xEof - check for end of scan */ 653 | seriesColumn, /* xColumn - read data */ 654 | seriesRowid, /* xRowid - read data */ 655 | 0, /* xUpdate */ 656 | 0, /* xBegin */ 657 | 0, /* xSync */ 658 | 0, /* xCommit */ 659 | 0, /* xRollback */ 660 | 0, /* xFindMethod */ 661 | 0, /* xRename */ 662 | 0, /* xSavepoint */ 663 | 0, /* xRelease */ 664 | 0, /* xRollbackTo */ 665 | 0 /* xShadowName */ 666 | }; 667 | 668 | #endif /* SQLITE_OMIT_VIRTUALTABLE */ 669 | 670 | #pragma endregion 671 | 672 | /* 673 | * Registers the extension. 674 | */ 675 | #ifdef _WIN32 676 | __declspec(dllexport) 677 | #endif 678 | int sqlite3_stats_init(sqlite3* db, char** pzErrMsg, const sqlite3_api_routines* pApi) { 679 | SQLITE_EXTENSION_INIT2(pApi); 680 | static const int flags = SQLITE_UTF8 | SQLITE_INNOCUOUS; 681 | sqlite3_create_function(db, "stddev", 1, flags, 0, 0, varianceStep, stddevFinalize); 682 | sqlite3_create_function(db, "stddev_samp", 1, flags, 0, 0, varianceStep, stddevFinalize); 683 | sqlite3_create_function(db, "stddev_pop", 1, flags, 0, 0, varianceStep, stddevpopFinalize); 684 | sqlite3_create_function(db, "variance", 1, flags, 0, 0, varianceStep, varianceFinalize); 685 | sqlite3_create_function(db, "var_samp", 1, flags, 0, 0, varianceStep, varianceFinalize); 686 | sqlite3_create_function(db, "var_pop", 1, flags, 0, 0, varianceStep, variancepopFinalize); 687 | sqlite3_create_function(db, "median", 1, flags, 0, 0, percentStep50, percentFinal); 688 | sqlite3_create_function(db, "percentile", 2, flags, 0, 0, 
percentStepCustom, percentFinal); 689 | sqlite3_create_function(db, "percentile_25", 1, flags, 0, 0, percentStep25, percentFinal); 690 | sqlite3_create_function(db, "percentile_75", 1, flags, 0, 0, percentStep75, percentFinal); 691 | sqlite3_create_function(db, "percentile_90", 1, flags, 0, 0, percentStep90, percentFinal); 692 | sqlite3_create_function(db, "percentile_95", 1, flags, 0, 0, percentStep95, percentFinal); 693 | sqlite3_create_function(db, "percentile_99", 1, flags, 0, 0, percentStep99, percentFinal); 694 | #ifndef SQLITE_OMIT_VIRTUALTABLE 695 | if (sqlite3_libversion_number() >= 3008012) { 696 | sqlite3_create_module(db, "generate_series", &seriesModule, 0); 697 | } 698 | #endif 699 | return SQLITE_OK; 700 | } --------------------------------------------------------------------------------