├── .gitignore
├── .clang-format
├── docs
    ├── json1.md
    ├── crypto.md
    ├── math.md
    ├── text.md
    ├── ipaddr.md
    ├── uuid.md
    ├── third-party.md
    ├── unicode.md
    ├── stats.md
    ├── re.md
    ├── fuzzy.md
    ├── fileio.md
    └── vsv.md
├── src
    ├── crypto
    │   ├── sha1.h
    │   ├── md5.h
    │   ├── sha2.h
    │   ├── md5.c
    │   └── sha1.c
    ├── fuzzy
    │   ├── fuzzy.h
    │   ├── hamming.c
    │   ├── common.h
    │   ├── levenshtein.c
    │   ├── optimal_string_alignment.c
    │   ├── phonetic.c
    │   ├── soundex.c
    │   ├── refined_soundex.c
    │   ├── damerau_levenshtein.c
    │   ├── jaro_winkler.c
    │   ├── common.c
    │   ├── editdist.c
    │   └── caverphone.c
    ├── re.h
    ├── sqlite3-crypto.c
    ├── sqlite3-text.c
    ├── sqlite3-ipaddr.c
    ├── sqlite3-uuid.c
    ├── sqlite3-math.c
    ├── sqlite3-re.c
    ├── sqlite3-fuzzy.c
    └── sqlite3-stats.c
├── test
    ├── vsv.sql
    ├── unicode.sql
    ├── re.sql
    ├── text.sql
    ├── uuid.sql
    ├── crypto.sql
    ├── json1.sql
    ├── stats.sql
    ├── ipaddr.sql
    ├── fileio.sql
    ├── math.sql
    └── fuzzy.sql
├── LICENSE
├── .github
    └── workflows
    │   ├── build.yml
    │   └── publish.yml
├── Makefile
└── README.md


/.gitignore:
--------------------------------------------------------------------------------
1 | dist


--------------------------------------------------------------------------------
/.clang-format:
--------------------------------------------------------------------------------
1 | BasedOnStyle: Chromium
2 | IndentWidth: 4
3 | ColumnLimit: 100


--------------------------------------------------------------------------------
/docs/json1.md:
--------------------------------------------------------------------------------
 1 | # json1: JSON handling in SQLite
 2 | 
 3 | This is the 'native' SQLite [JSON1 extension](https://sqlite.org/json1.html).
 4 | It's often compiled into the SQLite build, but in case your build doesn't include it, I've compiled it separately.
 5 | 
 6 | ## Usage
 7 | 
 8 | ```
 9 | sqlite> .load ./json1
10 | sqlite> select json_object("answer", 42);
11 | ```
12 | 
13 | [Download](https://github.com/nalgeon/sqlean/releases/latest)
14 | 


--------------------------------------------------------------------------------
/src/crypto/sha1.h:
--------------------------------------------------------------------------------
 1 | // Adapted from https://sqlite.org/src/file/ext/misc/sha1.c
 2 | // Public domain
 3 | 
 4 | #ifndef __SHA1_H__
 5 | #define __SHA1_H__
 6 | 
 7 | #include <stddef.h>
 8 | 
 9 | #define SHA1_BLOCK_SIZE 20
10 | 
11 | typedef struct SHA1Context {  // context passed to sha1_update() and sha1_final()
12 |     unsigned int state[5];  // presumably the running hash words — TODO confirm in sha1.c
13 |     unsigned int count[2];  // presumably a processed-length counter — TODO confirm in sha1.c
14 |     unsigned char buffer[64];  // presumably holds a partial input block — TODO confirm in sha1.c
15 | } SHA1Context;
16 | 
17 | void* sha1_init();
18 | void sha1_update(SHA1Context* ctx, const unsigned char data[], size_t len);
19 | int sha1_final(SHA1Context* ctx, unsigned char hash[]);
20 | 
21 | #endif


--------------------------------------------------------------------------------
/docs/crypto.md:
--------------------------------------------------------------------------------
 1 | # crypto: Secure hashes in SQLite
 2 | 
 3 | Secure hash and message digest functions.
 4 | 
 5 | Provides the following functions:
 6 | 
 7 | -   `md5(data)`,
 8 | -   `sha1(data)`,
 9 | -   `sha256(data)`,
10 | -   `sha384(data)`,
11 | -   `sha512(data)`.
12 | 
13 | Each function expects `data` to be `TEXT` or `BLOB`. Returns a `BLOB` hash. Use the `hex()` function to convert it to a hex string.
14 | 
15 | ## Usage
16 | 
17 | ```
18 | sqlite> select hex(md5('abc'));
19 | 900150983CD24FB0D6963F7D28E17F72
20 | ```
21 | 
22 | [Download](https://github.com/nalgeon/sqlean/releases/latest)
23 | 


--------------------------------------------------------------------------------
/test/vsv.sql:
--------------------------------------------------------------------------------
 1 | -- Copyright (c) 2021 Anton Zhiyanov, MIT License
 2 | -- https://github.com/nalgeon/sqlean
 3 | 
 4 | .load dist/vsv
 5 | 
 6 | .shell echo '11,Diane,London' > people.csv
 7 | .shell echo '22,Grace,Berlin' >> people.csv
 8 | .shell echo '33,Alice,Paris' >> people.csv
 9 | 
10 | create virtual table people using vsv(
11 |     filename=people.csv,
12 |     schema="create table people(id integer, name text, city text)",
13 |     columns=3,
14 |     affinity=integer
15 | );
16 | select '01', count(*) = 3 from people;
17 | select '02', (id, name, city) = (22, 'Grace', 'Berlin') from people where id = 22;
18 | select '03', typeof(id) = 'integer' from people where id = 22;
19 | 
20 | .shell rm -f people.csv


--------------------------------------------------------------------------------
/test/unicode.sql:
--------------------------------------------------------------------------------
 1 | -- Copyright (c) 2021 Anton Zhiyanov, MIT License
 2 | -- https://github.com/nalgeon/sqlean
 3 | 
 4 | -- .load dist/unicode
 5 | select load_extension('dist/unicode');
 6 | 
 7 | select '01', lower('hElLo') = 'hello';
 8 | select '02', nlower('hElLo') = 'hello';
 9 | select '03', upper('hElLo') = 'HELLO';
10 | select '04', nupper('hElLo') = 'HELLO';
11 | select '05', casefold('hElLo') = 'hello';
12 | 
13 | select '11', lower('пРиВеТ') = 'привет';
14 | select '12', nlower('пРиВеТ') = 'привет';
15 | select '13', upper('пРиВеТ') = 'ПРИВЕТ';
16 | select '14', nupper('пРиВеТ') = 'ПРИВЕТ';
17 | select '15', casefold('пРиВеТ') = 'привет';
18 | 
19 | select '21', unaccent('hôtel') = 'hotel';
20 | 
21 | select '31', like('hEl_o', 'hello') = 1;
22 | select '32', like('пРиВ_Т', 'привет') = 1;
23 | select '33', ('привет' like 'пРиВ_Т') = 1;
24 | 


--------------------------------------------------------------------------------
/docs/math.md:
--------------------------------------------------------------------------------
 1 | # math: Mathematics in SQLite
 2 | 
 3 | Common math functions for SQLite versions before 3.35.
 4 | Extracted from SQLite 3.35.4 source code ([func.c](https://sqlite.org/src/file/src/func.c)).
 5 | 
 6 | Provides the following functions:
 7 | 
 8 | -   rounding: `ceil`, `floor`, `trunc`;
 9 | -   logarithmic: `ln`, `log10`, `log2`, `log`;
10 | -   arithmetic: `pow`, `sqrt`, `mod`;
11 | -   trigonometric: `cos`, `sin`, `tan`;
12 | -   hyperbolic: `cosh`, `sinh`, `tanh`;
13 | -   inverse trigonometric: `acos`, `asin`, `atan`, `atan2`;
14 | -   inverse hyperbolic: `acosh`, `asinh`, `atanh`;
15 | -   angular measures: `radians`, `degrees`;
16 | -   `pi`.
17 | 
18 | [Full description](https://sqlite.org/lang_mathfunc.html)
19 | 
20 | ## Usage
21 | 
22 | ```
23 | sqlite> .load ./math
24 | sqlite> select sqrt(9);
25 | ```
26 | 
27 | [Download](https://github.com/nalgeon/sqlean/releases/latest)
28 | 


--------------------------------------------------------------------------------
/src/fuzzy/fuzzy.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2014 Ross Bayer, MIT License
 2 | // https://github.com/Rostepher/libstrcmp
 3 | 
 4 | #ifndef FUZZY_H
 5 | #define FUZZY_H
 6 | 
 7 | // distance metrics
 8 | int damerau_levenshtein(const char*, const char*);
 9 | int hamming(const char*, const char*);
10 | double jaro(const char*, const char*);
11 | double jaro_winkler(const char*, const char*);
12 | unsigned levenshtein(const char*, const char*);
13 | unsigned optimal_string_alignment(const char*, const char*);
14 | int edit_distance(const char*, const char*, int*);
15 | 
16 | // phonetics
17 | char* caverphone(const char*);
18 | char* soundex(const char*);
19 | char* refined_soundex(const char*);
20 | unsigned char* phonetic_hash(const unsigned char*, int);
21 | 
22 | // translit
23 | unsigned char* transliterate(const unsigned char*, int);
24 | int translen_to_charlen(const char*, int, int);
25 | int script_code(const unsigned char*, int);
26 | 
27 | #endif


--------------------------------------------------------------------------------
/test/re.sql:
--------------------------------------------------------------------------------
 1 | -- Copyright (c) 2021 Anton Zhiyanov, MIT License
 2 | -- https://github.com/nalgeon/sqlean
 3 | 
 4 | .load dist/re
 5 | select '01', regexp_replace('the year is 2021', '[0-9]+', '2050') = 'the year is 2050';
 6 | select '02', regexp_replace('the year is 2021', '2k21', '2050') = 'the year is 2021';
 7 | select '03', regexp_replace('10 10 10', '10$', '') = '10 10 ';
 8 | select '04', regexp_replace('10 10 10', '^10', '') = ' 10 10';
 9 | select '05', regexp_replace('hello', 'h', '') = 'ello';
10 | select '06', regexp_replace('hello', 'h', '.') = '.ello';
11 | select '07', regexp_replace('hello', 'h', '..') = '..ello';
12 | select '08', regexp_replace('hello', 'e', '') = 'hllo';
13 | select '09', regexp_replace('hello', 'e', '.') = 'h.llo';
14 | select '10', regexp_replace('hello', 'e', '..') = 'h..llo';
15 | select '11', regexp_replace('hello', 'o', '') = 'hell';
16 | select '12', regexp_replace('hello', 'o', '.') = 'hell.';
17 | select '13', regexp_replace('hello', 'o', '..') = 'hell..';
18 | 


--------------------------------------------------------------------------------
/docs/text.md:
--------------------------------------------------------------------------------
 1 | # text: Text manipulation in SQLite
 2 | 
 3 | Additional string functions.
 4 | Adapted from [extension-functions.c](https://sqlite.org/contrib/) by Liam Healy.
 5 | 
 6 | Provides the following functions:
 7 | 
 8 | ### `reverse(source)`
 9 | 
10 | Returns the reversed string.
11 | 
12 | ```
13 | sqlite> select reverse('hello world');
14 | dlrow olleh
15 | ```
16 | 
17 | ### `split_part(source, sep, part)`
18 | 
19 | Splits `source` string on `sep` and returns the given `part` (counting from one).
20 | 
21 | ```
22 | sqlite> select split_part('one;two;three', ';', 2);
23 | two
24 | sqlite> select split_part('one;;three', ';', 2);
25 | 
26 | ```
27 | 
28 | If `sep` is composed of multiple characters, each character is treated as a separator. E.g.:
29 | 
30 | ```
31 | sqlite> select split_part('one/two\three', '/\', 2);
32 | two
33 | ```
34 | 
35 | Only ASCII (1-byte) symbols are supported as separators.
36 | 
37 | ## Usage
38 | 
39 | ```
40 | sqlite> .load ./text
41 | sqlite> select reverse('hello');
42 | ```
43 | 
44 | [Download](https://github.com/nalgeon/sqlean/releases/latest)
45 | 


--------------------------------------------------------------------------------
/test/text.sql:
--------------------------------------------------------------------------------
 1 | -- Copyright (c) 2021 Anton Zhiyanov, MIT License
 2 | -- https://github.com/nalgeon/sqlean
 3 | 
 4 | .load dist/text
 5 | 
 6 | -- Reverse string
 7 | select '01', reverse(null) is NULL;
 8 | select '02', reverse('hello') = 'olleh';
 9 | select '03', reverse('привет') = 'тевирп';
10 | select '04', reverse("𐌀𐌁𐌂") = '𐌂𐌁𐌀';
11 | select '05', reverse('hello 42@ world') = 'dlrow @24 olleh';
12 | 
13 | -- Extract part from string
14 | select '11', split_part(NULL, ',', 2) is NULL;
15 | select '12', split_part('', ',', 2) = '';
16 | select '13', split_part('one,two,three', ',', 2) = 'two';
17 | select '14', split_part('one|two|three', '|', 2) = 'two';
18 | select '15', split_part('один,два,три', ',', 2) = 'два';
19 | select '16', split_part('one,two,three', ',', 10) = '';
20 | select '17', split_part('one,two,three', ';', 2) = '';
21 | select '18', split_part('one,two,three', '', 1) = 'one,two,three';
22 | select '19', split_part('one,two,three', NULL, 2) is NULL;
23 | select '20', split_part('one,,,four', ',', 2) = '';
24 | select '21', split_part('one,,,four', ',', 4) = 'four';
25 | select '22', split_part('one/two|three', '/|', 2) = 'two';


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2021+ Anton Zhiyanov <https://github.com/nalgeon/sqlean>
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/src/crypto/md5.h:
--------------------------------------------------------------------------------
 1 | /*********************************************************************
 2 |  * Filename:   md5.h
 3 |  * Author:     Brad Conte (brad AT bradconte.com)
 4 |  * Source:     https://github.com/B-Con/crypto-algorithms
 5 |  * License:    Public Domain
 6 |  * Details:    Defines the API for the corresponding MD5 implementation.
 7 |  *********************************************************************/
 8 | 
 9 | #ifndef MD5_H
10 | #define MD5_H
11 | 
12 | /*************************** HEADER FILES ***************************/
13 | #include <stddef.h>
14 | 
15 | /****************************** MACROS ******************************/
16 | #define MD5_BLOCK_SIZE 16  // MD5 outputs a 16 byte digest
17 | 
18 | /**************************** DATA TYPES ****************************/
19 | typedef unsigned char BYTE;  // 8-bit byte
20 | typedef unsigned int WORD;   // 32-bit word, change to "long" for 16-bit machines
21 | 
22 | typedef struct {  // context passed to md5_update() and md5_final()
23 |     BYTE data[64];  // presumably buffers a partial input block — TODO confirm in md5.c
24 |     WORD datalen;  // presumably the number of bytes currently held in data — TODO confirm
25 |     unsigned long long bitlen;  // presumably the total message length in bits — TODO confirm
26 |     WORD state[4];  // presumably the running hash words — TODO confirm in md5.c
27 | } MD5_CTX;
28 | 
29 | /*********************** FUNCTION DECLARATIONS **********************/
30 | void* md5_init();
31 | void md5_update(MD5_CTX* ctx, const BYTE data[], size_t len);
32 | int md5_final(MD5_CTX* ctx, BYTE hash[]);
33 | 
34 | #endif  // MD5_H
35 | 


--------------------------------------------------------------------------------
/test/uuid.sql:
--------------------------------------------------------------------------------
 1 | -- Copyright (c) 2021 Anton Zhiyanov, MIT License
 2 | -- https://github.com/nalgeon/sqlean
 3 | 
 4 | .load dist/uuid
 5 | 
 6 | -- uuid4
 7 | select '01', uuid4() like '________-____-4___-____-____________';
 8 | select '02', gen_random_uuid() like '________-____-4___-____-____________';
 9 | 
10 | -- uuid_str
11 | select '03', uuid_str('d5a80b20-0d8f-11e5-b8cb-080027b6ec40') = 'd5a80b20-0d8f-11e5-b8cb-080027b6ec40';
12 | select '04', uuid_str('d5a80b200d8f11e5b8cb080027b6ec40') = 'd5a80b20-0d8f-11e5-b8cb-080027b6ec40';
13 | select '05', uuid_str('{d5a80b20-0d8f-11e5-b8cb-080027b6ec40}') = 'd5a80b20-0d8f-11e5-b8cb-080027b6ec40';
14 | select '06', uuid_str('D5A80B20-0D8F-11E5-B8CB-080027B6EC40') = 'd5a80b20-0d8f-11e5-b8cb-080027b6ec40';
15 | select '07', uuid_str(randomblob(16)) like '________-____-____-____-____________';
16 | select '08', uuid_str(uuid4()) like '________-____-4___-____-____________';
17 | select '09', uuid_str('hello') is null;
18 | select '10', uuid_str('') is null;
19 | select '11', uuid_str(null) is null;
20 | 
21 | -- uuid_blob
22 | select '12', typeof(uuid_blob('d5a80b20-0d8f-11e5-b8cb-080027b6ec40')) = 'blob';
23 | select '13', typeof(uuid_blob(uuid4())) = 'blob';
24 | select '14', uuid_blob('hello') is null;
25 | select '15', uuid_blob('') is null;
26 | select '16', uuid_blob(null) is null;


--------------------------------------------------------------------------------
/docs/ipaddr.md:
--------------------------------------------------------------------------------
 1 | # ipaddr: IP address manipulation in SQLite
 2 | 
 3 | Functions to manipulate IPs and subnets. Created by [Vincent Bernat](https://github.com/vincentbernat).
 4 | 
 5 | ⚠️ This extension is not available on Windows.
 6 | 
 7 | ### `ipfamily(ip)`
 8 | 
 9 | Returns the family of a specified IP address.
10 | 
11 | ```
12 | sqlite> select ipfamily('192.168.1.1');
13 | 4
14 | ```
15 | 
16 | ### `iphost(ip)`
17 | 
18 | Returns the host part of an IP address.
19 | 
20 | ```
21 | sqlite> select iphost('2001:db8::123/64');
22 | 2001:db8::123
23 | ```
24 | 
25 | ### `ipmasklen(ip)`
26 | 
27 | Returns the prefix length of an IP address.
28 | 
29 | ```
30 | sqlite> select ipmasklen('192.168.16.12/24');
31 | 24
32 | ```
33 | 
34 | ### `ipnetwork(ip)`
35 | 
36 | Returns the network part of an IP address.
37 | 
38 | ```
39 | sqlite> select ipnetwork('192.168.16.12/24');
40 | 192.168.16.0/24
41 | ```
42 | 
43 | ### `ipcontains(subnet, ip)`
44 | 
45 | Returns `1` if `subnet` contains `ip` (which can be another subnet),
46 | `0` otherwise.
47 | 
48 | ```
49 | sqlite> select ipcontains('192.168.16.0/24', '192.168.16.3');
50 | 1
51 | ```
52 | 
53 | ## Usage
54 | 
55 | ```
56 | sqlite> .load ./ipaddr
57 | sqlite> select ipfamily('2001:db8::1');
58 | 6
59 | ```
60 | 
61 | [Download](https://github.com/nalgeon/sqlean/releases/latest)
62 | 


--------------------------------------------------------------------------------
/docs/uuid.md:
--------------------------------------------------------------------------------
 1 | # uuid: Universally Unique IDentifiers (UUIDs) in SQLite
 2 | 
 3 | Limited support for [RFC 4122](https://www.ietf.org/rfc/rfc4122.txt) compliant UUIDs:
 4 | 
 5 | - Generate a version 4 (random) UUID.
 6 | - Convert a 16-byte blob into a well-formed UUID string and vice versa.
 7 | 
 8 | Adapted from [uuid.c](https://sqlite.org/src/file/ext/misc/uuid.c) by D. Richard Hipp.
 9 | 
10 | Provides the following functions:
11 | 
12 | ### `uuid4()`
13 | 
14 | Generates a version 4 (random) UUID as a string. Aliased as `gen_random_uuid()` for PostgreSQL compatibility.
15 | 
16 | ```
17 | sqlite> select uuid4();
18 | c476b6e9-35f1-4afd-9552-704cd7edbe27
19 | 
20 | sqlite> select gen_random_uuid();
21 | 8d144638-3baf-4901-a554-b541142c152b
22 | ```
23 | 
24 | ### `uuid_str(X)`
25 | 
26 | Converts a UUID `X` into a well-formed UUID string. `X` can be either a string or a blob.
27 | 
28 | ```
29 | sqlite> select uuid_str(randomblob(16));
30 | fb6f9675-7509-d8b7-0891-00d4e6230894
31 | ```
32 | 
33 | ### `uuid_blob(X)`
34 | 
35 | Converts a UUID `X` into a 16-byte blob. `X` can be either a string or a blob.
36 | 
37 | ```
38 | sqlite> select hex(uuid_blob(uuid4()));
39 | 7192B1B452964E809500CF0364476CD3
40 | ```
41 | 
42 | ## Usage
43 | 
44 | ```
45 | sqlite> .load ./uuid
46 | sqlite> select uuid4();
47 | ```
48 | 
49 | [Download](https://github.com/nalgeon/sqlean/releases/latest)
50 | 


--------------------------------------------------------------------------------
/docs/third-party.md:
--------------------------------------------------------------------------------
 1 | # Third-party authors
 2 | 
 3 | SQLean relies heavily on third-party SQLite extensions and open source libraries. Some of them are public domain, others use free permissive licenses. SQLean does not use code distributed under copyleft or non-free licenses.
 4 | 
 5 | | Library | Author | License |
 6 | | ------- | ------ | ------- |
 7 | | [crypto-algorithms](https://github.com/B-Con/crypto-algorithms) | Brad Conte | Public Domain |
 8 | | extension-functions.c | Liam Healy | Public Domain |
 9 | | [fileio.c](https://www.sqlite.org/src/file/ext/misc/fileio.c) | D. Richard Hipp | Public Domain |
10 | | [libstrcmp](https://github.com/Rostepher/libstrcmp) | Ross Bayer | MIT License |
11 | | [percentile.c](https://sqlite.org/src/file/ext/misc/percentile.c) | D. Richard Hipp | Public Domain |
12 | | [regexp.old](https://github.com/garyhouston/regexp.old) | Henry Spencer | Spencer License 94 |
13 | | [series.c](https://sqlite.org/src/file/ext/misc/series.c) | D. Richard Hipp | Public Domain |
14 | | [sha1.c](https://sqlite.org/src/file/ext/misc/sha1.c) | D. Richard Hipp | Public Domain |
15 | | sha2.c | [Aaron D. Gifford](https://aarongifford.com/) | 3-Clause BSD License |
16 | | [sqlite3_unicode](https://github.com/Zensey/sqlite3_unicode) | Unknown Author | Public Domain |
17 | | [uuid.c](https://sqlite.org/src/file/ext/misc/uuid.c) | D. Richard Hipp | Public Domain |
18 | | [vsv.c](http://www.dessus.com/files/vsv.c) | Keith Medcalf | Public Domain |
19 | 


--------------------------------------------------------------------------------
/src/fuzzy/hamming.c:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2014 Ross Bayer, MIT License
 2 | // https://github.com/Rostepher/libstrcmp
 3 | 
 4 | #include <assert.h>
 5 | #include <stddef.h>
 6 | #include <string.h>
 7 | 
 8 | #include "common.h"
 9 | 
10 | /// Computes and returns the hamming distance between two strings. Both strings
11 | /// must have the same length and not be NULL. More information about the
12 | /// algorithm can be found here:
13 | ///     http://en.wikipedia.org/wiki/Hamming_distance
14 | ///
15 | /// @param str1 first non NULL string
16 | /// @param str2 second non NULL string
17 | ///
18 | /// @returns hamming distance, or -1 if str1 and str2 did not have the same
19 | ///     length; NULL arguments trip the assert() calls instead of returning -1
20 | int hamming(const char* str1, const char* str2) {
21 |     // strings cannot be NULL (enforced by assert, so only checked when NDEBUG is not defined)
22 |     assert(str1 != NULL);
23 |     assert(str2 != NULL);
24 | 
25 |     size_t str1_len = strlen(str1);
26 |     size_t str2_len = strlen(str2);
27 | 
28 |     // handle cases where strings have different lengths
29 |     if (str1_len != str2_len) {
30 |         return -1;
31 |     }
32 | 
33 |     // return 0 if strings are both empty, but not NULL
34 |     if (str1_len == 0 && str2_len == 0) {
35 |         return 0;
36 |     }
37 | 
38 |     int dist = 0;
39 |     while (str1_len > 0 && str2_len > 0) {  // lengths are equal here, so both counters reach zero together
40 |         dist += (NOT_EQ(*str1, *str2));  // NOT_EQ is not defined in this file (presumably common.h) — TODO confirm it yields 1 on mismatch
41 |         str1++, str2++;
42 |         str1_len--, str2_len--;
43 |     }
44 | 
45 |     return dist;
46 | }


--------------------------------------------------------------------------------
/test/crypto.sql:
--------------------------------------------------------------------------------
 1 | -- Copyright (c) 2021 Anton Zhiyanov, MIT License
 2 | -- https://github.com/nalgeon/sqlean
 3 | 
 4 | .load dist/crypto
 5 | 
 6 | select '01', md5(null) is NULL;
 7 | select '02', hex(md5('')) = upper('d41d8cd98f00b204e9800998ecf8427e');
 8 | select '03', hex(md5('abc')) = upper('900150983cd24fb0d6963f7d28e17f72');
 9 | 
10 | select '11', sha1(null) is NULL;
11 | select '12', hex(sha1('')) = upper('da39a3ee5e6b4b0d3255bfef95601890afd80709');
12 | select '13', hex(sha1('abc')) = upper('a9993e364706816aba3e25717850c26c9cd0d89d');
13 | 
14 | select '21', sha256(null) is NULL;
15 | select '22', hex(sha256('')) = upper('e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855');
16 | select '23', hex(sha256('abc')) = upper('ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad');
17 | 
18 | select '31', sha384(null) is NULL;
19 | select '32', hex(sha384('')) = upper('38b060a751ac96384cd9327eb1b1e36a21fdb71114be07434c0cc7bf63f6e1da274edebfe76f65fbd51ad2f14898b95b');
20 | select '33', hex(sha384('abc')) = upper('cb00753f45a35e8bb5a03d699ac65007272c32ab0eded1631a8b605a43ff5bed8086072ba1e7cc2358baeca134c825a7');
21 | 
22 | select '41', sha512(null) is NULL;
23 | select '42', hex(sha512('')) = upper('cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e');
24 | select '43', hex(sha512('abc')) = upper('ddaf35a193617abacc417349ae20413112e6fa4e89a97ea20a9eeee64b55d39a2192992a274fc1a836ba3c23a3feebbd454d4423643ce80e2a9ac94fa54ca49f');
25 | 


--------------------------------------------------------------------------------
/test/json1.sql:
--------------------------------------------------------------------------------
 1 | -- Copyright (c) 2021 Anton Zhiyanov, MIT License
 2 | -- https://github.com/nalgeon/sqlean
 3 | 
 4 | .load dist/stats
 5 | .load dist/json1
 6 | 
 7 | -- total of 17 functions
 8 | 
 9 | select '01', json('{"answer" : 42}') = '{"answer":42}';
10 | select '02', json_array(1, 2, 3, 4) = '[1,2,3,4]';
11 | select '03', json_array_length('[1,2,3,4]') = 4;
12 | select '04', json_object('answer', 42) = '{"answer":42}';
13 | 
14 | select '11', json_extract('{"answer":42}', '$.answer') = 42;
15 | select '12', json_insert('[1,2,3]', '$[#]', 42) = '[1,2,3,42]';
16 | select '13', json_replace('{"answer":42}', '$.answer', 'no') = '{"answer":"no"}';
17 | select '14', json_set('{"answer":42}', '$.useful', false) = '{"answer":42,"useful":0}';
18 | select '15', json_patch('{"a":1,"b":2,"c":3}', '{"b":10,"d":11}') = '{"a":1,"b":10,"c":3,"d":11}';
19 | select '16', json_remove('{"answer":42,"useful":0}', '$.useful') = '{"answer":42}';
20 | 
21 | select '21', json_type('{"answer":42}') = 'object';
22 | select '22', json_valid('{"answer":42}') = 1;
23 | select '23', json_quote('answer') = '"answer"';
24 | 
25 | select '31', json_group_array(value) = '[1,2,3,4]' from generate_series(1,4);
26 | select '32', json_group_object('v', value) = '{"v":1,"v":2,"v":3,"v":4}' from generate_series(1,4);
27 | 
28 | select '41', sum(value) = 10 from json_each('[1,2,3,4]');
29 | select '42', sum(value) = 10 from json_each('{"a":[1,2,3,4]}', '$.a');
30 | select '43', count(*) = 6 from json_tree('{"a":[1,2,3,4]}');
31 | select '44', count(*) = 5 from json_tree('{"a":[1,2,3,4]}', '$.a');
32 | 


--------------------------------------------------------------------------------
/.github/workflows/build.yml:
--------------------------------------------------------------------------------
 1 | name: build
 2 | 
 3 | on:
 4 |     push:
 5 |         branches: [main]
 6 |         paths:
 7 |             - .github/**
 8 |             - src/**
 9 |             - test/**
10 |             - Makefile
11 |     pull_request:
12 |         branches: [main]
13 |     workflow_dispatch:
14 | 
15 | env:
16 |     SQLITE_RELEASE_YEAR: "2021"
17 |     SQLITE_VERSION: "3360000"
18 |     SQLITE_BRANCH: "3.36"
19 | 
20 | jobs:
21 |     build:
22 |         name: Build for ${{ matrix.os }}
23 |         runs-on: ${{ matrix.os }}
24 |         strategy:
25 |             matrix:
26 |                 include:
27 |                     - os: ubuntu-latest
28 |                     - os: windows-latest
29 |                     - os: macos-latest
30 | 
31 |         steps:
32 |             - uses: actions/checkout@v2
33 | 
34 |             - name: Download SQLite sources
35 |               shell: bash
36 |               run: |
37 |                   make prepare-dist
38 |                   make download-sqlite
39 |                   make download-external
40 | 
41 |             - name: Build for Linux
42 |               if: matrix.os == 'ubuntu-latest'
43 |               run: |
44 |                   make compile-linux
45 |                   make test-all
46 | 
47 |             - name: Build for Windows
48 |               if: matrix.os == 'windows-latest'
49 |               shell: bash
50 |               run: make compile-windows
51 | 
52 |             - name: Build for macOS
53 |               if: matrix.os == 'macos-latest'
54 |               run: make compile-macos
55 | 


--------------------------------------------------------------------------------
/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
 1 | name: publish
 2 | 
 3 | on:
 4 |     push:
 5 |         tags:
 6 |             - "*"
 7 |     workflow_dispatch:
 8 | 
 9 | env:
10 |     SQLITE_RELEASE_YEAR: "2021"
11 |     SQLITE_VERSION: "3360000"
12 |     SQLITE_BRANCH: "3.36"
13 | 
14 | jobs:
15 |     publish:
16 |         name: Publish for ${{ matrix.os }}
17 |         runs-on: ${{ matrix.os }}
18 |         strategy:
19 |             matrix:
20 |                 include:
21 |                     - os: ubuntu-latest
22 |                     - os: windows-latest
23 |                     - os: macos-latest
24 | 
25 |         steps:
26 |             - uses: actions/checkout@v2
27 | 
28 |             - name: Download SQLite sources
29 |               shell: bash
30 |               run: |
31 |                   make prepare-dist
32 |                   make download-sqlite
33 |                   make download-external
34 | 
35 |             - name: Build for Linux
36 |               if: matrix.os == 'ubuntu-latest'
37 |               run: make compile-linux
38 | 
39 |             - name: Build for Windows
40 |               if: matrix.os == 'windows-latest'
41 |               shell: bash
42 |               run: make compile-windows
43 | 
44 |             - name: Build for macOS
45 |               if: matrix.os == 'macos-latest'
46 |               run: make compile-macos
47 | 
48 |             - name: Upload binaries to release
49 |               uses: svenstaro/upload-release-action@v2
50 |               with:
51 |                   repo_token: ${{ secrets.GITHUB_TOKEN }}
52 |                   file: dist/*
53 |                   file_glob: true
54 |                   tag: ${{ github.ref }}
55 | 


--------------------------------------------------------------------------------
/test/stats.sql:
--------------------------------------------------------------------------------
 1 | -- Copyright (c) 2021 Anton Zhiyanov, MIT License
 2 | -- https://github.com/nalgeon/sqlean
 3 | 
 4 | .load dist/stats
 5 | 
 6 | select '01', percentile(value, 25) = 25.5 from generate_series(1, 99);
 7 | select '02', percentile_25(value) = 25.5 from generate_series(1, 99);
 8 | 
 9 | select '03', percentile(value, 50) = 50 from generate_series(1, 99);
10 | select '04', median(value) = 50 from generate_series(1, 99);
11 | 
12 | select '05', percentile(value, 75) = 74.5 from generate_series(1, 99);
13 | select '06', percentile_75(value) = 74.5 from generate_series(1, 99);
14 | 
15 | select '07', percentile(value, 90) = 89.2 from generate_series(1, 99);
16 | select '08', percentile_90(value) = 89.2 from generate_series(1, 99);
17 | 
18 | select '09', percentile(value, 95) = 95.05 from generate_series(1, 100);
19 | select '10', percentile_95(value) = 95.05 from generate_series(1, 100);
20 | 
21 | select '11', percentile(value, 99) = 98.02 from generate_series(1, 99);
22 | select '12', percentile_99(value) = 98.02 from generate_series(1, 99);
23 | 
24 | select '21', round(stddev(value), 1) = 28.7 from generate_series(1, 99);
25 | select '22', round(stddev_samp(value), 1) = 28.7 from generate_series(1, 99);
26 | select '23', round(stddev_pop(value), 1) = 28.6 from generate_series(1, 99);
27 | 
28 | select '31', variance(value) = 825 from generate_series(1, 99);
29 | select '32', var_samp(value) = 825 from generate_series(1, 99);
30 | select '33', round(var_pop(value), 0) = 817 from generate_series(1, 99);
31 | 
32 | select '41', (count(*), min(value), max(value)) = (99, 1, 99) from generate_series(1, 99);
33 | select '42', (count(*), min(value), max(value)) = (20, 0, 95) from generate_series(0, 99, 5);
34 | with tmp as (select * from generate_series(20) limit 10)
35 | select '43', (count(*), min(value), max(value)) = (10, 20, 29) from tmp;


--------------------------------------------------------------------------------
/src/re.h:
--------------------------------------------------------------------------------
 1 | // Originally by Henry Spencer, Spencer License 94
 2 | // https://github.com/garyhouston/regexp.old
 3 | // Modified by Anton Zhiyanov, https://github.com/nalgeon/sqlean/, MIT License
 4 | 
 5 | /*
 6 |  * Copyright (c) 1986, 1993, 1995 by University of Toronto.
 7 |  * Written by Henry Spencer.  Not derived from licensed software.
 8 |  *
 9 |  * Permission is granted to anyone to use this software for any
10 |  * purpose on any computer system, and to redistribute it in any way,
11 |  * subject to the following restrictions:
12 |  *
13 |  * 1. The author is not responsible for the consequences of use of
14 |  * 	this software, no matter how awful, even if they arise
15 |  * 	from defects in it.
16 |  *
17 |  * 2. The origin of this software must not be misrepresented, either
18 |  * 	by explicit claim or by omission.
19 |  *
20 |  * 3. Altered versions must be plainly marked as such, and must not
21 |  * 	be misrepresented (by explicit claim or omission) as being
22 |  * 	the original software.
23 |  *
24 |  * 4. This notice must not be removed or altered.
25 |  */
26 | 
27 | /*
28 |  * Definitions etc. for regexp(3) routines.
29 |  *
30 |  * Caveat:  this is V8 regexp(3) [actually, a reimplementation thereof],
31 |  * not the System V one.
32 |  */
#ifndef RE_H
#define RE_H

/* Number of slots in the startp/endp arrays of a compiled regexp. */
#define NSUBEXP 10

/*
 * A compiled regular expression.
 *
 * NOTE(review): in Henry Spencer's regexp(3), startp[i]/endp[i] delimit the
 * i-th parenthesized submatch after a successful match, with slot 0 being the
 * whole match -- confirm against the implementation in sqlite3-re.c.
 *
 * The remaining fields are marked internal by the original author. `program`
 * is declared with one element; presumably the compiler allocates extra space
 * past the end of the struct for it (the classic pre-C99 "struct hack").
 * It is deliberately not converted to a flexible array member here, because
 * that would change sizeof(regexp), which the allocation code may rely on.
 */
typedef struct regexp {
    char* startp[NSUBEXP];
    char* endp[NSUBEXP];
    char regstart;   /* Internal use only. */
    char reganch;    /* Internal use only. */
    char* regmust;   /* Internal use only. */
    int regmlen;     /* Internal use only. */
    char program[1]; /* Unwarranted chumminess with compiler. */
} regexp;

/* Compiles the pattern `re` into a regexp object. */
regexp* re_compile(const char* re);
/* Runs the compiled regexp `rp` against the string `s`. */
int re_execute(regexp* rp, const char* s);
/* Writes `src` into `dst`, using the match data stored in `rp`. */
int re_substitute(const regexp* rp, const char* src, char* dst);
/* Error reporting hook that receives a message string. */
void re_error(char* message);

#endif /* RE_H */
48 | 


--------------------------------------------------------------------------------
/src/fuzzy/common.h:
--------------------------------------------------------------------------------
// Adapted from the spellfix SQLite extension, Public Domain
 2 | // https://www.sqlite.org/src/file/ext/misc/spellfix.c
 3 | 
 4 | #ifndef COMMON_H
 5 | #define COMMON_H
 6 | 
 7 | /*
 8 | ** Character classes for ASCII characters:
 9 | **
10 | **   0   ''        Silent letters:   H W
11 | **   1   'A'       Any vowel:   A E I O U (Y)
12 | **   2   'B'       A bilabeal stop or fricative:  B F P V W
13 | **   3   'C'       Other fricatives or back stops:  C G J K Q S X Z
14 | **   4   'D'       Alveolar stops:  D T
15 | **   5   'H'       Letter H at the beginning of a word
16 | **   6   'L'       Glide:  L
17 | **   7   'R'       Semivowel:  R
18 | **   8   'M'       Nasals:  M N
19 | **   9   'Y'       Letter Y at the beginning of a word.
20 | **   10  '9'       Digits: 0 1 2 3 4 5 6 7 8 9
21 | **   11  ' '       White space
22 | **   12  '?'       Other.
23 | */
24 | #define CCLASS_SILENT 0
25 | #define CCLASS_VOWEL 1
26 | #define CCLASS_B 2
27 | #define CCLASS_C 3
28 | #define CCLASS_D 4
29 | #define CCLASS_H 5
30 | #define CCLASS_L 6
31 | #define CCLASS_R 7
32 | #define CCLASS_M 8
33 | #define CCLASS_Y 9
34 | #define CCLASS_DIGIT 10
35 | #define CCLASS_SPACE 11
36 | #define CCLASS_OTHER 12
37 | 
38 | #define SCRIPT_LATIN 0x0001
39 | #define SCRIPT_CYRILLIC 0x0002
40 | #define SCRIPT_GREEK 0x0004
41 | #define SCRIPT_HEBREW 0x0008
42 | #define SCRIPT_ARABIC 0x0010
43 | 
44 | #define ALWAYS(X) 1
45 | #define NEVER(X) 0
46 | 
47 | // Copyright (c) 2014 Ross Bayer, MIT License
48 | // https://github.com/Rostepher/libstrcmp
49 | 
50 | #define EQ(a, b) ((a) == (b))
51 | #define NOT_EQ(a, b) !EQ(a, b)
52 | 
53 | #define MIN(a, b) ((a) < (b)) ? (a) : (b)
54 | #define MIN3(a, b, c) MIN(MIN(a, b), c)
55 | #define MIN4(a, b, c, d) MIN(MIN(a, b), MIN(c, d))
56 | 
57 | #define MAX(a, b) ((a) > (b)) ? (a) : (b)
58 | #define MAX3(a, b, c) MAX(MAX(a, b), c)
59 | #define MAX4(a, b, c, d) MAX(MAX(a, b), MAX(b, c))
60 | 
61 | #endif


--------------------------------------------------------------------------------
/test/ipaddr.sql:
--------------------------------------------------------------------------------
 1 | -- Copyright (c) 2021 Vincent Bernat, MIT License
 2 | -- https://github.com/nalgeon/sqlean
 3 | 
.load dist/ipaddr
-- Each statement prints a test id and 1 when the check holds (0 otherwise).
-- ipfamily: 4 for IPv4 and 6 for IPv6, with or without a mask
select '00', ipfamily('192.168.16.12') = 4;
select '01', ipfamily('192.168.16.12/24') = 4;
select '02', ipfamily('2001:db8::1') = 6;
select '03', ipfamily('2001:db8::1/64') = 6;
-- iphost: the address with the mask part removed
select '04', iphost('192.168.160.120') = '192.168.160.120';
select '05', iphost('192.168.16.12/24') = '192.168.16.12';
select '06', iphost('2001:db8::1/64') = '2001:db8::1';
select '07', iphost('2001:db8::1') = '2001:db8::1';
-- ipmasklen: explicit mask length, or 32 (IPv4) / 128 (IPv6) when omitted
select '08', ipmasklen('192.168.16.12') = 32;
select '09', ipmasklen('192.168.16.12/24') = 24;
select '10', ipmasklen('2001:db8::1/64') = 64;
select '11', ipmasklen('2001:db8::1') = 128;
-- ipnetwork: host bits are cleared, the mask length is always printed
select '12', ipnetwork('192.168.160.120/24') = '192.168.160.0/24';
select '13', ipnetwork('192.168.160.128/26') = '192.168.160.128/26';
select '14', ipnetwork('192.168.160.120') = '192.168.160.120/32';
select '15', ipnetwork('2001:db8::1/64') = '2001:db8::/64';
select '16', ipnetwork('2001:db8::1') = '2001:db8::1/128';
select '17', ipnetwork('2001:db8:1::1/48') = '2001:db8:1::/48';
select '18', ipnetwork('2001:db8:1::1/47') = '2001:db8::/47';
-- ipcontains(network, address): single address inside / outside the network
select '19', ipcontains('192.168.16.0/24', '192.168.16.3') = 1;
select '20', ipcontains('192.168.15.0/24', '192.168.16.3') = 0;
select '21', ipcontains('2001:db8::/64', '2001:db8::17') = 1;
select '22', ipcontains('2001:db8:1::/64', '2001:db8::17') = 0;
-- ipcontains(network, subnet): the second argument is itself a network
select '23', ipcontains('192.168.16.0/24', '192.168.16.0/26') = 1;
select '24', ipcontains('192.168.16.0/27', '192.168.16.0/26') = 0;
select '25', ipcontains('192.168.16.0/25', '192.168.16.128/26') = 0;
select '26', ipcontains('2001:db8::/48', '2001:db8::/64') = 1;
select '27', ipcontains('2001:db8::/56', '2001:db8::/48') = 0;
select '28', ipcontains('2001:db8::/56', '2001:db8:1::/64') = 0;
34 | 


--------------------------------------------------------------------------------
/docs/unicode.md:
--------------------------------------------------------------------------------
 1 | # unicode: Unicode support for SQLite
 2 | 
 3 | Implements case-insensitive string comparison for Unicode strings. Has no external dependencies (like libicu). Adapted from [sqlite3_unicode](https://github.com/Zensey/sqlite3_unicode).
 4 | 
Provides the following Unicode features:
 6 | 
 7 | - `upper()` and `lower()` functions to normalize case.
 8 | - `like()` function and `LIKE` operator with case-independent matching.
 9 | - `unaccent()` function to normalize strings by removing accents.
10 | 
11 | Tries to override the default NOCASE case-insensitive collation sequence to support UTF-8 characters (available in SQLite CLI and C API only).
12 | 
13 | ### Upper and lower
14 | 
15 | ```
16 | sqlite> select upper('привет');
17 | ПРИВЕТ
18 | sqlite> select nupper('привет');
19 | ПРИВЕТ
20 | ```
21 | 
22 | `nupper()` is an alias for `upper()` in case the latter is already overridden by some other extension.
23 | 
24 | ```
25 | sqlite> select lower('ПРИВЕТ');
26 | привет
27 | sqlite> select nlower('ПРИВЕТ');
28 | привет
29 | ```
30 | 
31 | `nlower()` is an alias for `lower()` in case the latter is already overridden by some other extension.
32 | 
33 | ### Case-insensitive LIKE
34 | 
35 | The pattern in `like()` function goes first:
36 | 
37 | ```
38 | sqlite> select like('пРиВ_Т', 'привет');
39 | 1
40 | ```
41 | 
42 | The pattern in `LIKE` operator goes second:
43 | 
44 | ```
45 | sqlite> select 'привет' like 'пРиВ_Т';
46 | 1
47 | ```
48 | 
49 | ### Unaccent
50 | 
51 | ```
52 | sqlite> select unaccent('hôtel');
53 | hotel
54 | ```
55 | 
56 | ## Usage
57 | 
58 | Before:
59 | 
60 | ```
61 | sqlite> select upper('hello');
62 | HELLO
63 | sqlite> select upper('привет');
64 | привет
65 | ```
66 | 
67 | After:
68 | 
69 | ```
70 | sqlite> .load ./unicode
71 | sqlite> select upper('hello');
72 | HELLO
73 | sqlite> select upper('привет');
74 | ПРИВЕТ
75 | ```
76 | 
77 | [Download](https://github.com/nalgeon/sqlean/releases/latest)
78 | 


--------------------------------------------------------------------------------
/docs/stats.md:
--------------------------------------------------------------------------------
 1 | # stats: Mathematical statistics in SQLite
 2 | 
 3 | Common statistical functions. Adapted from [extension-functions.c](https://sqlite.org/contrib/) by Liam Healy, [percentile.c](https://sqlite.org/src/file/ext/misc/percentile.c) and [series.c](https://sqlite.org/src/file/ext/misc/series.c) by D. Richard Hipp.
 4 | 
 5 | ### Aggregate functions
 6 | 
 7 | -   `median(x)` — median (50th percentile),
 8 | -   `percentile_25(x)` — 25th percentile,
 9 | -   `percentile_75(x)` — 75th percentile,
10 | -   `percentile_90(x)` — 90th percentile,
11 | -   `percentile_95(x)` — 95th percentile,
12 | -   `percentile_99(x)` — 99th percentile,
13 | -   `percentile(x, perc)` — custom percentile (`perc` between 0 and 100),
14 | -   `stddev(x)` or `stddev_samp(x)` — sample standard deviation,
15 | -   `stddev_pop(x)` — population standard deviation,
16 | -   `variance(x)` or `var_samp(x)` — sample variance,
17 | -   `var_pop(x)` — population variance.
18 | 
19 | ### generate_series(start[, stop[, step]])
20 | 
21 | This table-valued function generates a sequence of integer values starting with `start`, ending with `stop` (inclusive) with an optional `step`.
22 | 
23 | Generate all integers from 1 to 99:
24 | 
25 | ```sql
26 | select * from generate_series(1, 99);
27 | ```
28 | 
29 | Generate all multiples of 5 less than or equal to 100:
30 | 
31 | ```sql
32 | select * from generate_series(5, 100, 5);
33 | ```
34 | 
35 | Generate 20 random integer values:
36 | 
37 | ```sql
38 | select random() from generate_series(1, 20);
39 | ```
40 | 
41 | The `generate_series()` table has a single result column named `value` holding integer values, and a number of rows determined by the parameters `start`, `stop`, and `step`. The first row of the table has a value of `start`. Subsequent rows increase by `step` up to `stop`.
42 | 
43 | `stop` defaults to 9223372036854775807. `step` defaults to 1.
44 | 
45 | ## Usage
46 | 
47 | ```
48 | sqlite> .load ./stats
49 | sqlite> select median(value) from generate_series(1, 99);
50 | ```
51 | 
52 | [Download](https://github.com/nalgeon/sqlean/releases/latest)
53 | 


--------------------------------------------------------------------------------
/test/fileio.sql:
--------------------------------------------------------------------------------
 1 | -- Copyright (c) 2021 Anton Zhiyanov, MIT License
 2 | -- https://github.com/nalgeon/sqlean
 3 | 
 4 | .load dist/fileio
 5 | 
 6 | -- lsdir
 7 | select '01', (name, mode, size) = ('LICENSE', 33188, 1108) from lsdir('LICENSE');
 8 | select '02', count(*) >= 10 from lsdir('test');
 9 | select '03', count(*) = 0 from lsdir('whatever.txt');
10 | .shell mkdir parentdir
11 | .shell touch parentdir/parent.txt
12 | .shell mkdir parentdir/subdir
13 | .shell touch parentdir/subdir/child.txt
14 | select '04', count(*) = 3 from lsdir('parentdir');
15 | select '05', count(*) = 3 from lsdir('parentdir', false);
16 | select '06', count(*) = 4 from lsdir('parentdir', true);
17 | .shell rm -rf parentdir
18 | 
19 | -- lsmode
20 | select '11', lsmode(16877) = 'drwxr-xr-x';
21 | select '12', lsmode(33206) = '-rw-rw-rw-';
22 | select '13', lsmode(33188) = '-rw-r--r--';
23 | select '14', lsmode(384) = '?rw-------';
24 | select '15', lsmode(420) = '?rw-r--r--';
25 | select '16', lsmode(436) = '?rw-rw-r--';
26 | select '17', lsmode(438) = '?rw-rw-rw-';
27 | select '18', lsmode(493) = '?rwxr-xr-x';
28 | select '19', lsmode(511) = '?rwxrwxrwx';
29 | 
30 | -- mkdir
31 | .shell rm -rf hellodir
32 | select '21', mkdir('hellodir') is null;
33 | select '22', (name, mode) = ('hellodir', 16877) from fsdir('hellodir');
34 | 
35 | -- readfile
36 | .shell rm -f hello.txt
37 | .shell printf 'hello world' > hello.txt
38 | select '31', typeof(readfile('hello.txt')) = 'blob';
39 | select '32', length(readfile('hello.txt')) = 11;
40 | select '33', readfile('whatever') is null;
41 | 
42 | -- symlink
43 | .shell rm -f hello.txt
44 | .shell printf 'hello world' > hello.txt
45 | select '41', symlink('hello.txt', 'hello.lnk') is null;
46 | select '42', length(readfile('hello.lnk')) = 11;
47 | 
48 | -- writefile
49 | .shell rm -f hello.txt
50 | select '51', writefile('hello.txt', 'hello world') = 11;
51 | select '52', (name, mode) = ('hello.txt', 33206) from fsdir('hello.txt');
52 | select '53', writefile('hello.txt', 'hello world', 420) = 11;
53 | select '54', (name, mode) = ('hello.txt', 33188) from fsdir('hello.txt');
54 | 
55 | .shell rm -rf hellodir
56 | .shell rm -f hello.txt
57 | .shell rm -f hello.lnk
58 | 


--------------------------------------------------------------------------------
/test/math.sql:
--------------------------------------------------------------------------------
 1 | -- Copyright (c) 2021 Anton Zhiyanov, MIT License
 2 | -- https://github.com/nalgeon/sqlean
 3 | 
.load dist/math

-- Each statement prints a test id and 1 when the check holds (0 otherwise).
-- Results that are not exactly representable are compared after round().

-- total of 30 functions

-- rounding (4)
select '01', ceil(3.3) = 4;
select '02', ceil(-3.9) = -3;
select '03', ceiling(3.3) = 4;
select '04', ceiling(-3.9) = -3;
select '05', floor(3.9) = 3;
select '06', floor(-3.9) = -4;
-- trunc drops the fractional part (rounds toward zero)
select '07', trunc(3.3) = 3;
select '08', trunc(3.9) = 3;
select '09', trunc(-3.3) = -3;
select '10', trunc(-3.9) = -3;

-- log (5)
select '11', round(ln(2.71828*2.71828)) = 2;
-- one-argument log is base 10
select '12', round(log(100)) = 2;
select '13', round(log10(100)) = 2;
select '14', round(log2(4)) = 2;
-- two-argument log takes the base first: log(base, x)
select '15', round(log(3,9)) = 2;

-- power (4)
select '16', round(exp(2), 3) = round(2.71828*2.71828, 3);
select '17', pow(2, 10) = 1024;
select '18', power(2, 10) = 1024;
select '19', sqrt(100) = 10;

-- trigonometric (3)
select '21', cos(0) = 1;
select '22', round(cos(pi()/2)) = 0;
select '23', sin(0) = 0;
select '24', round(sin(pi()/2)) = 1;
select '25', tan(0) = 0;
select '26', round(tan(0.8)) = 1;

-- hyperbolic (3)
select '31', cosh(0) = 1;
select '32', round(cosh(2.07)) = 4;
select '33', sinh(0) = 0;
select '34', round(sinh(2.1)) = 4;
select '35', tanh(0) = 0;
select '36', round(tanh(3)) = 1;

-- inverse trigonometric (4)
select '41', acos(1) = 0;
select '42', round(acos(0), 2) = round(pi()/2, 2);
select '43', asin(0) = 0;
select '44', round(asin(1), 2) = round(0.5*pi(), 2);
select '45', atan(0) = 0;
select '46', round(atan(pi()/2)) = 1;
select '47', round(atan2(1, 2), 2) = 0.46;
select '48', round(atan2(pi(), 2)) = 1;

-- inverse hyperbolic (3)
select '51', acosh(1) = 0;
select '52', round(acosh(4)) = 2;
select '53', asinh(0) = 0;
select '54', round(asinh(4)) = 2;
select '55', atanh(0) = 0;
select '56', round(atanh(0.8)) = 1;

-- angular measures (2)
select '61', radians(0) = 0;
select '62', round(radians(180), 2) = round(pi(), 2);
select '63', degrees(0) = 0;
select '64', round(degrees(pi())) = 180;

-- other (2)
select '71', mod(10,3) = 1;
select '72', round(pi(), 5) = 3.14159;


--------------------------------------------------------------------------------
/src/fuzzy/levenshtein.c:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2014 Ross Bayer, MIT License
 2 | // https://github.com/Rostepher/libstrcmp
 3 | 
 4 | #include <assert.h>
 5 | #include <stdlib.h>
 6 | #include <string.h>
 7 | 
 8 | #include "common.h"
 9 | 
/// Calculates and returns the Levenshtein distance of two non NULL strings.
/// More information about the algorithm can be found here:
///     https://en.wikipedia.org/wiki/Levenshtein_distance
///
/// The strings are compared byte by byte. Only a single row of the distance
/// matrix is kept in memory, so the extra space is proportional to the length
/// of str1 (after the common prefix has been removed).
///
/// @param str1 first non NULL string
/// @param str2 second non NULL string
///
/// @returns the levenshtein distance of str1 and str2, or (unsigned)-1
///          if the working memory could not be allocated
unsigned levenshtein(const char* str1, const char* str2) {
    // strings cannot be NULL
    assert(str1 != NULL);
    assert(str2 != NULL);

    size_t str1_len = strlen(str1);
    size_t str2_len = strlen(str2);

    // a common prefix never contributes to the distance, skip it
    while (str1_len > 0 && str2_len > 0 && str1[0] == str2[0]) {
        str1++, str2++;
        str1_len--, str2_len--;
    }

    // if one string is (now) empty, the distance is the length of the other;
    // checked after the prefix removal so that no allocation is needed
    if (str1_len == 0) {
        return (unsigned)str2_len;
    }
    if (str2_len == 0) {
        return (unsigned)str1_len;
    }

    // vector[col] holds the distance between the first `col` chars of str1
    // and the prefix of str2 processed so far
    unsigned* vector = calloc(str1_len + 1, sizeof *vector);
    if (vector == NULL) {
        return (unsigned)-1;
    }
    for (size_t col = 1; col <= str1_len; col++) {
        vector[col] = (unsigned)col;
    }

    // iterate through the imagined rows of the matrix, one per char of str2
    for (size_t row = 1; row <= str2_len; row++) {
        // value of the cell diagonally up-left of the current one
        unsigned last_diag = (unsigned)(row - 1);
        vector[0] = (unsigned)row;

        for (size_t col = 1; col <= str1_len; col++) {
            // remember the cell above before it is overwritten
            unsigned above = vector[col];
            unsigned cost = (str1[col - 1] == str2[row - 1]) ? 0 : 1;

            unsigned best = last_diag + cost;  // substitution (or match)
            if (above + 1 < best) {
                best = above + 1;  // deletion
            }
            if (vector[col - 1] + 1 < best) {
                best = vector[col - 1] + 1;  // insertion
            }
            vector[col] = best;

            last_diag = above;
        }
    }

    unsigned distance = vector[str1_len];
    free(vector);
    return distance;
}


--------------------------------------------------------------------------------
/docs/re.md:
--------------------------------------------------------------------------------
 1 | # re: Regular expressions in SQLite
 2 | 
 3 | Regexp search and replace functions.
 4 | Adapted from [regexp.old](https://github.com/garyhouston/regexp.old) by Henry Spencer.
 5 | 
Provides the following functions:
 7 | 
 8 | ### `REGEXP` statement
 9 | 
10 | Checks if source string matches pattern.
11 | 
12 | ```
13 | sqlite> select true where 'the year is 2021' regexp '[0-9]+';
14 | 1
15 | ```
16 | 
17 | ### `regexp_like(source, pattern)`
18 | 
19 | Checks if source string matches pattern.
20 | 
21 | ```
22 | sqlite> select regexp_like('the year is 2021', '[0-9]+');
23 | 1
24 | sqlite> select regexp_like('the year is 2021', '2k21');
25 | 0
26 | ```
27 | 
28 | ### `regexp_substr(source, pattern)`
29 | 
30 | Returns source substring matching pattern.
31 | 
32 | ```
33 | sqlite> select regexp_substr('the year is 2021', '[0-9]+');
34 | 2021
35 | sqlite> select regexp_substr('the year is 2021', '2k21');
36 | 
37 | ```
38 | 
39 | ### `regexp_replace(source, pattern, replacement)`
40 | 
41 | Replaces matching substring with replacement string.
42 | 
43 | ```
44 | sqlite> select regexp_replace('the year is 2021', '[0-9]+', '2050');
45 | the year is 2050
46 | sqlite> select regexp_replace('the year is 2021', '2k21', '2050');
47 | the year is 2021
48 | ```
49 | 
Supports backreferences to captured groups `\1` through `\9` in the replacement string:
51 | 
52 | ```
53 | sqlite> select regexp_replace('the year is 2021', '([0-9]+)', '\1 or 2050');
54 | the year is 2021 or 2050
55 | ```
56 | 
57 | ## Supported syntax
58 | 
59 | The following regular expression syntax is supported:
60 | 
61 | ```
62 | X*      zero or more occurrences of X
63 | X+      one or more occurrences of X
64 | X?      zero or one occurrences of X
65 | (X)     match X
66 | X|Y     X or Y
67 | ^X      X occurring at the beginning of the string
68 | X$      X occurring at the end of the string
69 | .       Match any single character
70 | \c      Character c where c is one of \{}()[]|*+?.
71 | \c      C-language escapes for c in afnrtv. ex: \t or \n
72 | [abc]   Any single character from the set abc
73 | [^abc]  Any single character not in the set abc
74 | [a-z]   Any single character in the range a-z
75 | [^a-z]  Any single character not in the range a-z
76 | ```
77 | 
78 | ## Usage
79 | 
80 | ```
81 | sqlite> .load ./re
82 | sqlite> select regexp_like('abcdef', 'b.d');
83 | ```
84 | 
85 | [Download](https://github.com/nalgeon/sqlean/releases/latest)
86 | 


--------------------------------------------------------------------------------
/docs/fuzzy.md:
--------------------------------------------------------------------------------
 1 | # fuzzy: Fuzzy string matching and phonetics in SQLite
 2 | 
 3 | Fuzzy-matching helpers:
 4 | 
 5 | -   Measure distance between two strings.
 6 | -   Compute phonetic string code.
 7 | -   Transliterate a string.
 8 | 
 9 | Adapted from [libstrcmp](https://github.com/Rostepher/libstrcmp) by Ross Bayer and [spellfix.c](https://www.sqlite.org/src/file/ext/misc/spellfix.c) by D. Richard Hipp.
10 | 
11 | If you want a ready-to-use mechanism to search a large vocabulary for close matches, see the [spellfix](https://github.com/nalgeon/sqlean/issues/27#issuecomment-1002297477) extension.
12 | 
13 | ## String distances
14 | 
15 | Measure distance between two strings:
16 | 
17 | -   `dlevenshtein(x, y)` - Damerau-Levenshtein distance,
18 | -   `edit_distance(x, y)` - Spellcheck edit distance,
19 | -   `hamming(x, y)` - Hamming distance,
20 | -   `jaro_winkler(x, y)` - Jaro-Winkler distance,
21 | -   `levenshtein(x, y)` - Levenshtein distance,
22 | -   `osa_distance(x, y)` - Optimal String Alignment distance.
23 | 
24 | ```
25 | sqlite> select dlevenshtein('awesome', 'aewsme');
26 | 2
27 | 
28 | sqlite> select edit_distance('awesome', 'aewsme');
29 | 215
30 | 
31 | sqlite> select hamming('awesome', 'aewsome');
32 | 2
33 | 
34 | sqlite> select jaro_winkler('awesome', 'aewsme');
35 | 0.907
36 | 
37 | sqlite> select levenshtein('awesome', 'aewsme');
38 | 3
39 | 
40 | sqlite> select osa_distance('awesome', 'aewsme');
41 | 3
42 | ```
43 | 
44 | Only ASCII strings are supported.
45 | 
46 | ## Phonetic codes
47 | 
48 | Compute phonetic string code:
49 | 
50 | -   `caverphone(x)` - Caverphone code,
51 | -   `phonetic_hash(x)` - Spellcheck phonetic code,
52 | -   `soundex(x)` - Soundex code,
53 | -   `rsoundex(x)` - Refined Soundex code.
54 | 
55 | ```
56 | sqlite> select caverphone('awesome');
57 | AWSM111111
58 | 
59 | sqlite> select phonetic_hash('awesome');
60 | ABACAMA
61 | 
62 | sqlite> select soundex('awesome');
63 | A250
64 | 
65 | sqlite> select rsoundex('awesome');
66 | A03080
67 | ```
68 | 
69 | Only ASCII strings are supported.
70 | 
71 | ## Transliteration
72 | 
73 | Transliteration converts the input string from UTF-8 into pure ASCII
74 | by converting all non-ASCII characters to some combination of characters
75 | in the ASCII subset.
76 | 
Distance and phonetic functions are ASCII-only, so to work
with a Unicode string one should transliterate it first.
79 | 
80 | ```
81 | sqlite> select translit('привет');
82 | privet
83 | ```
84 | 
85 | ## Usage
86 | 
87 | ```
88 | sqlite> .load ./fuzzy
89 | sqlite> select soundex('hello');
90 | ```
91 | 
92 | [Download](https://github.com/nalgeon/sqlean/releases/latest)
93 | 


--------------------------------------------------------------------------------
/src/fuzzy/optimal_string_alignment.c:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2014 Ross Bayer, MIT License
 2 | // https://github.com/Rostepher/libstrcmp
 3 | 
 4 | #include <assert.h>
 5 | #include <stdlib.h>
 6 | #include <string.h>
 7 | 
 8 | #include "common.h"
 9 | 
/// Computes and returns the Optimal String Alignment distance for two non NULL
/// strings. More information about the algorithm can be found here:
///     https://en.wikipedia.org/wiki/Damerau-Levenshtein_distance
///
/// The distance counts insertions, deletions, substitutions and swaps of two
/// adjacent characters. The strings are compared byte by byte.
///
/// @param str1 first non NULL string
/// @param str2 second non NULL string
///
/// @returns optimal string alignment distance for str1 and str2, or
///          (unsigned)-1 if the working memory could not be allocated
unsigned optimal_string_alignment(const char* str1, const char* str2) {
    // strings cannot be NULL
    assert(str1 != NULL);
    assert(str2 != NULL);

    size_t str1_len = strlen(str1);
    size_t str2_len = strlen(str2);

    // a common prefix never contributes to the distance, skip it
    while (str1_len > 0 && str2_len > 0 && str1[0] == str2[0]) {
        str1++, str2++;
        str1_len--, str2_len--;
    }

    // if one string is (now) empty, the distance is the length of the other
    if (str1_len == 0) {
        return (unsigned)str2_len;
    }
    if (str2_len == 0) {
        return (unsigned)str1_len;
    }

    // the matrix is stored row by row in one contiguous allocation:
    // cell (row, col) lives at matrix[row * cols + col]
    size_t rows = str1_len + 1;
    size_t cols = str2_len + 1;
    if (rows > (size_t)-1 / cols) {
        return (unsigned)-1;  // rows * cols would overflow size_t
    }
    unsigned* matrix = calloc(rows * cols, sizeof *matrix);
    if (matrix == NULL) {
        return (unsigned)-1;
    }

    // first row: distance from the empty string to each prefix of str2
    for (size_t col = 0; col < cols; col++) {
        matrix[col] = (unsigned)col;
    }

    // iterate through and fill in the matrix
    for (size_t row = 1; row < rows; row++) {
        unsigned* cur = matrix + row * cols;
        const unsigned* prev = cur - cols;

        // first column: distance from each prefix of str1 to the empty string
        cur[0] = (unsigned)row;

        for (size_t col = 1; col < cols; col++) {
            unsigned cost = (str1[row - 1] == str2[col - 1]) ? 0 : 1;

            unsigned best = prev[col - 1] + cost;  // substitution (or match)
            if (prev[col] + 1 < best) {
                best = prev[col] + 1;  // deletion
            }
            if (cur[col - 1] + 1 < best) {
                best = cur[col - 1] + 1;  // insertion
            }

            // transposition: the last two chars of both prefixes are swapped,
            // i.e. str1 ends with "xy" where str2 ends with "yx"
            if (row > 1 && col > 1 && str1[row - 1] == str2[col - 2] &&
                str1[row - 2] == str2[col - 1]) {
                const unsigned* prev2 = prev - cols;
                if (prev2[col - 2] + 1 < best) {
                    best = prev2[col - 2] + 1;
                }
            }

            cur[col] = best;
        }
    }

    unsigned result = matrix[str1_len * cols + str2_len];
    free(matrix);
    return result;
}


--------------------------------------------------------------------------------
/src/fuzzy/phonetic.c:
--------------------------------------------------------------------------------
// Originally from the spellfix SQLite extension, Public Domain
 2 | // https://www.sqlite.org/src/file/ext/misc/spellfix.c
 3 | // Modified by Anton Zhiyanov, https://github.com/nalgeon/sqlean/, MIT License
 4 | 
 5 | #include <assert.h>
 6 | #include <stdlib.h>
 7 | 
 8 | #include "common.h"
 9 | 
10 | extern const unsigned char midClass[];
11 | extern const unsigned char initClass[];
12 | extern const unsigned char className[];
13 | 
/*
** Generate a "phonetic hash" from a string of ASCII characters
** in zIn[0..nIn-1].
**
**   * Map characters by character class (the CCLASS_* values in common.h).
**   * Omit double-letters
**   * Omit vowels beside R and L
**   * Omit T when followed by CH
**   * Omit W when followed by R
**   * Omit D when followed by J or G
**   * Omit K in KN or G in GN at the beginning of a word
**
** The letter-specific rules above compare against lowercase letters only.
** NOTE(review): how uppercase input is treated depends on the class tables
** (initClass/midClass), which are defined outside this file.
**
** Space to hold the result is obtained from malloc(). The result is
** NUL-terminated and never longer than nIn bytes; the caller is expected
** to release it with free().
**
** Return NULL if memory allocation fails.
*/
unsigned char* phonetic_hash(const unsigned char* zIn, int nIn) {
    unsigned char* zOut = malloc(nIn + 1);
    int i;
    int nOut = 0;
    /* 0x77 is not a CCLASS_* value, so "no previous class" matches nothing */
    char cPrev = 0x77;  /* class of the previous kept char, silent ones included */
    char cPrevX = 0x77; /* class of the previous char that was not silent */
    /* the first classified character uses the word-initial class table */
    const unsigned char* aClass = initClass;

    if (zOut == 0)
        return 0;
    /* drop a leading 'g' or 'k' that is followed by 'n' (only if nIn > 2) */
    if (nIn > 2) {
        switch (zIn[0]) {
            case 'g':
            case 'k': {
                if (zIn[1] == 'n') {
                    zIn++;
                    nIn--;
                }
                break;
            }
        }
    }
    for (i = 0; i < nIn; i++) {
        unsigned char c = zIn[i];
        /* letter-based omissions that need to look ahead */
        if (i + 1 < nIn) {
            if (c == 'w' && zIn[i + 1] == 'r')
                continue;
            if (c == 'd' && (zIn[i + 1] == 'j' || zIn[i + 1] == 'g'))
                continue;
            if (i + 2 < nIn) {
                if (c == 't' && zIn[i + 1] == 'c' && zIn[i + 2] == 'h')
                    continue;
            }
        }
        /* from here on c is a character class, not a character;
        ** the mask keeps the table index within 0..127 */
        c = aClass[c & 0x7f];
        if (c == CCLASS_SPACE)
            continue;
        /* "other" characters are kept only directly after a digit */
        if (c == CCLASS_OTHER && cPrev != CCLASS_DIGIT)
            continue;
        /* every character after the first kept one uses the mid-word table */
        aClass = midClass;
        if (c == CCLASS_VOWEL && (cPrevX == CCLASS_R || cPrevX == CCLASS_L)) {
            continue; /* No vowels beside L or R */
        }
        if ((c == CCLASS_R || c == CCLASS_L) && cPrevX == CCLASS_VOWEL) {
            nOut--; /* No vowels beside L or R */
        }
        cPrev = c;
        /* silent characters update cPrev but produce no output */
        if (c == CCLASS_SILENT)
            continue;
        cPrevX = c;
        /* translate the class into its output letter */
        c = className[c];
        assert(nOut >= 0);
        /* collapse runs of the same output letter */
        if (nOut == 0 || c != zOut[nOut - 1])
            zOut[nOut++] = c;
    }
    zOut[nOut] = 0;
    return zOut;
}


--------------------------------------------------------------------------------
/src/fuzzy/soundex.c:
--------------------------------------------------------------------------------
  1 | // Copyright (c) 2014 Ross Bayer, MIT License
  2 | // https://github.com/Rostepher/libstrcmp
  3 | 
  4 | #include <assert.h>
  5 | #include <ctype.h>
  6 | #include <stdlib.h>
  7 | #include <string.h>
  8 | 
  9 | #include "common.h"
 10 | 
 11 | /// Helper function that returns the numeric code for a given char as specified
 12 | /// by the soundex algorithm.
 13 | ///
 14 | /// @param c char to encode
 15 | ///
 16 | /// @returns char representation of the number associated with the given char
 17 | static char encode_char(const char c) {
 18 |     switch (tolower(c)) {
 19 |         case 'b':
 20 |         case 'f':
 21 |         case 'p':
 22 |         case 'v':
 23 |             return '1';
 24 | 
 25 |         case 'c':
 26 |         case 'g':
 27 |         case 'j':
 28 |         case 'k':
 29 |         case 'q':
 30 |         case 's':
 31 |         case 'x':
 32 |         case 'z':
 33 |             return '2';
 34 | 
 35 |         case 'd':
 36 |         case 't':
 37 |             return '3';
 38 | 
 39 |         case 'l':
 40 |             return '4';
 41 | 
 42 |         case 'm':
 43 |         case 'n':
 44 |             return '5';
 45 | 
 46 |         case 'r':
 47 |             return '6';
 48 | 
 49 |         default:
 50 |             break;
 51 |     }
 52 | 
 53 |     return '0';
 54 | }
 55 | 
 56 | /// Computes and returns the soundex representation of a given non NULL string.
 57 | /// More information about the algorithm can be found here:
 58 | ///     https://en.wikipedia.org/wiki/Soundex
 59 | ///
 60 | /// @param str non NULL string to encode
 61 | ///
 62 | /// @returns soundex representation of str
 63 | char* soundex(const char* str) {
 64 |     // string cannot be NULL
 65 |     assert(str != NULL);
 66 | 
 67 |     size_t str_len = strlen(str);
 68 | 
 69 |     // allocate space for final code and null terminator
 70 |     char* code = malloc(5 * sizeof(char));
 71 | 
 72 |     // temporary buffer to encode string
 73 |     char buf[str_len];
 74 | 
 75 |     // set first value to first char in str
 76 |     code[0] = toupper(str[0]);
 77 | 
 78 |     // number of digits in code
 79 |     unsigned d = 1;
 80 | 
 81 |     // encode all chars in str
 82 |     for (unsigned i = 0; i < str_len; i++) {
 83 |         buf[i] = encode_char(str[i]);
 84 |     }
 85 | 
 86 |     // add all viable chars to code
 87 |     for (unsigned i = 1; i < str_len && d < 4; i++) {
 88 |         // check if current char in buf is not the same as previous char
 89 |         // and that the current char is not '0'
 90 |         if (NOT_EQ(buf[i], buf[i - 1]) && NOT_EQ(buf[i], '0')) {
 91 |             // if digits separated by an 'h' or 'w' are the same, skip them
 92 |             if (i > 1 && EQ(buf[i], buf[i - 2]) && strchr("hw", str[i - 1])) {
 93 |                 continue;
 94 |             }
 95 | 
 96 |             // add digit to the code
 97 |             code[d] = buf[i];
 98 | 
 99 |             // increment digit counter
100 |             d++;
101 |         }
102 |     }
103 | 
104 |     // pad the end of code with '0' if too short
105 |     while (d < 4) {
106 |         code[d] = '0';
107 |         d++;
108 |     }
109 | 
110 |     // null terminate string
111 |     code[d] = '\0';
112 | 
113 |     return code;
114 | }


--------------------------------------------------------------------------------
/src/fuzzy/refined_soundex.c:
--------------------------------------------------------------------------------
  1 | // Copyright (c) 2014 Ross Bayer, MIT License
  2 | // https://github.com/Rostepher/libstrcmp
  3 | 
  4 | #include <assert.h>
  5 | #include <ctype.h>
  6 | #include <stdlib.h>
  7 | #include <string.h>
  8 | 
  9 | #include "common.h"
 10 | 
 11 | /// Helper function that returns the numeric code for a given char as specified
 12 | /// by the refined soundex algorithm.
 13 | ///
 14 | /// @param c char to encode
 15 | ///
 16 | /// @returns char representation of the number associated with the given char
 17 | static char encode_char(const char c) {
 18 |     switch (tolower(c)) {
 19 |         case 'b':
 20 |         case 'p':
 21 |             return '1';
 22 | 
 23 |         case 'f':
 24 |         case 'v':
 25 |             return '2';
 26 | 
 27 |         case 'c':
 28 |         case 'k':
 29 |         case 's':
 30 |             return '3';
 31 | 
 32 |         case 'g':
 33 |         case 'j':
 34 |             return '4';
 35 | 
 36 |         case 'q':
 37 |         case 'x':
 38 |         case 'z':
 39 |             return '5';
 40 | 
 41 |         case 'd':
 42 |         case 't':
 43 |             return '6';
 44 | 
 45 |         case 'l':
 46 |             return '7';
 47 | 
 48 |         case 'm':
 49 |         case 'n':
 50 |             return '8';
 51 | 
 52 |         case 'r':
 53 |             return '9';
 54 | 
 55 |         default:
 56 |             break;
 57 |     }
 58 | 
 59 |     return '0';
 60 | }
 61 | 
 62 | /// Computes and returns the soundex representation of a given non NULL string.
 63 | /// More information about the algorithm can be found here:
 64 | ///     http://ntz-develop.blogspot.com/2011/03/phonetic-algorithms.html
 65 | ///
 66 | /// @param str non NULL string to encode
 67 | ///
 68 | /// @returns soundex representation of str
 69 | char* refined_soundex(const char* str) {
 70 |     // string cannot be NULL
 71 |     assert(str != NULL);
 72 | 
 73 |     size_t str_len = strlen(str);
 74 | 
 75 |     // final code buffer
 76 |     char code[str_len + 1];
 77 | 
 78 |     // temporary buffer to encode string
 79 |     char buf[str_len];
 80 | 
 81 |     // set first value to first char in str
 82 |     code[0] = toupper(str[0]);
 83 | 
 84 |     // number of digits in code
 85 |     unsigned d = 1;
 86 | 
 87 |     // encode all chars in str
 88 |     for (unsigned i = 0; i < str_len; i++)
 89 |         buf[i] = encode_char(str[i]);
 90 | 
 91 |     // add all viable chars to code
 92 |     char prev = '\0';
 93 |     for (unsigned i = 0; i < str_len; i++) {
 94 |         // check if current char in buf is not the same as previous char
 95 |         if (NOT_EQ(buf[i], prev)) {
 96 |             // add digit to the code
 97 |             code[d] = buf[i];
 98 | 
 99 |             // increment digit counter
100 |             d++;
101 | 
102 |             // set prev to current char
103 |             prev = buf[i];
104 |         }
105 |     }
106 | 
107 |     // allocate space for final code
108 |     // d will be length of the code + 1
109 |     char* result = malloc(d * sizeof(char));
110 | 
111 |     // copy final code into result and null terminate
112 |     for (unsigned i = 0; i < d; i++) {
113 |         result[i] = code[i];
114 |     }
115 |     result[d] = '\0';
116 | 
117 |     return result;
118 | }


--------------------------------------------------------------------------------
/test/fuzzy.sql:
--------------------------------------------------------------------------------
 1 | -- Copyright (c) 2021 Anton Zhiyanov, MIT License
 2 | -- https://github.com/nalgeon/sqlean
 3 | 
-- Test suite for the fuzzy extension.
-- Every statement prints a test id followed by the result of a comparison,
-- i.e. 1 when the expectation holds. The `test` target of the Makefile fails
-- when an output line ends with anything other than 1.
.load dist/fuzzy

-- Damerau-Levenshtein distance
-- (a swap of two adjacent chars counts as a single edit, see 06)
select '01', dlevenshtein('abc', 'abc') = 0;
select '02', dlevenshtein('abc', '') = 3;
select '03', dlevenshtein('', 'abc') = 3;
select '04', dlevenshtein('abc', 'ab') = 1;
select '05', dlevenshtein('abc', 'abcd') = 1;
select '06', dlevenshtein('abc', 'acb') = 1;
select '07', dlevenshtein('abc', 'ca') = 2;

-- Hamming distance
-- (-1 is expected when exactly one of the strings is empty, see 22 and 23)
select '21', hamming('abc', 'abc') = 0;
select '22', hamming('abc', '') = -1;
select '23', hamming('', 'abc') = -1;
select '24', hamming('hello', 'hellp') = 1;
select '25', hamming('hello', 'heloh') = 2;

-- Jaro-Winkler distance
-- (1.0 for equal strings, 0.0 when exactly one of the strings is empty)
select '31', jaro_winkler('abc', 'abc') = 1.0;
select '32', jaro_winkler('abc', '') = 0.0;
select '33', jaro_winkler('', 'abc') = 0.0;
select '34', round(jaro_winkler('my string', 'my tsring'), 3) = 0.974;
select '35', round(jaro_winkler('my string', 'my ntrisg'), 3) = 0.896;

-- Levenshtein distance
-- (a swap of two adjacent chars costs 2 here, compare 46 with 06)
select '41', levenshtein('abc', 'abc') = 0;
select '42', levenshtein('abc', '') = 3;
select '43', levenshtein('', 'abc') = 3;
select '44', levenshtein('abc', 'ab') = 1;
select '45', levenshtein('abc', 'abcd') = 1;
select '46', levenshtein('abc', 'acb') = 2;
select '47', levenshtein('abc', 'ca') = 3;

-- Optimal String Alignment distance
-- (the expected values below are the same as for levenshtein above)
select '51', osa_distance('abc', 'abc') = 0;
select '52', osa_distance('abc', '') = 3;
select '53', osa_distance('', 'abc') = 3;
select '54', osa_distance('abc', 'ab') = 1;
select '55', osa_distance('abc', 'abcd') = 1;
select '56', osa_distance('abc', 'acb') = 2;
select '57', osa_distance('abc', 'ca') = 3;

-- Spellcheck edit distance
-- (a weighted distance; it is not symmetric, compare 62 with 63)
select '61', edit_distance('abc', 'abc') = 0;
select '62', edit_distance('abc', '') = 300;
select '63', edit_distance('', 'abc') = 75;
select '64', edit_distance('abc', 'ab') = 100;
select '65', edit_distance('abc', 'abcd') = 25;
select '66', edit_distance('abc', 'acb') = 110;
select '67', edit_distance('abc', 'ca') = 225;

-- Spellcheck phonetic code
-- (all phonetic functions below are expected to map null to null
-- and an empty string to an empty string)
select '101', phonetic_hash(null) is null;
select '102', phonetic_hash('') = '';
select '103', phonetic_hash('phonetics') = 'BAMADAC';
select '104', phonetic_hash('is') = 'AC';
select '105', phonetic_hash('awesome') = 'ABACAMA';

-- Soundex code
-- (first letter plus three digits, padded with zeros)
select '111', soundex(null) is null;
select '112', soundex('') = '';
select '113', soundex('phonetics') = 'P532';
select '114', soundex('is') = 'I200';
select '115', soundex('awesome') = 'A250';

-- Refined Soundex code
-- (the code length varies with the input)
select '121', rsoundex(null) is null;
select '122', rsoundex('') = '';
select '123', rsoundex('phonetics') = 'P1080603';
select '124', rsoundex('is') = 'I03';
select '125', rsoundex('awesome') = 'A03080';

-- Caverphone phonetic code
-- (the non-empty codes below are 10 chars long, padded with '1')
select '131', caverphone(null) is null;
select '132', caverphone('') = '';
select '133', caverphone('phonetics') = 'FNTKS11111';
select '134', caverphone('is') = 'AS11111111';
select '135', caverphone('awesome') = 'AWSM111111';
83 | 


--------------------------------------------------------------------------------
/src/fuzzy/damerau_levenshtein.c:
--------------------------------------------------------------------------------
  1 | // Copyright (c) 2014 Ross Bayer, MIT License
  2 | // https://github.com/Rostepher/libstrcmp
  3 | 
  4 | #include <assert.h>
  5 | #include <stdlib.h>
  6 | #include <string.h>
  7 | 
  8 | #include "common.h"
  9 | 
 10 | /// Calculates and returns the Damerau-Levenshtein distance of two non NULL
 11 | /// strings. More information about the algorithm can be found here:
 12 | ///     https://en.wikipedia.org/wiki/Damerau-Levenshtein_distance
 13 | ///
 14 | /// @param str1 first non NULL string
 15 | /// @param str2 second non NULL string
 16 | ///
 17 | /// @returns Damerau-Levenshtein distance of str1 and str2
 18 | unsigned damerau_levenshtein(const char* str1, const char* str2) {
 19 |     // strings cannot be NULL
 20 |     assert(str1 != NULL);
 21 |     assert(str2 != NULL);
 22 | 
 23 |     // size of the alphabet
 24 |     const unsigned alpha_size = 255;
 25 | 
 26 |     size_t str1_len = strlen(str1);
 27 |     size_t str2_len = strlen(str2);
 28 | 
 29 |     // handle cases where one or both strings are empty
 30 |     if (str1_len == 0) {
 31 |         return str2_len;
 32 |     }
 33 |     if (str2_len == 0) {
 34 |         return str1_len;
 35 |     }
 36 | 
 37 |     // remove common substring
 38 |     while (str1_len > 0 && str2_len > 0 && EQ(str1[0], str2[0])) {
 39 |         str1++, str2++;
 40 |         str1_len--, str2_len--;
 41 |     }
 42 | 
 43 |     const unsigned INFINITY = str1_len + str2_len;
 44 |     unsigned row, col;
 45 | 
 46 |     // create "dictionary"
 47 |     unsigned* dict = calloc(alpha_size, sizeof(unsigned));
 48 | 
 49 |     size_t m_rows = str1_len + 2;  // matrix rows
 50 |     size_t m_cols = str2_len + 2;  // matrix cols
 51 | 
 52 |     // matrix to hold computed values
 53 |     unsigned** matrix = malloc(m_rows * sizeof(unsigned*));
 54 |     for (unsigned i = 0; i < m_rows; i++) {
 55 |         matrix[i] = calloc(m_cols, sizeof(unsigned));
 56 |     }
 57 | 
 58 |     // set all the starting values and add all characters to the dict
 59 |     matrix[0][0] = INFINITY;
 60 |     for (row = 1; row < m_rows; row++) {
 61 |         matrix[row][0] = INFINITY;
 62 |         matrix[row][1] = row - 1;
 63 |     }
 64 |     for (col = 1; col < m_cols; col++) {
 65 |         matrix[0][col] = INFINITY;
 66 |         matrix[1][col] = col - 1;
 67 |     }
 68 | 
 69 |     unsigned db;
 70 |     unsigned i, k;
 71 |     unsigned cost;
 72 | 
 73 |     // fill in the matrix
 74 |     for (row = 1; row <= str1_len; row++) {
 75 |         db = 0;
 76 | 
 77 |         for (col = 1; col <= str2_len; col++) {
 78 |             i = dict[(unsigned)str2[col - 1]];
 79 |             k = db;
 80 |             cost = EQ(str1[row - 1], str2[col - 1]) ? 0 : 1;
 81 | 
 82 |             if (cost == 0) {
 83 |                 db = col;
 84 |             }
 85 | 
 86 |             matrix[row + 1][col + 1] =
 87 |                 MIN4(matrix[row][col] + cost, matrix[row + 1][col] + 1, matrix[row][col + 1] + 1,
 88 |                      matrix[i][k] + (row - i - 1) + (col - k - 1) + 1);
 89 |         }
 90 | 
 91 |         dict[(unsigned)str1[row - 1]] = row;
 92 |     }
 93 | 
 94 |     unsigned result = matrix[m_rows - 1][m_cols - 1];
 95 | 
 96 |     // free allocated memory
 97 |     free(dict);
 98 |     for (unsigned i = 0; i < m_rows; i++) {
 99 |         free(matrix[i]);
100 |     }
101 |     free(matrix);
102 | 
103 |     return result;
104 | }


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2021 Anton Zhiyanov, MIT License
 2 | # https://github.com/nalgeon/sqlean
 3 | 
 4 | .PHONY: prepare-dist download-sqlite download-external compile-linux compile-windows compile-macos test test-all
 5 | 
 6 | prepare-dist:
 7 | 	mkdir -p dist
 8 | 	rm -f dist/*
 9 | 
10 | download-sqlite:
11 | 	curl -L http://sqlite.org/$(SQLITE_RELEASE_YEAR)/sqlite-amalgamation-$(SQLITE_VERSION).zip --output src.zip
12 | 	unzip src.zip
13 | 	mv sqlite-amalgamation-$(SQLITE_VERSION)/* src
14 | 
15 | download-external:
16 | 	curl -L https://github.com/sqlite/sqlite/raw/branch-$(SQLITE_BRANCH)/ext/misc/json1.c --output src/sqlite3-json1.c
17 | 	curl -L https://github.com/mackyle/sqlite/raw/branch-$(SQLITE_BRANCH)/src/test_windirent.h --output src/test_windirent.h
18 | 
19 | compile-linux:
20 | 	gcc -fPIC -shared src/sqlite3-crypto.c src/crypto/*.c -o dist/crypto.so
21 | 	gcc -fPIC -shared src/sqlite3-fileio.c -o dist/fileio.so
22 | 	gcc -fPIC -shared src/sqlite3-fuzzy.c src/fuzzy/*.c -o dist/fuzzy.so
23 | 	gcc -fPIC -shared src/sqlite3-ipaddr.c -o dist/ipaddr.so
24 | 	gcc -fPIC -shared src/sqlite3-json1.c -o dist/json1.so
25 | 	gcc -fPIC -shared src/sqlite3-math.c -o dist/math.so -lm
26 | 	gcc -fPIC -shared src/sqlite3-re.c src/re.c -o dist/re.so
27 | 	gcc -fPIC -shared src/sqlite3-stats.c -o dist/stats.so -lm
28 | 	gcc -fPIC -shared src/sqlite3-text.c -o dist/text.so
29 | 	gcc -fPIC -shared src/sqlite3-unicode.c -o dist/unicode.so
30 | 	gcc -fPIC -shared src/sqlite3-uuid.c -o dist/uuid.so
31 | 	gcc -fPIC -shared src/sqlite3-vsv.c -o dist/vsv.so -lm
32 | 
33 | compile-windows:
34 | 	gcc -shared -I. src/sqlite3-crypto.c src/crypto/*.c -o dist/crypto.dll
35 | 	gcc -shared -I. src/sqlite3-fileio.c -o dist/fileio.dll
36 | 	gcc -shared -I. src/sqlite3-fuzzy.c src/fuzzy/*.c -o dist/fuzzy.dll
37 | 	gcc -shared -I. src/sqlite3-json1.c -o dist/json1.dll
38 | 	gcc -shared -I. src/sqlite3-math.c -o dist/math.dll -lm
39 | 	gcc -shared -I. src/sqlite3-re.c src/re.c -o dist/re.dll
40 | 	gcc -shared -I. src/sqlite3-stats.c -o dist/stats.dll -lm
41 | 	gcc -shared -I. src/sqlite3-text.c -o dist/text.dll
42 | 	gcc -shared -I. src/sqlite3-unicode.c -o dist/unicode.dll
43 | 	gcc -shared -I. src/sqlite3-uuid.c -o dist/uuid.dll
44 | 	gcc -shared -I. src/sqlite3-vsv.c -o dist/vsv.dll -lm
45 | 
46 | compile-macos:
47 | 	gcc -fPIC -dynamiclib -I src src/sqlite3-crypto.c src/crypto/*.c -o dist/crypto.dylib
48 | 	gcc -fPIC -dynamiclib -I src src/sqlite3-fileio.c -o dist/fileio.dylib
49 | 	gcc -fPIC -dynamiclib -I src src/sqlite3-fuzzy.c src/fuzzy/*.c -o dist/fuzzy.dylib
50 | 	gcc -fPIC -dynamiclib -I src src/sqlite3-ipaddr.c -o dist/ipaddr.dylib
51 | 	gcc -fPIC -dynamiclib -I src src/sqlite3-json1.c -o dist/json1.dylib
52 | 	gcc -fPIC -dynamiclib -I src src/sqlite3-math.c -o dist/math.dylib -lm
53 | 	gcc -fPIC -dynamiclib -I src src/sqlite3-re.c src/re.c -o dist/re.dylib
54 | 	gcc -fPIC -dynamiclib -I src src/sqlite3-stats.c -o dist/stats.dylib -lm
55 | 	gcc -fPIC -dynamiclib -I src src/sqlite3-text.c -o dist/text.dylib
56 | 	gcc -fPIC -dynamiclib -I src src/sqlite3-unicode.c -o dist/unicode.dylib
57 | 	gcc -fPIC -dynamiclib -I src src/sqlite3-uuid.c -o dist/uuid.dylib
58 | 	gcc -fPIC -dynamiclib -I src src/sqlite3-vsv.c -o dist/vsv.dylib -lm
59 | 
60 | test-all:
61 | 	make test suite=crypto
62 | 	make test suite=fileio
63 | 	make test suite=fuzzy
64 | 	make test suite=ipaddr
65 | 	make test suite=json1
66 | 	make test suite=math
67 | 	make test suite=re
68 | 	make test suite=stats
69 | 	make test suite=text
70 | 	make test suite=unicode
71 | 	make test suite=uuid
72 | 	make test suite=vsv
73 | 
74 | # fails if grep does find a failed test case
75 | # https://stackoverflow.com/questions/15367674/bash-one-liner-to-exit-with-the-opposite-status-of-a-grep-command/21788642
76 | test:
77 | 	@sqlite3 < test/$(suite).sql > test.log
78 | 	@cat test.log | (! grep -Ex "[0-9]+.[^1]")
79 | 


--------------------------------------------------------------------------------
/src/crypto/sha2.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * FILE:    sha2.h
 3 |  * AUTHOR:  Aaron D. Gifford - http://www.aarongifford.com/
 4 |  *
 5 |  * Copyright (c) 2000-2001, Aaron D. Gifford
 6 |  * All rights reserved.
 7 |  *
 8 |  * Redistribution and use in source and binary forms, with or without
 9 |  * modification, are permitted provided that the following conditions
10 |  * are met:
11 |  * 1. Redistributions of source code must retain the above copyright
12 |  *    notice, this list of conditions and the following disclaimer.
13 |  * 2. Redistributions in binary form must reproduce the above copyright
14 |  *    notice, this list of conditions and the following disclaimer in the
15 |  *    documentation and/or other materials provided with the distribution.
16 |  * 3. Neither the name of the copyright holder nor the names of contributors
17 |  *    may be used to endorse or promote products derived from this software
18 |  *    without specific prior written permission.
19 |  *
20 |  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTOR(S) ``AS IS'' AND
21 |  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 |  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTOR(S) BE LIABLE
24 |  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 |  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 |  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 |  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 |  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 |  * SUCH DAMAGE.
31 |  *
32 |  * $Id: sha2.h,v 1.1 2001/11/08 00:02:01 adg Exp adg $
33 |  */
34 | 
#ifndef __SHA2_H__
#define __SHA2_H__

/*
 * Build-time switches. SHA2_USE_INTTYPES_H selects the <inttypes.h> types
 * below; the other two are not referenced in this header.
 * NOTE(review): presumably all three are consumed by the implementation
 * file - confirm there.
 */
#define SHA2_USE_INTTYPES_H
#define SHA2_UNROLL_TRANSFORM
#define NOPROTO

/*
 * Import the size_t type definition from system headers.  You
 * may need to change this, or define these things yourself in this
 * file.
 */
#include <sys/types.h>

#ifdef SHA2_USE_INTTYPES_H

/* uint8_t, uint32_t and uint64_t used by the context structures */
#include <inttypes.h>

#endif /* SHA2_USE_INTTYPES_H */

/*** SHA-256/384/512 Various Length Definitions ***********************/
/* All lengths are in bytes; the *_STRING_LENGTH values hold two hex   */
/* digits per digest byte plus a terminating NUL.                      */
#define SHA256_BLOCK_LENGTH 64
#define SHA256_DIGEST_LENGTH 32
#define SHA256_DIGEST_STRING_LENGTH (SHA256_DIGEST_LENGTH * 2 + 1)
#define SHA384_BLOCK_LENGTH 128
#define SHA384_DIGEST_LENGTH 48
#define SHA384_DIGEST_STRING_LENGTH (SHA384_DIGEST_LENGTH * 2 + 1)
#define SHA512_BLOCK_LENGTH 128
#define SHA512_DIGEST_LENGTH 64
#define SHA512_DIGEST_STRING_LENGTH (SHA512_DIGEST_LENGTH * 2 + 1)

/*** SHA-256/384/512 Context Structures *******************************/

/* Running state of a SHA-256 computation. */
typedef struct _SHA256_CTX {
    uint32_t state[8];
    uint64_t bitcount;
    uint8_t buffer[SHA256_BLOCK_LENGTH];
} SHA256_CTX;

/* Running state of a SHA-512 computation; bitcount is kept in two words. */
typedef struct _SHA512_CTX {
    uint64_t state[8];
    uint64_t bitcount[2];
    uint8_t buffer[SHA512_BLOCK_LENGTH];
} SHA512_CTX;

/* SHA-384 shares the context layout of SHA-512. */
typedef SHA512_CTX SHA384_CTX;

/*** SHA-256/384/512 Function Prototypes ******************************/

/*
 * Each algorithm offers the same three calls:
 *   *_init()   takes no arguments and returns a pointer to a context
 *              (NOTE(review): presumably newly allocated, NULL on failure -
 *              confirm in the implementation);
 *   *_update() takes the context, a data pointer and the data length;
 *   *_final()  takes the context and a buffer of *_DIGEST_LENGTH bytes
 *              (NOTE(review): the int result looks like the digest length
 *              in bytes - confirm in the implementation).
 */
void* sha256_init(void);
void sha256_update(SHA256_CTX*, const uint8_t*, size_t);
int sha256_final(SHA256_CTX*, uint8_t[SHA256_DIGEST_LENGTH]);

void* sha384_init(void);
void sha384_update(SHA384_CTX*, const uint8_t*, size_t);
int sha384_final(SHA384_CTX*, uint8_t[SHA384_DIGEST_LENGTH]);

void* sha512_init(void);
void sha512_update(SHA512_CTX*, const uint8_t*, size_t);
int sha512_final(SHA512_CTX*, uint8_t[SHA512_DIGEST_LENGTH]);

#endif /* __SHA2_H__ */


--------------------------------------------------------------------------------
/src/sqlite3-crypto.c:
--------------------------------------------------------------------------------
  1 | // Copyright (c) 2021 Anton Zhiyanov, MIT License
  2 | // https://github.com/nalgeon/sqlean
  3 | 
  4 | /*
  5 |  * SQLite secure hash functions.
  6 |  */
  7 | #include <assert.h>
  8 | #include <stdint.h>
  9 | #include <stdio.h>
 10 | #include <stdlib.h>
 11 | 
 12 | #include "crypto/md5.h"
 13 | #include "crypto/sha1.h"
 14 | #include "crypto/sha2.h"
 15 | #include "sqlite3ext.h"
 16 | 
 17 | SQLITE_EXTENSION_INIT1
 18 | 
 19 | /**
 20 |  * Generic compute hash function. Algorithm is encoded in the user data field.
 21 |  */
 22 | static void sqlite3_hash(sqlite3_context* context, int argc, sqlite3_value** argv) {
 23 |     assert(argc == 1);
 24 | 
 25 |     void* (*init_func)() = NULL;
 26 |     void (*update_func)(void*, void*, size_t) = NULL;
 27 |     int (*final_func)(void*, void*) = NULL;
 28 |     int algo = (intptr_t)sqlite3_user_data(context);
 29 | 
 30 |     switch (algo) {
 31 |         case 1: /* Hardened SHA1 */
 32 |             init_func = (void*)sha1_init;
 33 |             update_func = (void*)sha1_update;
 34 |             final_func = (void*)sha1_final;
 35 |             algo = 1;
 36 |             break;
 37 |         case 5: /* MD5 */
 38 |             init_func = (void*)md5_init;
 39 |             update_func = (void*)md5_update;
 40 |             final_func = (void*)md5_final;
 41 |             algo = 1;
 42 |             break;
 43 |         case 2256: /* SHA2-256 */
 44 |             init_func = (void*)sha256_init;
 45 |             update_func = (void*)sha256_update;
 46 |             final_func = (void*)sha256_final;
 47 |             algo = 1;
 48 |             break;
 49 |         case 2384: /* SHA2-384 */
 50 |             init_func = (void*)sha384_init;
 51 |             update_func = (void*)sha384_update;
 52 |             final_func = (void*)sha384_final;
 53 |             algo = 1;
 54 |             break;
 55 |         case 2512: /* SHA2-512 */
 56 |             init_func = (void*)sha512_init;
 57 |             update_func = (void*)sha512_update;
 58 |             final_func = (void*)sha512_final;
 59 |             algo = 1;
 60 |             break;
 61 |         default:
 62 |             sqlite3_result_error(context, "Unknown Algorithm", -1);
 63 |             return;
 64 |     }
 65 | 
 66 |     void* ctx;
 67 |     if (algo) {
 68 |         ctx = init_func();
 69 |     }
 70 |     if (!ctx) {
 71 |         sqlite3_result_error(context, "Algorithm could not allocate it's context", -1);
 72 |         return;
 73 |     }
 74 | 
 75 |     void* data = NULL;
 76 |     if (sqlite3_value_type(argv[0]) == SQLITE_NULL) {
 77 |         sqlite3_result_null(context);
 78 |         return;
 79 |     } else if (sqlite3_value_type(argv[0]) == SQLITE_BLOB) {
 80 |         data = (void*)sqlite3_value_blob(argv[0]);
 81 |     } else {
 82 |         data = (void*)sqlite3_value_text(argv[0]);
 83 |     }
 84 |     size_t datalen = sqlite3_value_bytes(argv[0]);
 85 |     if (datalen > 0)
 86 |         update_func(ctx, data, datalen);
 87 | 
 88 |     unsigned char hash[128] = {0};
 89 |     int hashlen = final_func(ctx, hash);
 90 |     sqlite3_result_blob(context, hash, hashlen, SQLITE_TRANSIENT);
 91 | }
 92 | 
 93 | /*
 94 |  * Registers the extension.
 95 |  */
 96 | #ifdef _WIN32
 97 | __declspec(dllexport)
 98 | #endif
 99 | 
100 |     int sqlite3_crypto_init(sqlite3* db, char** pzErrMsg, const sqlite3_api_routines* pApi) {
101 |     SQLITE_EXTENSION_INIT2(pApi);
102 |     static const int flags = SQLITE_UTF8 | SQLITE_INNOCUOUS | SQLITE_DETERMINISTIC;
103 |     sqlite3_create_function(db, "md5", 1, flags, (void*)5, sqlite3_hash, 0, 0);
104 |     sqlite3_create_function(db, "sha1", 1, flags, (void*)1, sqlite3_hash, 0, 0);
105 |     sqlite3_create_function(db, "sha256", -1, flags, (void*)2256, sqlite3_hash, 0, 0);
106 |     sqlite3_create_function(db, "sha384", -1, flags, (void*)2384, sqlite3_hash, 0, 0);
107 |     sqlite3_create_function(db, "sha512", -1, flags, (void*)2512, sqlite3_hash, 0, 0);
108 |     return SQLITE_OK;
109 | }


--------------------------------------------------------------------------------
/src/fuzzy/jaro_winkler.c:
--------------------------------------------------------------------------------
  1 | // Copyright (c) 2014 Ross Bayer, MIT License
  2 | // https://github.com/Rostepher/libstrcmp
  3 | 
  4 | #include <assert.h>
  5 | #include <stdbool.h>
  6 | #include <stdlib.h>
  7 | #include <string.h>
  8 | 
  9 | #include "common.h"
 10 | 
 11 | /// Calculates and returns the Jaro distance of two non NULL strings.
 12 | /// More information about the algorithm can be found here:
 13 | ///     http://en.wikipedia.org/wiki/Jaro-Winkler_distance
 14 | ///
 15 | /// @param str1 first non NULL string
 16 | /// @param str2 second non NULL string
 17 | ///
 18 | /// @returns the jaro distance of str1 and str2
 19 | double jaro(const char* str1, const char* str2) {
 20 |     // strings cannot be NULL
 21 |     assert(str1 != NULL);
 22 |     assert(str2 != NULL);
 23 | 
 24 |     int str1_len = strlen(str1);
 25 |     int str2_len = strlen(str2);
 26 | 
 27 |     // if both strings are empty return 1
 28 |     // if only one of the strings is empty return 0
 29 |     if (str1_len == 0) {
 30 |         return (str2_len == 0) ? 1.0 : 0.0;
 31 |     }
 32 | 
 33 |     // max distance between two chars to be considered matching
 34 |     // floor() is ommitted due to integer division rules
 35 |     int match_dist = (int)MAX(str1_len, str2_len) / 2 - 1;
 36 | 
 37 |     // arrays of bools that signify if that char in the matcing string has a
 38 |     // match
 39 |     int* str1_matches = calloc(str1_len, sizeof(int));
 40 |     int* str2_matches = calloc(str2_len, sizeof(int));
 41 | 
 42 |     // number of matches and transpositions
 43 |     double matches = 0.0;
 44 |     double trans = 0.0;
 45 | 
 46 |     // find the matches
 47 |     for (int i = 0; i < str1_len; i++) {
 48 |         // start and end take into account the match distance
 49 |         int start = MAX(0, i - match_dist);
 50 |         int end = MIN(i + match_dist + 1, str2_len);
 51 | 
 52 |         for (int k = start; k < end; k++) {
 53 |             // if str2 already has a match or str1 and str2 are not equal
 54 |             // continue
 55 |             if (str2_matches[k] || NOT_EQ(str1[i], str2[k])) {
 56 |                 continue;
 57 |             }
 58 | 
 59 |             // otherwise assume there is a match
 60 |             str1_matches[i] = true;
 61 |             str2_matches[k] = true;
 62 |             matches++;
 63 |             break;
 64 |         }
 65 |     }
 66 | 
 67 |     // if there are no matches return 0
 68 |     if (matches == 0) {
 69 |         free(str1_matches);
 70 |         free(str2_matches);
 71 |         return 0.0;
 72 |     }
 73 | 
 74 |     // count transpositions
 75 |     int k = 0;
 76 |     for (int i = 0; i < str1_len; i++) {
 77 |         // if there are no matches in str1 continue
 78 |         if (!str1_matches[i]) {
 79 |             continue;
 80 |         }
 81 | 
 82 |         // while there is no match in str2 increment k
 83 |         while (!str2_matches[k]) {
 84 |             k++;
 85 |         }
 86 | 
 87 |         // increment trans
 88 |         if (NOT_EQ(str1[i], str2[k])) {
 89 |             trans++;
 90 |         }
 91 | 
 92 |         k++;
 93 |     }
 94 | 
 95 |     // divide the number of transpositions by two as per the algorithm specs
 96 |     // this division is valid because the counted transpositions include both
 97 |     // instances of the transposed characters.
 98 |     trans /= 2.0;
 99 | 
100 |     // free allocated memory
101 |     free(str1_matches);
102 |     free(str2_matches);
103 | 
104 |     // return the jaro distance
105 |     return ((matches / str1_len) + (matches / str2_len) + ((matches - trans) / matches)) / 3.0;
106 | }
107 | 
108 | /// Calculates and returns the Jaro-Winkler distance of two non NULL strings.
109 | /// More information about the algorithm can be found here:
110 | ///     http://en.wikipedia.org/wiki/Jaro-Winkler_distance
111 | ///
112 | /// @param str1 first non NULL string
113 | /// @param str2 second non NULL string
114 | ///
115 | /// @returns the jaro-winkler distance of str1 and str2
116 | double jaro_winkler(const char* str1, const char* str2) {
117 |     // strings cannot be NULL
118 |     assert(str1 != NULL);
119 |     assert(str2 != NULL);
120 | 
121 |     // compute the jaro distance
122 |     double dist = jaro(str1, str2);
123 | 
124 |     // finds the number of common terms in the first 3 strings, max 3.
125 |     int prefix_length = 0;
126 |     if (strlen(str1) != 0 && strlen(str2) != 0) {
127 |         while (prefix_length < 3 && EQ(*str1++, *str2++)) {
128 |             prefix_length++;
129 |         }
130 |     }
131 | 
132 |     // 0.1 is the default scaling factor
133 |     return dist + prefix_length * 0.1 * (1 - dist);
134 | }


--------------------------------------------------------------------------------
/docs/fileio.md:
--------------------------------------------------------------------------------
  1 | # fileio: Read and write files in SQLite
  2 | 
  3 | Access the file system directly from SQL. Adapted from [fileio.c](https://sqlite.org/src/file/ext/misc/fileio.c) by D. Richard Hipp.
  4 | 
  5 | ### writefile(path, data [,perm [,mtime]])
  6 | 
  7 | Writes blob `data` to a file specified by `path`. Returns the number of written bytes. If an error occurs, returns NULL.
  8 | 
  9 | ```
 10 | sqlite> select writefile('hello.txt', 'hello world');
 11 | 11
 12 | ```
 13 | 
 14 | The `perm` argument specifies permission bits for the file (octal `666` by default). Expects _decimal_ value, not octal. Here are some popular values:
 15 | 
 16 | | Octal | Decimal | Description |
 17 | | ----- | ------- | ----------- |
 18 | | 600   | 384     | `rw-------` |
 19 | | 644   | 420     | `rw-r--r--` |
 20 | | 664   | 436     | `rw-rw-r--` |
 21 | | 666   | 438     | `rw-rw-rw-` |
 22 | | 755   | 493     | `rwxr-xr-x` |
 23 | | 777   | 511     | `rwxrwxrwx` |
 24 | 
 25 | ```
 26 | sqlite> select writefile('hello.txt', 'hello world', 436);
 27 | 11
 28 | ```
 29 | 
 30 | If the optional `mtime` argument is present, it expects an integer — the number of seconds since the unix epoch. The modification-time of the target file is set to this value before returning.
 31 | 
 32 | ### readfile(path)
 33 | 
 34 | Reads the file specified by `path` and returns its contents as `blob`.
 35 | 
 36 | ```
 37 | sqlite> select writefile('hello.txt', 'hello world');
 38 | 11
 39 | 
 40 | sqlite> select typeof(readfile('hello.txt'));
 41 | blob
 42 | 
 43 | sqlite> select length(readfile('hello.txt'));
 44 | 11
 45 | ```
 46 | 
 47 | ### mkdir(path[, perm])
 48 | 
 49 | Creates a directory named `path` with permission bits `perm` (octal `777` by default).
 50 | 
 51 | ```
sqlite> select mkdir('hellodir');
 53 | ```
 54 | 
 55 | ### symlink(src, dst)
 56 | 
 57 | Creates a symbolic link named `dst`, pointing to `src`.
 58 | 
 59 | ```
sqlite> select symlink('hello.txt', 'hello.lnk');
 61 | ```
 62 | 
 63 | ### lsdir(path[, recursive])
 64 | 
 65 | Lists files and directories as a virtual table.
 66 | 
 67 | List a single file specified by `path`:
 68 | 
 69 | ```
 70 | sqlite> select * from lsdir('hello.txt');
 71 | ┌───────────┬───────┬────────────┬──────┐
 72 | │   name    │ mode  │   mtime    │ size │
 73 | ├───────────┼───────┼────────────┼──────┤
 74 | │ hello.txt │ 33206 │ 1639516692 │ 11   │
 75 | └───────────┴───────┴────────────┴──────┘
 76 | ```
 77 | 
 78 | List a whole directory. Lists only the direct children by default:
 79 | 
 80 | ```
 81 | sqlite> select * from lsdir('test') order by name;
 82 | ┌─────────────────┬───────┬────────────┬──────┐
 83 | │      name       │ mode  │   mtime    │ size │
 84 | ├─────────────────┼───────┼────────────┼──────┤
 85 | │ test            │ 16877 │ 1639514106 │ 384  │
 86 | │ test/crypto.sql │ 33188 │ 1639349274 │ 1426 │
 87 | │ test/fileio.sql │ 33188 │ 1639516282 │ 1606 │
 88 | │ test/fuzzy.sql  │ 33188 │ 1639349290 │ 2957 │
 89 | │ ...             │ ...   │ ...        │ ...  │
 90 | └─────────────────┴───────┴────────────┴──────┘
 91 | ```
 92 | 
 93 | List a whole directory recursively. When `recursive = true`, lists all the descendants:
 94 | 
 95 | ```
 96 | sqlite> select * from lsdir('src', true);
 97 | ```
 98 | 
 99 | Each row has the following columns:
100 | 
101 | -   `name`: Path to file or directory (text value).
102 | -   `mode`: File mode (`stat.st_mode`, integer value).
103 | -   `mtime`: Last modification time (`stat.st_mtime`, integer number of seconds since the epoch).
104 | -   `size`: Total size in bytes (`stat.st_size`, integer value).
105 | 
106 | Use the `lsmode()` helper function to get a human-readable representation of the `mode`:
107 | 
108 | ```
109 | sqlite> select name, lsmode(mode) from lsdir('test');
110 | ┌─────────────────┬──────────────┐
111 | │      name       │ lsmode(mode) │
112 | ├─────────────────┼──────────────┤
113 | │ test            │ drwxr-xr-x   │
114 | │ test/crypto.sql │ -rw-r--r--   │
115 | │ test/fileio.sql │ -rw-r--r--   │
116 | │ test/fuzzy.sql  │ -rw-r--r--   │
117 | │ ...             │ ...          │
118 | └─────────────────┴──────────────┘
119 | ```
120 | 
121 | Parameter `path` is an absolute or relative pathname:
122 | 
123 | -   If the path refers to a file that does not exist — `lsdir()` returns zero rows.
124 | -   If the path refers to a regular file or symbolic link — it returns a single row.
125 | -   If the path refers to a directory — it returns one row for the directory and one row for each direct child. Optionally returns a row for every descendant, if `recursive = true`.
126 | 
127 | ## Usage
128 | 
129 | ```
130 | sqlite> .load ./fileio
131 | sqlite> select readfile('whatever.txt');
132 | ```
133 | 
134 | [Download](https://github.com/nalgeon/sqlean/releases/latest)
135 | 


--------------------------------------------------------------------------------
/src/sqlite3-text.c:
--------------------------------------------------------------------------------
  1 | // Originally by Liam Healy, Public Domain
  2 | // extension-functions.c at https://sqlite.org/contrib/
  3 | // Modified by Anton Zhiyanov, https://github.com/nalgeon/sqlean, MIT License
  4 | 
  5 | /*
  6 |  * SQLite text functions.
  7 |  */
  8 | #include <assert.h>
  9 | #include <stdint.h>
 10 | #include <stdio.h>
 11 | #include <stdlib.h>
 12 | #include <string.h>
 13 | 
 14 | #include "sqlite3ext.h"
 15 | 
 16 | SQLITE_EXTENSION_INIT1
 17 | 
/**
 * From sqlite3 utf.c
 *
 * Lookup table used by READ_UTF8 to decode the first byte of a multi-byte
 * UTF-8 sequence. READ_UTF8 indexes it with (first byte - 0xc0), so the
 * 64 entries correspond to first bytes 0xc0..0xff. Each entry is the starting
 * value of the code point before the continuation bytes are folded in.
 */
static const unsigned char sqlite3Utf8Trans1[] = {
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
};
 27 | 
 28 | #define READ_UTF8(zIn, zTerm, c)                                                    \
 29 |     c = *(zIn++);                                                                   \
 30 |     if (c >= 0xc0) {                                                                \
 31 |         c = sqlite3Utf8Trans1[c - 0xc0];                                            \
 32 |         while (zIn != zTerm && (*zIn & 0xc0) == 0x80) {                             \
 33 |             c = (c << 6) + (0x3f & *(zIn++));                                       \
 34 |         }                                                                           \
 35 |         if (c < 0x80 || (c & 0xFFFFF800) == 0xD800 || (c & 0xFFFFFFFE) == 0xFFFE) { \
 36 |             c = 0xFFFD;                                                             \
 37 |         }                                                                           \
 38 |     }
 39 | 
 40 | /*
 41 |  * reverse() and friends extracted from
 42 |  * extension-functions.c (https://sqlite.org/contrib/)
 43 |  * by Liam Healy
 44 |  */
 45 | #define advance_char(X)               \
 46 |     while ((0xc0 & *++(X)) == 0x80) { \
 47 |     }
 48 | 
 49 | static int read_char(const unsigned char* str) {
 50 |     int c;
 51 |     READ_UTF8(str, 0, c);
 52 |     return c;
 53 | }
 54 | 
 55 | /*
 56 |  * Returns reversed string.
 57 |  * reverse("abcde") == "edcba"
 58 |  */
 59 | static char* reverse(const char* source) {
 60 |     int len = strlen(source);
 61 |     char* result = sqlite3_malloc(len + 1);
 62 |     char* rzt = result + len;
 63 |     *(rzt--) = '\0';
 64 | 
 65 |     const char* zt = source;
 66 |     while (read_char((unsigned char*)zt) != 0) {
 67 |         source = zt;
 68 |         advance_char(zt);
 69 |         for (int i = 1; zt - i >= source; ++i) {
 70 |             *(rzt--) = *(zt - i);
 71 |         }
 72 |     }
 73 |     return result;
 74 | }
 75 | 
 76 | static void sqlite3_reverse(sqlite3_context* context, int argc, sqlite3_value** argv) {
 77 |     assert(argc == 1);
 78 |     if (sqlite3_value_type(argv[0]) == SQLITE_NULL) {
 79 |         sqlite3_result_null(context);
 80 |         return;
 81 |     }
 82 |     const char* source = (char*)sqlite3_value_text(argv[0]);
 83 |     char* result = reverse(source);
 84 |     sqlite3_result_text(context, result, -1, sqlite3_free);
 85 | }
 86 | 
 87 | /*
 88 |  * strsep() implementation, Windows doesn't have it
 89 |  * copied from https://unixpapa.com/incnote/string.html
 90 |  */
 91 | static char* str_sep(char** sp, const char* sep) {
 92 |     if (sp == NULL || *sp == NULL || **sp == '\0') {
 93 |         return NULL;
 94 |     }
 95 |     char* s = *sp;
 96 |     char* p = s + strcspn(s, sep);
 97 |     if (*p != '\0')
 98 |         *p++ = '\0';
 99 |     *sp = p;
100 |     return s;
101 | }
102 | 
/*
 * Splits `source` on the characters of `sep` and returns the token number
 * `part` (counting from one).
 * split_part("one;two;three", ";", 2) == "two"
 *
 * Tokenizing is done with str_sep(), so `source` is modified in place and the
 * returned pointer refers to memory inside `source`.
 * Returns NULL when the string has fewer than `part` tokens.
 */
static char* split_part(char* source, const char* sep, int64_t part) {
    char* rest = source;
    for (int64_t index = 1;; index++) {
        char* token = str_sep(&rest, sep);
        if (token == NULL || index == part) {
            return token;
        }
    }
}
118 | 
119 | static void sqlite3_split_part(sqlite3_context* context, int argc, sqlite3_value** argv) {
120 |     assert(argc == 3);
121 | 
122 |     char* source = (char*)sqlite3_value_text(argv[0]);
123 |     if (source == NULL) {
124 |         sqlite3_result_null(context);
125 |         return;
126 |     }
127 |     if (strcmp(source, "") == 0) {
128 |         sqlite3_result_text(context, "", -1, SQLITE_TRANSIENT);
129 |         return;
130 |     }
131 | 
132 |     const char* sep = (const char*)sqlite3_value_text(argv[1]);
133 |     if (!sep) {
134 |         sqlite3_result_null(context);
135 |         return;
136 |     }
137 | 
138 |     if (sqlite3_value_type(argv[2]) != SQLITE_INTEGER) {
139 |         sqlite3_result_error(context, "part parameter should be integer", -1);
140 |         return;
141 |     }
142 |     int64_t part = sqlite3_value_int64(argv[2]);
143 |     if (part <= 0) {
144 |         sqlite3_result_error(context, "part parameter should be > 0", -1);
145 |         return;
146 |     }
147 | 
148 |     char* token = split_part(source, sep, part);
149 | 
150 |     if (token == NULL) {
151 |         sqlite3_result_text(context, "", -1, SQLITE_TRANSIENT);
152 |         return;
153 |     }
154 |     sqlite3_result_text(context, token, -1, SQLITE_TRANSIENT);
155 | }
156 | 
157 | /*
158 |  * Registers the extension.
159 |  */
160 | #ifdef _WIN32
161 | __declspec(dllexport)
162 | #endif
163 |     int sqlite3_text_init(sqlite3* db, char** pzErrMsg, const sqlite3_api_routines* pApi) {
164 |     SQLITE_EXTENSION_INIT2(pApi);
165 |     static const int flags = SQLITE_UTF8 | SQLITE_INNOCUOUS | SQLITE_DETERMINISTIC;
166 |     sqlite3_create_function(db, "reverse", 1, flags, 0, sqlite3_reverse, 0, 0);
167 |     sqlite3_create_function(db, "split_part", 3, flags, 0, sqlite3_split_part, 0, 0);
168 |     return SQLITE_OK;
169 | }


--------------------------------------------------------------------------------
/docs/vsv.md:
--------------------------------------------------------------------------------
  1 | # vsv: CSV files as virtual tables in SQLite
  2 | 
  3 | Provides virtual table for working directly with CSV files, without importing data into the database. Useful for very large datasets.
  4 | 
  5 | Adapted from [vsv.c](http://www.dessus.com/files/vsv.c) by Keith Medcalf.
  6 | 
  7 | ## Example
  8 | 
  9 | For the `people.csv` file with the following data:
 10 | 
 11 | ```csv
 12 | 11,Diane,London
 13 | 22,Grace,Berlin
 14 | 33,Alice,Paris
 15 | ```
 16 | 
 17 | The `vsv` virtual table could look like this:
 18 | 
 19 | ```
 20 | .load ./vsv
 21 | 
 22 | create virtual table people using vsv(
 23 |     filename=people.csv,
 24 |     schema="create table people(id integer, name text, city text)",
 25 |     columns=3,
 26 |     affinity=integer
 27 | );
 28 | ```
 29 | 
 30 | ```
 31 | select * from people;
 32 | ┌────┬───────┬────────┐
 33 | │ id │ name  │  city  │
 34 | ├────┼───────┼────────┤
 35 | │ 11 │ Diane │ London │
 36 | │ 22 │ Grace │ Berlin │
 37 | │ 33 │ Alice │ Paris  │
 38 | └────┴───────┴────────┘
 39 | ```
 40 | 
 41 | ## Parameters
 42 | 
 43 | The parameters to the vsv module (the vsv(...) part) are as follows:
 44 | 
 45 | ```
 46 | filename=STRING     the filename, passed to the Operating System
 47 | data=STRING         alternative data
 48 | schema=STRING       Alternate Schema to use
 49 | columns=N           columns parsed from the VSV file
 50 | header=BOOL         whether or not a header row is present
 51 | skip=N              number of leading data rows to skip
 52 | rsep=STRING         record separator
 53 | fsep=STRING         field separator
 54 | validatetext=BOOL   validate UTF-8 encoding of text fields
 55 | affinity=AFFINITY   affinity to apply to each returned value
 56 | nulls=BOOL          empty fields are returned as NULL
 57 | ```
 58 | 
 59 | ### Defaults
 60 | 
 61 | ```
 62 | filename / data     nothing.  You must provide one or the other
 63 |                     it is an error to provide both or neither
 64 | 
 65 | schema              nothing.  If not provided then one will be
 66 |                     generated for you from the header, or if no
 67 |                     header is available then autogenerated using
 68 |                     field names manufactured as cX where X is the
 69 |                     column number
 70 | 
 71 | columns             nothing.  If not specified then the number of
 72 |                     columns is determined by counting the fields
 73 |                     in the first record of the VSV file (which
 74 |                     will be the header row if header is specified),
 75 |                     the number of columns is not parsed from the
 76 |                     schema even if one is provided
 77 | 
 78 | header=no           no header row in the VSV file
 79 | skip=0              do not skip any data rows in the VSV file
 80 | fsep=','            default field separator is a comma
 81 | rsep='\n'           default record separator is a newline
 82 | validatetext=no     do not validate text field encoding
 83 | affinity=none       do not apply affinity to each returned value
 84 | nulls=off           empty fields returned as zero-length
 85 | ```
 86 | 
 87 | ### Options
 88 | 
 89 | The `validatetext` setting will cause the validity of the field
 90 | encoding (not its contents) to be verified. It affects how
 91 | fields that are supposed to contain text will be returned to
 92 | the SQLite3 library in order to prevent invalid utf8 data from
 93 | being stored or processed as if it were valid utf8 text.
 94 | 
 95 | The `nulls` option will cause fields that do not contain anything
 96 | to return NULL rather than an empty result. Two separators
 97 | side-by-side with no intervening characters at all will be
 98 | returned as NULL if nulls is true; if nulls is false or
 99 | the contents are explicitly empty ("") then a 0 length blob
100 | (if affinity=blob) or a 0 length text string will be returned.
101 | 
102 | For the `affinity` setting, the following processing is applied to
103 | each value returned by the VSV virtual table:
104 | 
105 | -   `none` no affinity is applied, all fields will be
106 |     returned as text just like in the original
107 |     csv module, embedded nulls will terminate
108 |     the text. if validatetext is in effect then
109 |     an error will be thrown if the field does
110 |     not contain validly encoded text or contains
111 |     embedded nulls
112 | -   `blob` all fields will be returned as blobs
113 |     validatetext has no effect
114 | -   `text` all fields will be returned as text just
115 |     like in the original csv module, embedded
116 |     nulls will terminate the text.
117 |     if validatetext is in effect then a blob
118 |     will be returned if the field does not
119 |     contain validly encoded text or the field
120 |     contains embedded nulls
121 | -   `integer` if the field data looks like an integer,
122 |     (regex `^ *(\+|-)?\d+ *$`),
123 |     then an integer will be returned as
124 |     provided by the compiler and platform
125 |     runtime strtoll function
126 |     otherwise the field will be processed as
127 |     text as defined above
128 | -   `real` if the field data looks like a number,
129 |     (regex `^ *(\+|-)?(\d+\.?\d*|\d*\.?\d+)([eE](\+|-)?\d+)? *$`)
130 |     then a double will be returned as
131 |     provided by the compiler and platform
132 |     runtime strtold function otherwise the
133 |     field will be processed as text as
134 |     defined above
135 | -   `numeric` if the field looks like an integer
136 |     (see integer above) that integer will be
137 |     returned; if the field looks like a number
138 |     (see real above) then the number will be
139 |     returned as an integer if it has no
140 |     fractional part; otherwise a double will be returned
141 | 
142 | ### Parameter types
143 | 
144 | -   `STRING` means a quoted string
145 | -   `N` means a whole number not containing a sign
146 | -   `BOOL` means something that evaluates as true or false. Case insensitive: `yes`, `no`, `true`, `false`, `1`, `0`. Defaults to `true`
147 | -   `AFFINITY` means an SQLite3 type specification. Case insensitive: `none`, `blob`, `text`, `integer`, `real`, `numeric`
148 | -   STRING means a quoted string. The quote character may be either
149 |     a single quote or a double quote. Two quote characters in a row
150 |     will be replaced with a single quote character. STRINGS do not
151 |     need to be quoted if it is obvious where they begin and end
152 |     (that is, they do not contain a comma). Leading and trailing
153 |     spaces will be trimmed from unquoted strings.
154 | 
155 | A separator string (`fsep`, `rsep`) must contain exactly one character or a valid
156 | escape sequence. Recognized escape sequences are:
157 | 
158 | ```
159 | \t horizontal tab, ascii character 9 (0x09)
160 | \n linefeed, ascii character 10 (0x0a)
161 | \v vertical tab, ascii character 11 (0x0b)
162 | \f form feed, ascii character 12 (0x0c)
163 | \xhh specific byte where hh is hexadecimal
164 | ```
165 | 
166 | ## Usage
167 | 
168 | ```sql
169 | .load ./vsv
170 | 
171 | create virtual table temp.vsv using vsv(...);
172 | select * from vsv;
173 | ```
174 | 
175 | [Download](https://github.com/nalgeon/sqlean/releases/latest)
176 | 


--------------------------------------------------------------------------------
/src/fuzzy/common.c:
--------------------------------------------------------------------------------
  1 | // Originally from the spellfix SQLite extension, Public Domain
  2 | // https://www.sqlite.org/src/file/ext/misc/spellfix.c
  3 | // Modified by Anton Zhiyanov, https://github.com/nalgeon/sqlean/, MIT License
  4 | 
  5 | #include "common.h"
  6 | 
/*
** The following table gives the character class for non-initial ASCII
** characters.
**
** It has 128 entries, one per ASCII code in ascending order (the comment in
** front of each entry shows the printable character it belongs to; the first
** 33 entries and the last one are non-printable or blank).
** Tab (9), form feed (12), carriage return (13) and space (32) are
** CCLASS_SPACE; digits are CCLASS_DIGIT; upper and lower case letters share
** the same class. The CCLASS_* constants come from common.h.
*/
const unsigned char midClass[] = {
    /*   */ CCLASS_OTHER,  /*   */ CCLASS_OTHER, /*   */ CCLASS_OTHER,
    /*   */ CCLASS_OTHER,  /*   */ CCLASS_OTHER, /*   */ CCLASS_OTHER,
    /*   */ CCLASS_OTHER,  /*   */ CCLASS_OTHER, /*   */ CCLASS_OTHER,
    /*   */ CCLASS_SPACE,  /*   */ CCLASS_OTHER, /*   */ CCLASS_OTHER,
    /*   */ CCLASS_SPACE,  /*   */ CCLASS_SPACE, /*   */ CCLASS_OTHER,
    /*   */ CCLASS_OTHER,  /*   */ CCLASS_OTHER, /*   */ CCLASS_OTHER,
    /*   */ CCLASS_OTHER,  /*   */ CCLASS_OTHER, /*   */ CCLASS_OTHER,
    /*   */ CCLASS_OTHER,  /*   */ CCLASS_OTHER, /*   */ CCLASS_OTHER,
    /*   */ CCLASS_OTHER,  /*   */ CCLASS_OTHER, /*   */ CCLASS_OTHER,
    /*   */ CCLASS_OTHER,  /*   */ CCLASS_OTHER, /*   */ CCLASS_OTHER,
    /*   */ CCLASS_OTHER,  /*   */ CCLASS_OTHER, /*   */ CCLASS_SPACE,
    /* ! */ CCLASS_OTHER,  /* " */ CCLASS_OTHER, /* # */ CCLASS_OTHER,
    /* $ */ CCLASS_OTHER,  /* % */ CCLASS_OTHER, /* & */ CCLASS_OTHER,
    /* ' */ CCLASS_SILENT, /* ( */ CCLASS_OTHER, /* ) */ CCLASS_OTHER,
    /* * */ CCLASS_OTHER,  /* + */ CCLASS_OTHER, /* , */ CCLASS_OTHER,
    /* - */ CCLASS_OTHER,  /* . */ CCLASS_OTHER, /* / */ CCLASS_OTHER,
    /* 0 */ CCLASS_DIGIT,  /* 1 */ CCLASS_DIGIT, /* 2 */ CCLASS_DIGIT,
    /* 3 */ CCLASS_DIGIT,  /* 4 */ CCLASS_DIGIT, /* 5 */ CCLASS_DIGIT,
    /* 6 */ CCLASS_DIGIT,  /* 7 */ CCLASS_DIGIT, /* 8 */ CCLASS_DIGIT,
    /* 9 */ CCLASS_DIGIT,  /* : */ CCLASS_OTHER, /* ; */ CCLASS_OTHER,
    /* < */ CCLASS_OTHER,  /* = */ CCLASS_OTHER, /* > */ CCLASS_OTHER,
    /* ? */ CCLASS_OTHER,  /* @ */ CCLASS_OTHER, /* A */ CCLASS_VOWEL,
    /* B */ CCLASS_B,      /* C */ CCLASS_C,     /* D */ CCLASS_D,
    /* E */ CCLASS_VOWEL,  /* F */ CCLASS_B,     /* G */ CCLASS_C,
    /* H */ CCLASS_SILENT, /* I */ CCLASS_VOWEL, /* J */ CCLASS_C,
    /* K */ CCLASS_C,      /* L */ CCLASS_L,     /* M */ CCLASS_M,
    /* N */ CCLASS_M,      /* O */ CCLASS_VOWEL, /* P */ CCLASS_B,
    /* Q */ CCLASS_C,      /* R */ CCLASS_R,     /* S */ CCLASS_C,
    /* T */ CCLASS_D,      /* U */ CCLASS_VOWEL, /* V */ CCLASS_B,
    /* W */ CCLASS_B,      /* X */ CCLASS_C,     /* Y */ CCLASS_VOWEL,
    /* Z */ CCLASS_C,      /* [ */ CCLASS_OTHER, /* \ */ CCLASS_OTHER,
    /* ] */ CCLASS_OTHER,  /* ^ */ CCLASS_OTHER, /* _ */ CCLASS_OTHER,
    /* ` */ CCLASS_OTHER,  /* a */ CCLASS_VOWEL, /* b */ CCLASS_B,
    /* c */ CCLASS_C,      /* d */ CCLASS_D,     /* e */ CCLASS_VOWEL,
    /* f */ CCLASS_B,      /* g */ CCLASS_C,     /* h */ CCLASS_SILENT,
    /* i */ CCLASS_VOWEL,  /* j */ CCLASS_C,     /* k */ CCLASS_C,
    /* l */ CCLASS_L,      /* m */ CCLASS_M,     /* n */ CCLASS_M,
    /* o */ CCLASS_VOWEL,  /* p */ CCLASS_B,     /* q */ CCLASS_C,
    /* r */ CCLASS_R,      /* s */ CCLASS_C,     /* t */ CCLASS_D,
    /* u */ CCLASS_VOWEL,  /* v */ CCLASS_B,     /* w */ CCLASS_B,
    /* x */ CCLASS_C,      /* y */ CCLASS_VOWEL, /* z */ CCLASS_C,
    /* { */ CCLASS_OTHER,  /* | */ CCLASS_OTHER, /* } */ CCLASS_OTHER,
    /* ~ */ CCLASS_OTHER,  /*   */ CCLASS_OTHER,
};
/*
** This table gives the character class for ASCII characters that form the
** initial character of a word.  The only difference from midClass is with
** the letters H, W, and Y.
**
** Like midClass, it has 128 entries, one per ASCII code in ascending order.
**
** NOTE(review): comparing the two tables as they are written in this file,
** the entries that actually differ are the apostrophe (CCLASS_SILENT in
** midClass, CCLASS_OTHER here) and Y/y (CCLASS_VOWEL in midClass, CCLASS_Y
** here). H/h and W/w carry the same class in both tables. Confirm against
** the upstream spellfix.c whether the sentence above or the data is intended.
*/
const unsigned char initClass[] = {
    /*   */ CCLASS_OTHER,  /*   */ CCLASS_OTHER, /*   */ CCLASS_OTHER,
    /*   */ CCLASS_OTHER,  /*   */ CCLASS_OTHER, /*   */ CCLASS_OTHER,
    /*   */ CCLASS_OTHER,  /*   */ CCLASS_OTHER, /*   */ CCLASS_OTHER,
    /*   */ CCLASS_SPACE,  /*   */ CCLASS_OTHER, /*   */ CCLASS_OTHER,
    /*   */ CCLASS_SPACE,  /*   */ CCLASS_SPACE, /*   */ CCLASS_OTHER,
    /*   */ CCLASS_OTHER,  /*   */ CCLASS_OTHER, /*   */ CCLASS_OTHER,
    /*   */ CCLASS_OTHER,  /*   */ CCLASS_OTHER, /*   */ CCLASS_OTHER,
    /*   */ CCLASS_OTHER,  /*   */ CCLASS_OTHER, /*   */ CCLASS_OTHER,
    /*   */ CCLASS_OTHER,  /*   */ CCLASS_OTHER, /*   */ CCLASS_OTHER,
    /*   */ CCLASS_OTHER,  /*   */ CCLASS_OTHER, /*   */ CCLASS_OTHER,
    /*   */ CCLASS_OTHER,  /*   */ CCLASS_OTHER, /*   */ CCLASS_SPACE,
    /* ! */ CCLASS_OTHER,  /* " */ CCLASS_OTHER, /* # */ CCLASS_OTHER,
    /* $ */ CCLASS_OTHER,  /* % */ CCLASS_OTHER, /* & */ CCLASS_OTHER,
    /* ' */ CCLASS_OTHER,  /* ( */ CCLASS_OTHER, /* ) */ CCLASS_OTHER,
    /* * */ CCLASS_OTHER,  /* + */ CCLASS_OTHER, /* , */ CCLASS_OTHER,
    /* - */ CCLASS_OTHER,  /* . */ CCLASS_OTHER, /* / */ CCLASS_OTHER,
    /* 0 */ CCLASS_DIGIT,  /* 1 */ CCLASS_DIGIT, /* 2 */ CCLASS_DIGIT,
    /* 3 */ CCLASS_DIGIT,  /* 4 */ CCLASS_DIGIT, /* 5 */ CCLASS_DIGIT,
    /* 6 */ CCLASS_DIGIT,  /* 7 */ CCLASS_DIGIT, /* 8 */ CCLASS_DIGIT,
    /* 9 */ CCLASS_DIGIT,  /* : */ CCLASS_OTHER, /* ; */ CCLASS_OTHER,
    /* < */ CCLASS_OTHER,  /* = */ CCLASS_OTHER, /* > */ CCLASS_OTHER,
    /* ? */ CCLASS_OTHER,  /* @ */ CCLASS_OTHER, /* A */ CCLASS_VOWEL,
    /* B */ CCLASS_B,      /* C */ CCLASS_C,     /* D */ CCLASS_D,
    /* E */ CCLASS_VOWEL,  /* F */ CCLASS_B,     /* G */ CCLASS_C,
    /* H */ CCLASS_SILENT, /* I */ CCLASS_VOWEL, /* J */ CCLASS_C,
    /* K */ CCLASS_C,      /* L */ CCLASS_L,     /* M */ CCLASS_M,
    /* N */ CCLASS_M,      /* O */ CCLASS_VOWEL, /* P */ CCLASS_B,
    /* Q */ CCLASS_C,      /* R */ CCLASS_R,     /* S */ CCLASS_C,
    /* T */ CCLASS_D,      /* U */ CCLASS_VOWEL, /* V */ CCLASS_B,
    /* W */ CCLASS_B,      /* X */ CCLASS_C,     /* Y */ CCLASS_Y,
    /* Z */ CCLASS_C,      /* [ */ CCLASS_OTHER, /* \ */ CCLASS_OTHER,
    /* ] */ CCLASS_OTHER,  /* ^ */ CCLASS_OTHER, /* _ */ CCLASS_OTHER,
    /* ` */ CCLASS_OTHER,  /* a */ CCLASS_VOWEL, /* b */ CCLASS_B,
    /* c */ CCLASS_C,      /* d */ CCLASS_D,     /* e */ CCLASS_VOWEL,
    /* f */ CCLASS_B,      /* g */ CCLASS_C,     /* h */ CCLASS_SILENT,
    /* i */ CCLASS_VOWEL,  /* j */ CCLASS_C,     /* k */ CCLASS_C,
    /* l */ CCLASS_L,      /* m */ CCLASS_M,     /* n */ CCLASS_M,
    /* o */ CCLASS_VOWEL,  /* p */ CCLASS_B,     /* q */ CCLASS_C,
    /* r */ CCLASS_R,      /* s */ CCLASS_C,     /* t */ CCLASS_D,
    /* u */ CCLASS_VOWEL,  /* v */ CCLASS_B,     /* w */ CCLASS_B,
    /* x */ CCLASS_C,      /* y */ CCLASS_Y,     /* z */ CCLASS_C,
    /* { */ CCLASS_OTHER,  /* | */ CCLASS_OTHER, /* } */ CCLASS_OTHER,
    /* ~ */ CCLASS_OTHER,  /*   */ CCLASS_OTHER,
};
106 | 
/*
** Mapping from the character class number to a symbol for each character
** class.  The string holds 13 symbols, so valid class numbers are 0-12
** (index 13 is the terminating NUL).  Note that initClass[] can be used to
** map the class symbol back into the class number.
**
** NOTE(review): the numeric values of the CCLASS_* constants are defined in
** common.h; verify there that they match the order of the symbols below.
*/
const unsigned char className[] = ".ABCDHLRMY9 ?";


--------------------------------------------------------------------------------
/src/crypto/md5.c:
--------------------------------------------------------------------------------
  1 | /*********************************************************************
  2 |  * Filename:   md5.c
  3 |  * Author:     Brad Conte (brad AT bradconte.com)
  4 |  * Source:     https://github.com/B-Con/crypto-algorithms
  5 |  * License:    Public Domain
  6 |  * Details:    Implementation of the MD5 hashing algorithm.
  7 |  * Algorithm specification can be found here:
  8 |  * http://tools.ietf.org/html/rfc1321
  9 |  * This implementation uses little endian byte order.
 10 |  *********************************************************************/
 11 | 
 12 | /*************************** HEADER FILES ***************************/
 13 | #include "md5.h"
 14 | 
 15 | #include <memory.h>
 16 | #include <stdlib.h>
 17 | 
 18 | /****************************** MACROS ******************************/
 19 | #define ROTLEFT(a, b) ((a << b) | (a >> (32 - b)))
 20 | 
 21 | #define F(x, y, z) ((x & y) | (~x & z))
 22 | #define G(x, y, z) ((x & z) | (y & ~z))
 23 | #define H(x, y, z) (x ^ y ^ z)
 24 | #define I(x, y, z) (y ^ (x | ~z))
 25 | 
 26 | #define FF(a, b, c, d, m, s, t)  \
 27 |     {                            \
 28 |         a += F(b, c, d) + m + t; \
 29 |         a = b + ROTLEFT(a, s);   \
 30 |     }
 31 | #define GG(a, b, c, d, m, s, t)  \
 32 |     {                            \
 33 |         a += G(b, c, d) + m + t; \
 34 |         a = b + ROTLEFT(a, s);   \
 35 |     }
 36 | #define HH(a, b, c, d, m, s, t)  \
 37 |     {                            \
 38 |         a += H(b, c, d) + m + t; \
 39 |         a = b + ROTLEFT(a, s);   \
 40 |     }
 41 | #define II(a, b, c, d, m, s, t)  \
 42 |     {                            \
 43 |         a += I(b, c, d) + m + t; \
 44 |         a = b + ROTLEFT(a, s);   \
 45 |     }
 46 | 
 47 | /*********************** FUNCTION DEFINITIONS ***********************/
 48 | void md5_transform(MD5_CTX* ctx, const BYTE data[]) {
 49 |     WORD a, b, c, d, m[16], i, j;
 50 | 
 51 |     // MD5 specifies big endian byte order, but this implementation assumes a little
 52 |     // endian byte order CPU. Reverse all the bytes upon input, and re-reverse them
 53 |     // on output (in md5_final()).
 54 |     for (i = 0, j = 0; i < 16; ++i, j += 4)
 55 |         m[i] = (data[j]) + (data[j + 1] << 8) + (data[j + 2] << 16) + (data[j + 3] << 24);
 56 | 
 57 |     a = ctx->state[0];
 58 |     b = ctx->state[1];
 59 |     c = ctx->state[2];
 60 |     d = ctx->state[3];
 61 | 
 62 |     FF(a, b, c, d, m[0], 7, 0xd76aa478);
 63 |     FF(d, a, b, c, m[1], 12, 0xe8c7b756);
 64 |     FF(c, d, a, b, m[2], 17, 0x242070db);
 65 |     FF(b, c, d, a, m[3], 22, 0xc1bdceee);
 66 |     FF(a, b, c, d, m[4], 7, 0xf57c0faf);
 67 |     FF(d, a, b, c, m[5], 12, 0x4787c62a);
 68 |     FF(c, d, a, b, m[6], 17, 0xa8304613);
 69 |     FF(b, c, d, a, m[7], 22, 0xfd469501);
 70 |     FF(a, b, c, d, m[8], 7, 0x698098d8);
 71 |     FF(d, a, b, c, m[9], 12, 0x8b44f7af);
 72 |     FF(c, d, a, b, m[10], 17, 0xffff5bb1);
 73 |     FF(b, c, d, a, m[11], 22, 0x895cd7be);
 74 |     FF(a, b, c, d, m[12], 7, 0x6b901122);
 75 |     FF(d, a, b, c, m[13], 12, 0xfd987193);
 76 |     FF(c, d, a, b, m[14], 17, 0xa679438e);
 77 |     FF(b, c, d, a, m[15], 22, 0x49b40821);
 78 | 
 79 |     GG(a, b, c, d, m[1], 5, 0xf61e2562);
 80 |     GG(d, a, b, c, m[6], 9, 0xc040b340);
 81 |     GG(c, d, a, b, m[11], 14, 0x265e5a51);
 82 |     GG(b, c, d, a, m[0], 20, 0xe9b6c7aa);
 83 |     GG(a, b, c, d, m[5], 5, 0xd62f105d);
 84 |     GG(d, a, b, c, m[10], 9, 0x02441453);
 85 |     GG(c, d, a, b, m[15], 14, 0xd8a1e681);
 86 |     GG(b, c, d, a, m[4], 20, 0xe7d3fbc8);
 87 |     GG(a, b, c, d, m[9], 5, 0x21e1cde6);
 88 |     GG(d, a, b, c, m[14], 9, 0xc33707d6);
 89 |     GG(c, d, a, b, m[3], 14, 0xf4d50d87);
 90 |     GG(b, c, d, a, m[8], 20, 0x455a14ed);
 91 |     GG(a, b, c, d, m[13], 5, 0xa9e3e905);
 92 |     GG(d, a, b, c, m[2], 9, 0xfcefa3f8);
 93 |     GG(c, d, a, b, m[7], 14, 0x676f02d9);
 94 |     GG(b, c, d, a, m[12], 20, 0x8d2a4c8a);
 95 | 
 96 |     HH(a, b, c, d, m[5], 4, 0xfffa3942);
 97 |     HH(d, a, b, c, m[8], 11, 0x8771f681);
 98 |     HH(c, d, a, b, m[11], 16, 0x6d9d6122);
 99 |     HH(b, c, d, a, m[14], 23, 0xfde5380c);
100 |     HH(a, b, c, d, m[1], 4, 0xa4beea44);
101 |     HH(d, a, b, c, m[4], 11, 0x4bdecfa9);
102 |     HH(c, d, a, b, m[7], 16, 0xf6bb4b60);
103 |     HH(b, c, d, a, m[10], 23, 0xbebfbc70);
104 |     HH(a, b, c, d, m[13], 4, 0x289b7ec6);
105 |     HH(d, a, b, c, m[0], 11, 0xeaa127fa);
106 |     HH(c, d, a, b, m[3], 16, 0xd4ef3085);
107 |     HH(b, c, d, a, m[6], 23, 0x04881d05);
108 |     HH(a, b, c, d, m[9], 4, 0xd9d4d039);
109 |     HH(d, a, b, c, m[12], 11, 0xe6db99e5);
110 |     HH(c, d, a, b, m[15], 16, 0x1fa27cf8);
111 |     HH(b, c, d, a, m[2], 23, 0xc4ac5665);
112 | 
113 |     II(a, b, c, d, m[0], 6, 0xf4292244);
114 |     II(d, a, b, c, m[7], 10, 0x432aff97);
115 |     II(c, d, a, b, m[14], 15, 0xab9423a7);
116 |     II(b, c, d, a, m[5], 21, 0xfc93a039);
117 |     II(a, b, c, d, m[12], 6, 0x655b59c3);
118 |     II(d, a, b, c, m[3], 10, 0x8f0ccc92);
119 |     II(c, d, a, b, m[10], 15, 0xffeff47d);
120 |     II(b, c, d, a, m[1], 21, 0x85845dd1);
121 |     II(a, b, c, d, m[8], 6, 0x6fa87e4f);
122 |     II(d, a, b, c, m[15], 10, 0xfe2ce6e0);
123 |     II(c, d, a, b, m[6], 15, 0xa3014314);
124 |     II(b, c, d, a, m[13], 21, 0x4e0811a1);
125 |     II(a, b, c, d, m[4], 6, 0xf7537e82);
126 |     II(d, a, b, c, m[11], 10, 0xbd3af235);
127 |     II(c, d, a, b, m[2], 15, 0x2ad7d2bb);
128 |     II(b, c, d, a, m[9], 21, 0xeb86d391);
129 | 
130 |     ctx->state[0] += a;
131 |     ctx->state[1] += b;
132 |     ctx->state[2] += c;
133 |     ctx->state[3] += d;
134 | }
135 | 
136 | void* md5_init() {
137 |     MD5_CTX* ctx;
138 |     ctx = malloc(sizeof(MD5_CTX));
139 |     ctx->datalen = 0;
140 |     ctx->bitlen = 0;
141 |     ctx->state[0] = 0x67452301;
142 |     ctx->state[1] = 0xEFCDAB89;
143 |     ctx->state[2] = 0x98BADCFE;
144 |     ctx->state[3] = 0x10325476;
145 |     return ctx;
146 | }
147 | 
148 | void md5_update(MD5_CTX* ctx, const BYTE data[], size_t len) {
149 |     size_t i;
150 | 
151 |     for (i = 0; i < len; ++i) {
152 |         ctx->data[ctx->datalen] = data[i];
153 |         ctx->datalen++;
154 |         if (ctx->datalen == 64) {
155 |             md5_transform(ctx, ctx->data);
156 |             ctx->bitlen += 512;
157 |             ctx->datalen = 0;
158 |         }
159 |     }
160 | }
161 | 
162 | int md5_final(MD5_CTX* ctx, BYTE hash[]) {
163 |     size_t i;
164 | 
165 |     i = ctx->datalen;
166 | 
167 |     // Pad whatever data is left in the buffer.
168 |     if (ctx->datalen < 56) {
169 |         ctx->data[i++] = 0x80;
170 |         while (i < 56)
171 |             ctx->data[i++] = 0x00;
172 |     } else if (ctx->datalen >= 56) {
173 |         ctx->data[i++] = 0x80;
174 |         while (i < 64)
175 |             ctx->data[i++] = 0x00;
176 |         md5_transform(ctx, ctx->data);
177 |         memset(ctx->data, 0, 56);
178 |     }
179 | 
180 |     // Append to the padding the total message's length in bits and transform.
181 |     ctx->bitlen += ctx->datalen * 8;
182 |     ctx->data[56] = ctx->bitlen;
183 |     ctx->data[57] = ctx->bitlen >> 8;
184 |     ctx->data[58] = ctx->bitlen >> 16;
185 |     ctx->data[59] = ctx->bitlen >> 24;
186 |     ctx->data[60] = ctx->bitlen >> 32;
187 |     ctx->data[61] = ctx->bitlen >> 40;
188 |     ctx->data[62] = ctx->bitlen >> 48;
189 |     ctx->data[63] = ctx->bitlen >> 56;
190 |     md5_transform(ctx, ctx->data);
191 | 
192 |     // Since this implementation uses little endian byte ordering and MD uses big endian,
193 |     // reverse all the bytes when copying the final state to the output hash.
194 |     for (i = 0; i < 4; ++i) {
195 |         hash[i] = (ctx->state[0] >> (i * 8)) & 0x000000ff;
196 |         hash[i + 4] = (ctx->state[1] >> (i * 8)) & 0x000000ff;
197 |         hash[i + 8] = (ctx->state[2] >> (i * 8)) & 0x000000ff;
198 |         hash[i + 12] = (ctx->state[3] >> (i * 8)) & 0x000000ff;
199 |     }
200 |     free(ctx);
201 |     return MD5_BLOCK_SIZE;
202 | }
203 | 


--------------------------------------------------------------------------------
/src/crypto/sha1.c:
--------------------------------------------------------------------------------
  1 | // Originally from the sha1 SQLite extension, Public Domain
  2 | // https://sqlite.org/src/file/ext/misc/sha1.c
  3 | // Modified by Anton Zhiyanov, https://github.com/nalgeon/sqlean/, MIT License
  4 | 
  5 | #include "sha1.h"
  6 | 
  7 | #include <assert.h>
  8 | #include <stdarg.h>
  9 | #include <stdlib.h>
 10 | #include <string.h>
 11 | 
 12 | #define SHA_ROT(x, l, r) ((x) << (l) | (x) >> (r))
 13 | #define rol(x, k) SHA_ROT(x, k, 32 - (k))
 14 | #define ror(x, k) SHA_ROT(x, 32 - (k), k)
 15 | 
 16 | #define blk0le(i) (block[i] = (ror(block[i], 8) & 0xFF00FF00) | (rol(block[i], 8) & 0x00FF00FF))
 17 | #define blk0be(i) block[i]
 18 | #define blk(i)       \
 19 |     (block[i & 15] = \
 20 |          rol(block[(i + 13) & 15] ^ block[(i + 8) & 15] ^ block[(i + 2) & 15] ^ block[i & 15], 1))
 21 | 
 22 | /*
 23 |  * (R0+R1), R2, R3, R4 are the different operations (rounds) used in SHA1
 24 |  *
 25 |  * Rl0() for little-endian and Rb0() for big-endian.  Endianness is
 26 |  * determined at run-time.
 27 |  */
 28 | #define Rl0(v, w, x, y, z, i)                                      \
 29 |     z += ((w & (x ^ y)) ^ y) + blk0le(i) + 0x5A827999 + rol(v, 5); \
 30 |     w = ror(w, 2);
 31 | #define Rb0(v, w, x, y, z, i)                                      \
 32 |     z += ((w & (x ^ y)) ^ y) + blk0be(i) + 0x5A827999 + rol(v, 5); \
 33 |     w = ror(w, 2);
 34 | #define R1(v, w, x, y, z, i)                                    \
 35 |     z += ((w & (x ^ y)) ^ y) + blk(i) + 0x5A827999 + rol(v, 5); \
 36 |     w = ror(w, 2);
 37 | #define R2(v, w, x, y, z, i)                            \
 38 |     z += (w ^ x ^ y) + blk(i) + 0x6ED9EBA1 + rol(v, 5); \
 39 |     w = ror(w, 2);
 40 | #define R3(v, w, x, y, z, i)                                          \
 41 |     z += (((w | x) & y) | (w & x)) + blk(i) + 0x8F1BBCDC + rol(v, 5); \
 42 |     w = ror(w, 2);
 43 | #define R4(v, w, x, y, z, i)                            \
 44 |     z += (w ^ x ^ y) + blk(i) + 0xCA62C1D6 + rol(v, 5); \
 45 |     w = ror(w, 2);
 46 | 
 47 | /*
 48 |  * Hash a single 512-bit block. This is the core of the algorithm.
 49 |  */
 50 | void SHA1Transform(unsigned int state[5], const unsigned char buffer[64]) {
 51 |     unsigned int qq[5]; /* a, b, c, d, e; */
 52 |     static int one = 1;
 53 |     unsigned int block[16];
 54 |     memcpy(block, buffer, 64);
 55 |     memcpy(qq, state, 5 * sizeof(unsigned int));
 56 | 
 57 | #define a qq[0]
 58 | #define b qq[1]
 59 | #define c qq[2]
 60 | #define d qq[3]
 61 | #define e qq[4]
 62 | 
 63 |     /* Copy ctx->state[] to working vars */
 64 |     /*
 65 |   a = state[0];
 66 |   b = state[1];
 67 |   c = state[2];
 68 |   d = state[3];
 69 |   e = state[4];
 70 |   */
 71 | 
 72 |     /* 4 rounds of 20 operations each. Loop unrolled. */
 73 |     if (1 == *(unsigned char*)&one) {
 74 |         Rl0(a, b, c, d, e, 0);
 75 |         Rl0(e, a, b, c, d, 1);
 76 |         Rl0(d, e, a, b, c, 2);
 77 |         Rl0(c, d, e, a, b, 3);
 78 |         Rl0(b, c, d, e, a, 4);
 79 |         Rl0(a, b, c, d, e, 5);
 80 |         Rl0(e, a, b, c, d, 6);
 81 |         Rl0(d, e, a, b, c, 7);
 82 |         Rl0(c, d, e, a, b, 8);
 83 |         Rl0(b, c, d, e, a, 9);
 84 |         Rl0(a, b, c, d, e, 10);
 85 |         Rl0(e, a, b, c, d, 11);
 86 |         Rl0(d, e, a, b, c, 12);
 87 |         Rl0(c, d, e, a, b, 13);
 88 |         Rl0(b, c, d, e, a, 14);
 89 |         Rl0(a, b, c, d, e, 15);
 90 |     } else {
 91 |         Rb0(a, b, c, d, e, 0);
 92 |         Rb0(e, a, b, c, d, 1);
 93 |         Rb0(d, e, a, b, c, 2);
 94 |         Rb0(c, d, e, a, b, 3);
 95 |         Rb0(b, c, d, e, a, 4);
 96 |         Rb0(a, b, c, d, e, 5);
 97 |         Rb0(e, a, b, c, d, 6);
 98 |         Rb0(d, e, a, b, c, 7);
 99 |         Rb0(c, d, e, a, b, 8);
100 |         Rb0(b, c, d, e, a, 9);
101 |         Rb0(a, b, c, d, e, 10);
102 |         Rb0(e, a, b, c, d, 11);
103 |         Rb0(d, e, a, b, c, 12);
104 |         Rb0(c, d, e, a, b, 13);
105 |         Rb0(b, c, d, e, a, 14);
106 |         Rb0(a, b, c, d, e, 15);
107 |     }
108 |     R1(e, a, b, c, d, 16);
109 |     R1(d, e, a, b, c, 17);
110 |     R1(c, d, e, a, b, 18);
111 |     R1(b, c, d, e, a, 19);
112 |     R2(a, b, c, d, e, 20);
113 |     R2(e, a, b, c, d, 21);
114 |     R2(d, e, a, b, c, 22);
115 |     R2(c, d, e, a, b, 23);
116 |     R2(b, c, d, e, a, 24);
117 |     R2(a, b, c, d, e, 25);
118 |     R2(e, a, b, c, d, 26);
119 |     R2(d, e, a, b, c, 27);
120 |     R2(c, d, e, a, b, 28);
121 |     R2(b, c, d, e, a, 29);
122 |     R2(a, b, c, d, e, 30);
123 |     R2(e, a, b, c, d, 31);
124 |     R2(d, e, a, b, c, 32);
125 |     R2(c, d, e, a, b, 33);
126 |     R2(b, c, d, e, a, 34);
127 |     R2(a, b, c, d, e, 35);
128 |     R2(e, a, b, c, d, 36);
129 |     R2(d, e, a, b, c, 37);
130 |     R2(c, d, e, a, b, 38);
131 |     R2(b, c, d, e, a, 39);
132 |     R3(a, b, c, d, e, 40);
133 |     R3(e, a, b, c, d, 41);
134 |     R3(d, e, a, b, c, 42);
135 |     R3(c, d, e, a, b, 43);
136 |     R3(b, c, d, e, a, 44);
137 |     R3(a, b, c, d, e, 45);
138 |     R3(e, a, b, c, d, 46);
139 |     R3(d, e, a, b, c, 47);
140 |     R3(c, d, e, a, b, 48);
141 |     R3(b, c, d, e, a, 49);
142 |     R3(a, b, c, d, e, 50);
143 |     R3(e, a, b, c, d, 51);
144 |     R3(d, e, a, b, c, 52);
145 |     R3(c, d, e, a, b, 53);
146 |     R3(b, c, d, e, a, 54);
147 |     R3(a, b, c, d, e, 55);
148 |     R3(e, a, b, c, d, 56);
149 |     R3(d, e, a, b, c, 57);
150 |     R3(c, d, e, a, b, 58);
151 |     R3(b, c, d, e, a, 59);
152 |     R4(a, b, c, d, e, 60);
153 |     R4(e, a, b, c, d, 61);
154 |     R4(d, e, a, b, c, 62);
155 |     R4(c, d, e, a, b, 63);
156 |     R4(b, c, d, e, a, 64);
157 |     R4(a, b, c, d, e, 65);
158 |     R4(e, a, b, c, d, 66);
159 |     R4(d, e, a, b, c, 67);
160 |     R4(c, d, e, a, b, 68);
161 |     R4(b, c, d, e, a, 69);
162 |     R4(a, b, c, d, e, 70);
163 |     R4(e, a, b, c, d, 71);
164 |     R4(d, e, a, b, c, 72);
165 |     R4(c, d, e, a, b, 73);
166 |     R4(b, c, d, e, a, 74);
167 |     R4(a, b, c, d, e, 75);
168 |     R4(e, a, b, c, d, 76);
169 |     R4(d, e, a, b, c, 77);
170 |     R4(c, d, e, a, b, 78);
171 |     R4(b, c, d, e, a, 79);
172 | 
173 |     /* Add the working vars back into context.state[] */
174 |     state[0] += a;
175 |     state[1] += b;
176 |     state[2] += c;
177 |     state[3] += d;
178 |     state[4] += e;
179 | 
180 | #undef a
181 | #undef b
182 | #undef c
183 | #undef d
184 | #undef e
185 | }
186 | 
187 | /* Initialize a SHA1 context */
188 | void* sha1_init() {
189 |     /* SHA1 initialization constants */
190 |     SHA1Context* ctx;
191 |     ctx = malloc(sizeof(SHA1Context));
192 |     ctx->state[0] = 0x67452301;
193 |     ctx->state[1] = 0xEFCDAB89;
194 |     ctx->state[2] = 0x98BADCFE;
195 |     ctx->state[3] = 0x10325476;
196 |     ctx->state[4] = 0xC3D2E1F0;
197 |     ctx->count[0] = ctx->count[1] = 0;
198 |     return ctx;
199 | }
200 | 
201 | /* Add new content to the SHA1 hash */
202 | void sha1_update(SHA1Context* ctx, const unsigned char* data, size_t len) {
203 |     unsigned int i, j;
204 | 
205 |     j = ctx->count[0];
206 |     if ((ctx->count[0] += len << 3) < j) {
207 |         ctx->count[1] += (len >> 29) + 1;
208 |     }
209 |     j = (j >> 3) & 63;
210 |     if ((j + len) > 63) {
211 |         (void)memcpy(&ctx->buffer[j], data, (i = 64 - j));
212 |         SHA1Transform(ctx->state, ctx->buffer);
213 |         for (; i + 63 < len; i += 64) {
214 |             SHA1Transform(ctx->state, &data[i]);
215 |         }
216 |         j = 0;
217 |     } else {
218 |         i = 0;
219 |     }
220 |     (void)memcpy(&ctx->buffer[j], &data[i], len - i);
221 | }
222 | 
223 | int sha1_final(SHA1Context* ctx, unsigned char hash[]) {
224 |     unsigned int i;
225 |     unsigned char finalcount[8];
226 |     static const char zEncode[] = "0123456789abcdef";
227 | 
228 |     for (i = 0; i < 8; i++) {
229 |         finalcount[i] = (unsigned char)((ctx->count[(i >= 4 ? 0 : 1)] >> ((3 - (i & 3)) * 8)) &
230 |                                         255); /* Endian independent */
231 |     }
232 |     sha1_update(ctx, (const unsigned char*)"\200", 1);
233 |     while ((ctx->count[0] & 504) != 448) {
234 |         sha1_update(ctx, (const unsigned char*)"\0", 1);
235 |     }
236 |     sha1_update(ctx, finalcount, 8); /* Should cause a SHA1Transform() */
237 |     for (i = 0; i < 20; i++) {
238 |         hash[i] = (unsigned char)((ctx->state[i >> 2] >> ((3 - (i & 3)) * 8)) & 255);
239 |     }
240 |     free(ctx);
241 |     return SHA1_BLOCK_SIZE;
242 | }


--------------------------------------------------------------------------------
/src/sqlite3-ipaddr.c:
--------------------------------------------------------------------------------
  1 | // Copyright (c) 2021 Vincent Bernat, MIT License
  2 | // https://github.com/nalgeon/sqlean
  3 | 
  4 | /*
  5 |  * SQLite IP address functions.
  6 |  */
  7 | #include <arpa/inet.h>
  8 | #include <assert.h>
  9 | #include <errno.h>
 10 | #include <inttypes.h>
 11 | #include <stdint.h>
 12 | #include <stdio.h>
 13 | #include <stdlib.h>
 14 | #include <string.h>
 15 | 
 16 | #include "sqlite3ext.h"
 17 | 
 18 | SQLITE_EXTENSION_INIT1
 19 | 
/*
 * A parsed IP address together with its prefix length, as produced by
 * parse_ipaddress().
 */
struct ipaddress {
    int af; /* address family: AF_INET or AF_INET6 */
    union {
        struct in6_addr ipv6; /* binary address as written by inet_pton() when af == AF_INET6 */
        struct in_addr ipv4;  /* binary address as written by inet_pton() when af == AF_INET */
    };
    unsigned masklen; /* prefix length in bits; 32 (IPv4) or 128 (IPv6) when the input has no "/len" */
};
 28 | 
 29 | static struct ipaddress* parse_ipaddress(const char* address) {
 30 |     struct ipaddress* ip = NULL;
 31 |     unsigned char buf[sizeof(struct in6_addr)];
 32 |     char* sep = strchr(address, '/');
 33 |     unsigned long masklen;
 34 |     if (sep) {
 35 |         char* end;
 36 |         errno = 0;
 37 |         masklen = strtoul(sep + 1, &end, 10);
 38 |         if (errno != 0 || sep + 1 == end || *end != '\0')
 39 |             return NULL;
 40 |         *sep = '\0';
 41 |     }
 42 |     if (inet_pton(AF_INET, address, buf)) {
 43 |         if (sep && masklen > 32)
 44 |             goto end;
 45 | 
 46 |         ip = sqlite3_malloc(sizeof(struct ipaddress));
 47 |         memcpy(&ip->ipv4, buf, sizeof(struct in_addr));
 48 |         ip->af = AF_INET;
 49 |         ip->masklen = sep ? masklen : 32;
 50 |     } else if (inet_pton(AF_INET6, address, buf)) {
 51 |         if (sep && masklen > 128)
 52 |             goto end;
 53 | 
 54 |         ip = sqlite3_malloc(sizeof(struct ipaddress));
 55 |         memcpy(&ip->ipv6, buf, sizeof(struct in6_addr));
 56 |         ip->af = AF_INET6;
 57 |         ip->masklen = sep ? masklen : 128;
 58 |     }
 59 | end:
 60 |     if (sep)
 61 |         *sep = '/';
 62 |     return ip;
 63 | }
 64 | 
 65 | static void sqlite3_ipfamily(sqlite3_context* context, int argc, sqlite3_value** argv) {
 66 |     assert(argc == 1);
 67 |     if (sqlite3_value_type(argv[0]) == SQLITE_NULL) {
 68 |         sqlite3_result_null(context);
 69 |         return;
 70 |     }
 71 |     const char* address = (char*)sqlite3_value_text(argv[0]);
 72 |     struct ipaddress* ip = parse_ipaddress(address);
 73 |     if (ip == NULL) {
 74 |         sqlite3_result_null(context);
 75 |         return;
 76 |     }
 77 |     sqlite3_result_int(context, ip->af == AF_INET ? 4 : 6);
 78 |     sqlite3_free(ip);
 79 | }
 80 | 
 81 | static void sqlite3_iphost(sqlite3_context* context, int argc, sqlite3_value** argv) {
 82 |     assert(argc == 1);
 83 |     if (sqlite3_value_type(argv[0]) == SQLITE_NULL) {
 84 |         sqlite3_result_null(context);
 85 |         return;
 86 |     }
 87 |     const char* address = (char*)sqlite3_value_text(argv[0]);
 88 |     struct ipaddress* ip = parse_ipaddress(address);
 89 |     if (ip == NULL) {
 90 |         sqlite3_result_null(context);
 91 |         return;
 92 |     }
 93 |     if (ip->af == AF_INET) {
 94 |         char* result = sqlite3_malloc(INET_ADDRSTRLEN);
 95 |         inet_ntop(AF_INET, &ip->ipv4, result, INET_ADDRSTRLEN);
 96 |         sqlite3_result_text(context, result, -1, sqlite3_free);
 97 |     } else if (ip->af == AF_INET6) {
 98 |         char* result = sqlite3_malloc(INET6_ADDRSTRLEN);
 99 |         inet_ntop(AF_INET6, &ip->ipv6, result, INET6_ADDRSTRLEN);
100 |         sqlite3_result_text(context, result, -1, sqlite3_free);
101 |     }
102 |     sqlite3_free(ip);
103 | }
104 | 
105 | static void sqlite3_ipmasklen(sqlite3_context* context, int argc, sqlite3_value** argv) {
106 |     assert(argc == 1);
107 |     if (sqlite3_value_type(argv[0]) == SQLITE_NULL) {
108 |         sqlite3_result_null(context);
109 |         return;
110 |     }
111 |     const char* address = (char*)sqlite3_value_text(argv[0]);
112 |     struct ipaddress* ip = parse_ipaddress(address);
113 |     if (ip == NULL) {
114 |         sqlite3_result_null(context);
115 |         return;
116 |     }
117 |     sqlite3_result_int(context, ip->masklen);
118 |     return;
119 | }
120 | 
121 | static void sqlite3_ipnetwork(sqlite3_context* context, int argc, sqlite3_value** argv) {
122 |     assert(argc == 1);
123 |     if (sqlite3_value_type(argv[0]) == SQLITE_NULL) {
124 |         sqlite3_result_null(context);
125 |         return;
126 |     }
127 |     const char* address = (char*)sqlite3_value_text(argv[0]);
128 |     struct ipaddress* ip = parse_ipaddress(address);
129 |     if (ip == NULL) {
130 |         sqlite3_result_null(context);
131 |         return;
132 |     }
133 |     if (ip->af == AF_INET) {
134 |         char buf[INET_ADDRSTRLEN];
135 |         ip->ipv4.s_addr =
136 |             htonl(ntohl(ip->ipv4.s_addr) & ~(uint32_t)((1ULL << (32 - ip->masklen)) - 1));
137 |         inet_ntop(AF_INET, &ip->ipv4, buf, INET_ADDRSTRLEN);
138 |         char* result = sqlite3_malloc(INET_ADDRSTRLEN + 3);
139 |         sprintf(result, "%s/%u", buf, ip->masklen);
140 |         sqlite3_result_text(context, result, -1, sqlite3_free);
141 |     } else if (ip->af == AF_INET6) {
142 |         char buf[INET6_ADDRSTRLEN];
143 |         for (unsigned i = 0; i < 16; i++) {
144 |             if (ip->masklen / 8 < i)
145 |                 ip->ipv6.s6_addr[i] = 0;
146 |             else if (ip->masklen / 8 == i)
147 |                 ip->ipv6.s6_addr[i] &= ~(ip->masklen % 8);
148 |         }
149 |         inet_ntop(AF_INET6, &ip->ipv6, buf, INET6_ADDRSTRLEN);
150 |         char* result = sqlite3_malloc(INET6_ADDRSTRLEN + 4);
151 |         sprintf(result, "%s/%u", buf, ip->masklen);
152 |         sqlite3_result_text(context, result, -1, sqlite3_free);
153 |     }
154 |     sqlite3_free(ip);
155 | }
156 | 
157 | static void sqlite3_ipcontains(sqlite3_context* context, int argc, sqlite3_value** argv) {
158 |     assert(argc == 2);
159 |     if (sqlite3_value_type(argv[0]) == SQLITE_NULL || sqlite3_value_type(argv[1]) == SQLITE_NULL) {
160 |         sqlite3_result_null(context);
161 |         return;
162 |     }
163 | 
164 |     const char* address1 = (char*)sqlite3_value_text(argv[0]);
165 |     struct ipaddress* ip1 = parse_ipaddress(address1);
166 |     const char* address2 = (char*)sqlite3_value_text(argv[1]);
167 |     struct ipaddress* ip2 = parse_ipaddress(address2);
168 |     if (ip1 == NULL || ip2 == NULL) {
169 |         sqlite3_result_null(context);
170 |         goto end;
171 |     }
172 |     if (ip1->af != ip2->af || ip1->masklen > ip2->masklen) {
173 |         sqlite3_result_int(context, 0);
174 |         goto end;
175 |     }
176 | 
177 |     if (ip1->af == AF_INET) {
178 |         ip1->ipv4.s_addr =
179 |             htonl(ntohl(ip1->ipv4.s_addr) & ~(uint32_t)((1ULL << (32 - ip1->masklen)) - 1));
180 |         ip2->ipv4.s_addr =
181 |             htonl(ntohl(ip2->ipv4.s_addr) & ~(uint32_t)((1ULL << (32 - ip1->masklen)) - 1));
182 |         sqlite3_result_int(context, ip1->ipv4.s_addr == ip2->ipv4.s_addr);
183 |         goto end;
184 |     }
185 |     if (ip1->af == AF_INET6) {
186 |         for (unsigned i = 0; i < 16; i++) {
187 |             if (ip1->masklen / 8 < i) {
188 |                 ip1->ipv6.s6_addr[i] = 0;
189 |                 ip2->ipv6.s6_addr[i] = 0;
190 |             } else if (ip1->masklen / 8 == i) {
191 |                 ip1->ipv6.s6_addr[i] &= ~(ip1->masklen % 8);
192 |                 ip2->ipv6.s6_addr[i] &= ~(ip1->masklen % 8);
193 |             }
194 |             if (ip1->ipv6.s6_addr[i] != ip2->ipv6.s6_addr[i]) {
195 |                 sqlite3_result_int(context, 0);
196 |                 goto end;
197 |             }
198 |         }
199 |         sqlite3_result_int(context, 1);
200 |     }
201 | end:
202 |     sqlite3_free(ip1);
203 |     sqlite3_free(ip2);
204 | }
205 | 
206 | /*
207 |  * Registers the extension.
208 |  */
209 | #ifdef _WIN32
210 | __declspec(dllexport)
211 | #endif
212 |     int sqlite3_ipaddr_init(sqlite3* db, char** pzErrMsg, const sqlite3_api_routines* pApi) {
213 |     SQLITE_EXTENSION_INIT2(pApi);
214 |     static const int flags = SQLITE_UTF8 | SQLITE_INNOCUOUS | SQLITE_DETERMINISTIC;
215 |     sqlite3_create_function(db, "ipfamily", 1, flags, 0, sqlite3_ipfamily, 0, 0);
216 |     sqlite3_create_function(db, "iphost", 1, flags, 0, sqlite3_iphost, 0, 0);
217 |     sqlite3_create_function(db, "ipmasklen", 1, flags, 0, sqlite3_ipmasklen, 0, 0);
218 |     sqlite3_create_function(db, "ipnetwork", 1, flags, 0, sqlite3_ipnetwork, 0, 0);
219 |     sqlite3_create_function(db, "ipcontains", 2, flags, 0, sqlite3_ipcontains, 0, 0);
220 | 
221 |     return SQLITE_OK;
222 | }
223 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # All the missing SQLite functions
  2 | 
  3 | SQLite has few functions compared to other database management systems. SQLite authors see this as a feature rather than a problem, because SQLite has an extension mechanism in place.
  4 | 
  5 | There are a lot of SQLite extensions out there, but they are incomplete, inconsistent and scattered across the internet. `sqlean` brings them together, neatly packaged into domain modules, documented, tested, and built for Linux, Windows and macOS.
  6 | 
  7 | We do not try to gather all the existing extensions into one giant pile — that would not be very useful. The goal is to create a well-thought-out set of domain modules with a convenient API. A kind of standard library for SQLite.
  8 | 
  9 | To achieve it, we split extensions that are too broad, merge the ones that are too narrow, refactor, add missing features, test, document, and do a ton of other small things.
 10 | 
 11 | ## The main set
 12 | 
 13 | These are the most popular functions. They are tested, documented and organized into domain modules with a clear API.
 14 | 
 15 | Think of them as the extended standard library for SQLite:
 16 | 
 17 | -   [crypto](docs/crypto.md): secure hashes
 18 | -   [fileio](docs/fileio.md): read and write files
 19 | -   [fuzzy](docs/fuzzy.md): fuzzy string matching and phonetics
 20 | -   [ipaddr](docs/ipaddr.md): IP address manipulation
 21 | -   [json1](docs/json1.md): JSON functions
 22 | -   [math](docs/math.md): math functions
 23 | -   [re](docs/re.md): regular expressions
 24 | -   [stats](docs/stats.md): math statistics
 25 | -   [text](docs/text.md): string functions
 26 | -   [unicode](docs/unicode.md): Unicode support
 27 | -   [uuid](docs/uuid.md): Universally Unique IDentifiers
 28 | -   [vsv](docs/vsv.md): CSV files as virtual tables
 29 | 
 30 | ## The incubator
 31 | 
 32 | These extensions haven't yet made their way to the main set. They may be untested, poorly documented, too broad, too narrow, or without a well-thought-out API.
 33 | 
 34 | Think of them as candidates for the standard library:
 35 | 
 36 | - [array](https://github.com/nalgeon/sqlean/issues/27#issuecomment-1004109889): one-dimensional arrays
 37 | - [besttype](https://github.com/nalgeon/sqlean/issues/27#issuecomment-999732640): convert string value to numeric
 38 | - [bloom](https://github.com/nalgeon/sqlean/issues/27#issuecomment-1002267134): a fast way to tell if a value is already in a table
 39 | - [btreeinfo](https://github.com/nalgeon/sqlean/issues/27#issuecomment-1004896027), [memstat](https://github.com/nalgeon/sqlean/issues/27#issuecomment-1007421989), [recsize](https://github.com/nalgeon/sqlean/issues/27#issuecomment-999732907) and [stmt](https://github.com/nalgeon/sqlean/issues/27#issuecomment-1007654407): various database introspection features
 40 | - [cbrt](https://github.com/nalgeon/sqlean/issues/27#issuecomment-996605444) and [math2](https://github.com/nalgeon/sqlean/issues/27#issuecomment-999128539): additional math functions and bit arithmetics
 41 | - [classifier](https://github.com/nalgeon/sqlean/issues/27#issuecomment-1001239676): binary classifier via logistic regression
 42 | - [closure](https://github.com/nalgeon/sqlean/issues/27#issuecomment-1004931771): navigate hierarchic tables with parent/child relationships
 43 | - [compress](https://github.com/nalgeon/sqlean/issues/27#issuecomment-1000937999) and [sqlar](https://github.com/nalgeon/sqlean/issues/27#issuecomment-1000938046): compress / uncompress data
 44 | - [cron](https://github.com/nalgeon/sqlean/issues/27#issuecomment-997427979): match dates against cron patterns
 45 | - [dbdump](https://github.com/nalgeon/sqlean/issues/27#issuecomment-1006791300): export database as SQL
 46 | - [decimal](https://github.com/nalgeon/sqlean/issues/27#issuecomment-1007348326), [fcmp](https://github.com/nalgeon/sqlean/issues/27#issuecomment-997482625) and [ieee754](https://github.com/nalgeon/sqlean/issues/27#issuecomment-1007375162): decimal and floating-point arithmetic
 47 | - [define](https://github.com/nalgeon/sqlean/issues/27#issuecomment-1004347222): create scalar and table-valued functions from SQL
 48 | - [envfuncs](https://github.com/nalgeon/sqlean/issues/27#issuecomment-997423609): read environment variables
 49 | - [eval](https://github.com/nalgeon/sqlean/issues/27#issuecomment-996432840): run arbitrary SQL statements
 50 | - [isodate](https://github.com/nalgeon/sqlean/issues/27#issuecomment-998138191): additional date and time functions
 51 | - [pearson](https://github.com/nalgeon/sqlean/issues/27#issuecomment-997417836): Pearson correlation coefficient between two data sets
 52 | - [pivotvtab](https://github.com/nalgeon/sqlean/issues/27#issuecomment-997052157): pivot tables
 53 | - [prefixes](https://github.com/nalgeon/sqlean/issues/27#issuecomment-1007464840): generate string prefixes
 54 | - [rotate](https://github.com/nalgeon/sqlean/issues/27#issuecomment-1007500659): string obfuscation
 55 | - [spellfix](https://github.com/nalgeon/sqlean/issues/27#issuecomment-1002297477): search a large vocabulary for close matches
 56 | - [stats2](https://github.com/nalgeon/sqlean/issues/27#issuecomment-1000902666) and [stats3](https://github.com/nalgeon/sqlean/issues/27#issuecomment-1002703581): additional math statistics functions
 57 | - [text2](https://github.com/nalgeon/sqlean/issues/27#issuecomment-1003105288): additional string functions
 58 | - [uint](https://github.com/nalgeon/sqlean/issues/27#issuecomment-1001232670): natural string sorting and comparison
 59 | - [unhex](https://github.com/nalgeon/sqlean/issues/27#issuecomment-997432989): reverse for `hex()`
 60 | - [unionvtab](https://github.com/nalgeon/sqlean/issues/27#issuecomment-1007687162): union similar tables into one
 61 | - [xmltojson](https://github.com/nalgeon/sqlean/issues/27#issuecomment-997018486): convert XML to JSON string
 62 | - [zipfile](https://github.com/nalgeon/sqlean/issues/27#issuecomment-1001190336): read and write zip files
 63 | - [zorder](https://github.com/nalgeon/sqlean/issues/27#issuecomment-1007733209): map multidimensional data to a single dimension
 64 | 
 65 | [Vote for your favorites](https://github.com/nalgeon/sqlean/issues/27)! We'll refactor and merge popular ones into the main set.
 66 | 
 67 | ## Download
 68 | 
 69 | There are [precompiled binaries](https://github.com/nalgeon/sqlean/releases/latest) for every OS:
 70 | 
 71 | -   `*.dll` - for Windows
 72 | -   `*.so` - for Linux
 73 | -   `*.dylib` - for macOS
 74 | 
 75 | Binaries are 64-bit and require a 64-bit SQLite version. If you are using SQLite shell on Windows (`sqlite.exe`), its 64-bit version is available at https://github.com/nalgeon/sqlite.
 76 | 
 77 | Incubator extensions are [also available](https://github.com/nalgeon/sqlean/releases/tag/incubator).
 78 | 
 79 | ## Usage
 80 | 
 81 | CLI usage:
 82 | 
 83 | ```
 84 | sqlite> .load ./stats
 85 | sqlite> select median(value) from generate_series(1, 99);
 86 | ```
 87 | 
 88 | IDE usage:
 89 | 
 90 | ```
 91 | select load_extension('c:\Users\anton\sqlite\stats.dll');
 92 | select median(value) from generate_series(1, 99);
 93 | ```
 94 | 
 95 | In-app usage:
 96 | 
 97 | ```python
 98 | import sqlite3
 99 | 
100 | connection = sqlite3.connect(":memory:")
101 | connection.enable_load_extension(True)
102 | connection.load_extension("./stats.so")
103 | connection.execute("select median(value) from generate_series(1, 99)")
104 | connection.close()
105 | ```
106 | 
107 | You can specify any other supported extension instead of `stats`.
108 | 
109 | ## Contributing
110 | 
111 | Contributions are welcome! Submit your own or third-party extension to the incubator:
112 | 
113 | - [How to submit your extension](https://github.com/nalgeon/sqlean/blob/incubator/docs/submit.md)
114 | - [How to submit a third-party extension](https://github.com/nalgeon/sqlean/blob/incubator/docs/external.md)
115 | 
116 | We want every extension to be self-contained. So we limit the project scope to extensions without external dependencies (other than the C standard library and SQLite itself).
117 | 
118 | Please note that we only accept extensions with permissive licenses (MIT License, Apache License etc) or public domain. Copyleft licenses like GPL won't do.
119 | 
120 | ## License
121 | 
122 | Copyright 2021+ [Anton Zhiyanov](https://antonz.org/), [Contributors](https://github.com/nalgeon/sqlean/graphs/contributors) and [Third-party Authors](docs/third-party.md).
123 | 
124 | The software is available under the MIT License.
125 | 
126 | ## Stay tuned
127 | 
128 | Follow [**@ohmypy**](https://twitter.com/ohmypy) on Twitter to keep up with new features 🚀
129 | 


--------------------------------------------------------------------------------
/src/sqlite3-uuid.c:
--------------------------------------------------------------------------------
  1 | // Originally from the uuid SQLite extension, Public Domain
  2 | // https://www.sqlite.org/src/file/ext/misc/uuid.c
  3 | // Modified by Anton Zhiyanov, https://github.com/nalgeon/sqlean/, MIT License
  4 | 
  5 | /*
  6 |  * This SQLite extension implements functions that handle RFC-4122 UUIDs
  7 |  * Three SQL functions are implemented:
  8 |  *
  9 |  *     uuid4()       - generate a version 4 UUID as a string
 10 |  *     uuid_str(X)   - convert a UUID X into a well-formed UUID string
 11 |  *     uuid_blob(X)  - convert a UUID X into a 16-byte blob
 12 |  *
 13 |  * The output from uuid4() and uuid_str(X) are always well-formed RFC-4122
 14 |  * UUID strings in this format:
 15 |  *
 16 |  *        xxxxxxxx-xxxx-Mxxx-Nxxx-xxxxxxxxxxxx
 17 |  *
 18 |  * All of the 'x', 'M', and 'N' values are lower-case hexadecimal digits.
 19 |  * The M digit indicates the "version".  For uuid4()-generated UUIDs, the
 20 |  * version is always "4" (a random UUID).  The upper three bits of N digit
 21 |  * are the "variant".  This library only supports variant 1 (indicated
 22 |  * by values of N between '8' and 'b') as those are overwhelmingly the most
 23 |  * common.  Other variants are for legacy compatibility only.
 24 |  *
 25 |  * The output of uuid_blob(X) is always a 16-byte blob. The UUID input
 26 |  * string is converted in network byte order (big-endian) in accordance
 27 |  * with RFC-4122 specifications for variant-1 UUIDs.  Note that network
 28 |  * byte order is *always* used, even if the input self-identifies as a
 29 |  * variant-2 UUID.
 30 |  *
 31 |  * The input X to the uuid_str() and uuid_blob() functions can be either
 32 |  * a string or a BLOB. If it is a BLOB it must be exactly 16 bytes in
 33 |  * length or else a NULL is returned.  If the input is a string it must
 34 |  * consist of 32 hexadecimal digits, upper or lower case, optionally
 35 |  * surrounded by {...} and with optional "-" characters interposed in the
 36 |  * middle.  The flexibility of input is inspired by the PostgreSQL
 37 |  * implementation of UUID functions that accept in all of the following
 38 |  * formats:
 39 |  *
 40 |  *     A0EEBC99-9C0B-4EF8-BB6D-6BB9BD380A11
 41 |  *     {a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11}
 42 |  *     a0eebc999c0b4ef8bb6d6bb9bd380a11
 43 |  *     a0ee-bc99-9c0b-4ef8-bb6d-6bb9-bd38-0a11
 44 |  *     {a0eebc99-9c0b4ef8-bb6d6bb9-bd380a11}
 45 |  *
 46 |  * If any of the above inputs are passed into uuid_str(), the output will
 47 |  * always be in the canonical RFC-4122 format:
 48 |  *
 49 |  *     a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11
 50 |  *
 51 |  * If the X input string has too few or too many digits or contains
 52 |  * stray characters other than {, }, or -, then NULL is returned.
 53 |  */
 54 | #include "sqlite3ext.h"
 55 | SQLITE_EXTENSION_INIT1
 56 | #include <assert.h>
 57 | #include <ctype.h>
 58 | #include <string.h>
 59 | 
 60 | #if !defined(SQLITE_ASCII) && !defined(SQLITE_EBCDIC)
 61 | #define SQLITE_ASCII 1
 62 | #endif
 63 | 
 64 | /*
 65 |  * Translate a single byte of Hex into an integer.
 66 |  * This routine only works if h really is a valid hexadecimal
 67 |  * character:  0..9a..fA..F
 68 |  */
 69 | static unsigned char sqlite3UuidHexToInt(int h) {
 70 |     assert((h >= '0' && h <= '9') || (h >= 'a' && h <= 'f') || (h >= 'A' && h <= 'F'));
 71 | #ifdef SQLITE_ASCII
 72 |     h += 9 * (1 & (h >> 6));
 73 | #endif
 74 | #ifdef SQLITE_EBCDIC
 75 |     h += 9 * (1 & ~(h >> 4));
 76 | #endif
 77 |     return (unsigned char)(h & 0xf);
 78 | }
 79 | 
 80 | /*
 81 |  * Convert a 16-byte BLOB into a well-formed RFC-4122 UUID.  The output
 82 |  * buffer zStr should be at least 37 bytes in length.   The output will
 83 |  * be zero-terminated.
 84 |  */
 85 | static void sqlite3_uuid_blob_to_str(const unsigned char* aBlob, /* Input blob */
 86 |                                      unsigned char* zStr         /* Write the answer here */
 87 | ) {
 88 |     static const char zDigits[] = "0123456789abcdef";
 89 |     int i, k;
 90 |     unsigned char x;
 91 |     k = 0;
 92 |     for (i = 0, k = 0x550; i < 16; i++, k = k >> 1) {
 93 |         if (k & 1) {
 94 |             zStr[0] = '-';
 95 |             zStr++;
 96 |         }
 97 |         x = aBlob[i];
 98 |         zStr[0] = zDigits[x >> 4];
 99 |         zStr[1] = zDigits[x & 0xf];
100 |         zStr += 2;
101 |     }
102 |     *zStr = 0;
103 | }
104 | 
/*
 * Parse the zero-terminated string zStr into a 16-byte binary UUID in aBlob.
 * One leading '{', one trailing '}' and a single '-' in front of any digit
 * pair are tolerated. Returns 0 on success and non-zero when the string is
 * not parsable; aBlob may be partially written in that case.
 */
static int sqlite3_uuid_str_to_blob(const unsigned char* zStr, /* Input string */
                                    unsigned char* aBlob       /* Write results here */
) {
    const unsigned char* cur = zStr;
    int filled = 0;

    if (*cur == '{') {
        cur++;
    }
    while (filled < 16) {
        if (*cur == '-') {
            cur++;
        }
        if (!isxdigit(cur[0]) || !isxdigit(cur[1])) {
            return 1;
        }
        aBlob[filled] = (sqlite3UuidHexToInt(cur[0]) << 4) + sqlite3UuidHexToInt(cur[1]);
        filled++;
        cur += 2;
    }
    if (*cur == '}') {
        cur++;
    }
    /* Anything left after the optional closing brace makes the input invalid. */
    return *cur != 0;
}
130 | 
131 | /*
132 |  * Render sqlite3_value pIn as a 16-byte UUID blob.  Return a pointer
133 |  * to the blob, or NULL if the input is not well-formed.
134 |  */
135 | static const unsigned char* sqlite3_uuid_input_to_blob(sqlite3_value* pIn, /* Input text */
136 |                                                        unsigned char* pBuf /* output buffer */
137 | ) {
138 |     switch (sqlite3_value_type(pIn)) {
139 |         case SQLITE_TEXT: {
140 |             const unsigned char* z = sqlite3_value_text(pIn);
141 |             if (sqlite3_uuid_str_to_blob(z, pBuf))
142 |                 return 0;
143 |             return pBuf;
144 |         }
145 |         case SQLITE_BLOB: {
146 |             int n = sqlite3_value_bytes(pIn);
147 |             return n == 16 ? sqlite3_value_blob(pIn) : 0;
148 |         }
149 |         default: {
150 |             return 0;
151 |         }
152 |     }
153 | }
154 | 
155 | /*
156 |  * sqlite3_uuid generates a version 4 UUID as a string
157 |  */
158 | static void sqlite3_uuid(sqlite3_context* context, int argc, sqlite3_value** argv) {
159 |     unsigned char aBlob[16];
160 |     unsigned char zStr[37];
161 |     (void)argc;
162 |     (void)argv;
163 |     sqlite3_randomness(16, aBlob);
164 |     aBlob[6] = (aBlob[6] & 0x0f) + 0x40;
165 |     aBlob[8] = (aBlob[8] & 0x3f) + 0x80;
166 |     sqlite3_uuid_blob_to_str(aBlob, zStr);
167 |     sqlite3_result_text(context, (char*)zStr, 36, SQLITE_TRANSIENT);
168 | }
169 | 
170 | /*
171 |  * sqlite3_uuid_str converts a UUID X into a well-formed UUID string.
172 |  * X can be either a string or a blob.
173 |  */
174 | static void sqlite3_uuid_str(sqlite3_context* context, int argc, sqlite3_value** argv) {
175 |     unsigned char aBlob[16];
176 |     unsigned char zStr[37];
177 |     const unsigned char* pBlob;
178 |     (void)argc;
179 |     pBlob = sqlite3_uuid_input_to_blob(argv[0], aBlob);
180 |     if (pBlob == 0)
181 |         return;
182 |     sqlite3_uuid_blob_to_str(pBlob, zStr);
183 |     sqlite3_result_text(context, (char*)zStr, 36, SQLITE_TRANSIENT);
184 | }
185 | 
186 | /*
187 |  * sqlite3_uuid_blob converts a UUID X into a 16-byte blob.
188 |  * X can be either a string or a blob.
189 |  */
190 | static void sqlite3_uuid_blob(sqlite3_context* context, int argc, sqlite3_value** argv) {
191 |     unsigned char aBlob[16];
192 |     const unsigned char* pBlob;
193 |     (void)argc;
194 |     pBlob = sqlite3_uuid_input_to_blob(argv[0], aBlob);
195 |     if (pBlob == 0)
196 |         return;
197 |     sqlite3_result_blob(context, pBlob, 16, SQLITE_TRANSIENT);
198 | }
199 | 
200 | #ifdef _WIN32
201 | __declspec(dllexport)
202 | #endif
203 |     int sqlite3_uuid_init(sqlite3* db, char** pzErrMsg, const sqlite3_api_routines* pApi) {
204 |     SQLITE_EXTENSION_INIT2(pApi);
205 |     (void)pzErrMsg; /* Unused parameter */
206 |     sqlite3_create_function(db, "uuid4", 0, SQLITE_UTF8 | SQLITE_INNOCUOUS, 0, sqlite3_uuid, 0, 0);
207 |     /* for postgresql compatibility */
208 |     sqlite3_create_function(db, "gen_random_uuid", 0, SQLITE_UTF8 | SQLITE_INNOCUOUS, 0,
209 |                             sqlite3_uuid, 0, 0);
210 |     static const int flags = SQLITE_UTF8 | SQLITE_INNOCUOUS | SQLITE_DETERMINISTIC;
211 |     sqlite3_create_function(db, "uuid_str", 1, flags, 0, sqlite3_uuid_str, 0, 0);
212 |     sqlite3_create_function(db, "uuid_blob", 1, flags, 0, sqlite3_uuid_blob, 0, 0);
213 |     return SQLITE_OK;
214 | }
215 | 


--------------------------------------------------------------------------------
/src/fuzzy/editdist.c:
--------------------------------------------------------------------------------
  1 | // Originally from the spellfix SQLite extension, Public Domain
  2 | // https://www.sqlite.org/src/file/ext/misc/spellfix.c
  3 | // Modified by Anton Zhiyanov, https://github.com/nalgeon/sqlean/, MIT License
  4 | 
  5 | #include <assert.h>
  6 | #include <stdlib.h>
  7 | 
  8 | #include "common.h"
  9 | 
 10 | extern const unsigned char midClass[];
 11 | extern const unsigned char initClass[];
 12 | extern const unsigned char className[];
 13 | 
 14 | /*
 15 | ** Return the character class number for a character given its
 16 | ** context.
 17 | */
 18 | static char characterClass(char cPrev, char c) {
 19 |     return cPrev == 0 ? initClass[c & 0x7f] : midClass[c & 0x7f];
 20 | }
 21 | 
 22 | /*
 23 | ** Return the cost of inserting or deleting character c immediately
 24 | ** following character cPrev.  If cPrev==0, that means c is the first
 25 | ** character of the word.
 26 | */
 27 | static int insertOrDeleteCost(char cPrev, char c, char cNext) {
 28 |     char classC = characterClass(cPrev, c);
 29 |     char classCprev;
 30 | 
 31 |     if (classC == CCLASS_SILENT) {
 32 |         /* Insert or delete "silent" characters such as H or W */
 33 |         return 1;
 34 |     }
 35 |     if (cPrev == c) {
 36 |         /* Repeated characters, or miss a repeat */
 37 |         return 10;
 38 |     }
 39 |     if (classC == CCLASS_VOWEL && (cPrev == 'r' || cNext == 'r')) {
 40 |         return 20; /* Insert a vowel before or after 'r' */
 41 |     }
 42 |     classCprev = characterClass(cPrev, cPrev);
 43 |     if (classC == classCprev) {
 44 |         if (classC == CCLASS_VOWEL) {
 45 |             /* Remove or add a new vowel to a vowel cluster */
 46 |             return 15;
 47 |         } else {
 48 |             /* Remove or add a consonant not in the same class */
 49 |             return 50;
 50 |         }
 51 |     }
 52 | 
 53 |     /* any other character insertion or deletion */
 54 |     return 100;
 55 | }
 56 | 
 57 | /*
 58 | ** Divide the insertion cost by this factor when appending to the
 59 | ** end of the word.
 60 | */
 61 | #define FINAL_INS_COST_DIV 4
 62 | 
 63 | /*
 64 | ** Return the cost of substituting cTo in place of cFrom assuming
 65 | ** the previous character is cPrev.  If cPrev==0 then cTo is the first
 66 | ** character of the word.
 67 | */
 68 | static int substituteCost(char cPrev, char cFrom, char cTo) {
 69 |     char classFrom, classTo;
 70 |     if (cFrom == cTo) {
 71 |         /* Exact match */
 72 |         return 0;
 73 |     }
 74 |     if (cFrom == (cTo ^ 0x20) && ((cTo >= 'A' && cTo <= 'Z') || (cTo >= 'a' && cTo <= 'z'))) {
 75 |         /* differ only in case */
 76 |         return 0;
 77 |     }
 78 |     classFrom = characterClass(cPrev, cFrom);
 79 |     classTo = characterClass(cPrev, cTo);
 80 |     if (classFrom == classTo) {
 81 |         /* Same character class */
 82 |         return 40;
 83 |     }
 84 |     if (classFrom >= CCLASS_B && classFrom <= CCLASS_Y && classTo >= CCLASS_B &&
 85 |         classTo <= CCLASS_Y) {
 86 |         /* Convert from one consonant to another, but in a different class */
 87 |         return 75;
 88 |     }
 89 |     /* Any other subsitution */
 90 |     return 100;
 91 | }
 92 | 
/*
** Given two strings zA and zB which are pure ASCII, return the cost
** of transforming zA into zB.  If zA ends with '*' assume that it is
** a prefix of zB and give only minimal penalty for extra characters
** on the end of zB.
**
** Smaller numbers mean a closer match.
**
** Negative values indicate an error:
**    -1  One of the inputs is NULL
**    -2  Non-ASCII characters on input
**    -3  Unable to allocate memory
**
** If pnMatch is not NULL, then *pnMatch is set to the number of bytes
** of zB that matched the pattern in zA. If zA does not end with a '*',
** then this value is always the number of bytes in zB (i.e. strlen(zB)).
** If zA does end in a '*', then it is the number of bytes in the prefix
** of zB that was deemed to match zA.
**
** Implementation notes:
**   - Any common prefix of zA and zB is skipped first; nA and nB are the
**     lengths of what remains, and only those remainders are scanned for
**     non-ASCII bytes.
**   - A single row m[0..nB] of the cost matrix is kept and updated in
**     place for each character of zA; cx[0..nB] holds, for each cell, the
**     character associated with the cheapest path into that cell.
*/
int edit_distance(const char* zA, const char* zB, int* pnMatch) {
    int nA, nB;          /* Number of characters in zA[] and zB[] */
    int xA, xB;          /* Loop counters for zA[] and zB[] */
    char cA = 0, cB;     /* Current character of zA and zB */
    char cAprev, cBprev; /* Previous character of zA and zB */
    char cAnext, cBnext; /* Next character in zA and zB */
    int d;               /* North-west cost value */
    int dc = 0;          /* North-west character value */
    int res;             /* Final result */
    int* m;              /* The cost matrix */
    char* cx;            /* Corresponding character values */
    int* toFree = 0;     /* Malloced space */
    int nMatch = 0;      /* Length of the common prefix skipped below */
    int mStack[60 + 15]; /* Stack space to use if not too much is needed */

    /* Early out if either input is NULL */
    if (zA == 0 || zB == 0)
        return -1;

    /* Skip any common prefix */
    while (zA[0] && zA[0] == zB[0]) {
        dc = zA[0]; /* remember the last shared character as context */
        zA++;
        zB++;
        nMatch++;
    }
    if (pnMatch)
        *pnMatch = nMatch;
    if (zA[0] == 0 && zB[0] == 0)
        return 0; /* the strings are identical */

#if 0
  printf("A=\"%s\" B=\"%s\" dc=%c\n", zA, zB, dc?dc:' ');
#endif

    /* Verify input strings and measure their lengths */
    for (nA = 0; zA[nA]; nA++) {
        if (zA[nA] & 0x80)
            return -2;
    }
    for (nB = 0; zB[nB]; nB++) {
        if (zB[nB] & 0x80)
            return -2;
    }

    /* Special processing if either string is empty */
    if (nA == 0) {
        /* Only insertions at the end of the word remain: each one is
        ** charged at the reduced (divided) rate. */
        cBprev = (char)dc;
        for (xB = res = 0; (cB = zB[xB]) != 0; xB++) {
            res += insertOrDeleteCost(cBprev, cB, zB[xB + 1]) / FINAL_INS_COST_DIV;
            cBprev = cB;
        }
        return res;
    }
    if (nB == 0) {
        /* Only deletions remain: each one is charged at the full rate. */
        cAprev = (char)dc;
        for (xA = res = 0; (cA = zA[xA]) != 0; xA++) {
            res += insertOrDeleteCost(cAprev, cA, zA[xA + 1]);
            cAprev = cA;
        }
        return res;
    }

    /* A is a prefix of B */
    if (zA[0] == '*' && zA[1] == 0)
        return 0;

    /* Allocate and initialize the Wagner matrix */
    /* One buffer holds both arrays: nB+1 ints for m[] followed by nB+1
    ** chars for cx[].  It is sized as 5/4 of the int array, which covers
    ** the chars when sizeof(int) is at least 4.  The stack buffer is used
    ** when it is large enough by the same rule. */
    if (nB < (sizeof(mStack) * 4) / (sizeof(mStack[0]) * 5)) {
        m = mStack;
    } else {
        m = toFree = malloc((nB + 1) * 5 * sizeof(m[0]) / 4);
        if (m == 0)
            return -3;
    }
    cx = (char*)&m[nB + 1];

    /* Compute the Wagner edit distance */
    /* Row 0: cost of building each prefix of zB by insertions alone */
    m[0] = 0;
    cx[0] = (char)dc;
    cBprev = (char)dc;
    for (xB = 1; xB <= nB; xB++) {
        cBnext = zB[xB];
        cB = zB[xB - 1];
        cx[xB] = cB;
        m[xB] = m[xB - 1] + insertOrDeleteCost(cBprev, cB, cBnext);
        cBprev = cB;
    }
    cAprev = (char)dc;
    for (xA = 1; xA <= nA; xA++) {
        int lastA = (xA == nA);
        cA = zA[xA - 1];
        cAnext = zA[xA];
        /* A trailing '*' is a wildcard, not a character to be matched */
        if (cA == '*' && lastA)
            break;
        /* Save the old m[0]/cx[0] as the "north-west" values before
        ** overwriting column 0 for this row */
        d = m[0];
        dc = cx[0];
        m[0] = d + insertOrDeleteCost(cAprev, cA, cAnext);
        cBprev = 0;
        for (xB = 1; xB <= nB; xB++) {
            int totalCost, insCost, delCost, subCost, ncx;
            cB = zB[xB - 1];
            cBnext = zB[xB];

            /* Cost to insert cB */
            insCost = insertOrDeleteCost(cx[xB - 1], cB, cBnext);
            if (lastA)
                insCost /= FINAL_INS_COST_DIV;

            /* Cost to delete cA */
            delCost = insertOrDeleteCost(cx[xB], cA, cBnext);

            /* Cost to substitute cA->cB */
            subCost = substituteCost(cx[xB - 1], cA, cB);

            /* Best cost */
            /* Start from "insert", then prefer "delete" or "substitute"
            ** only when strictly cheaper.  ncx records the character kept
            ** as context for this cell. */
            totalCost = insCost + m[xB - 1];
            ncx = cB;
            if ((delCost + m[xB]) < totalCost) {
                totalCost = delCost + m[xB];
                ncx = cA;
            }
            if ((subCost + d) < totalCost) {
                totalCost = subCost + d;
            }

#if 0
      printf("%d,%d d=%4d u=%4d r=%4d dc=%c cA=%c cB=%c"
             " ins=%4d del=%4d sub=%4d t=%4d ncx=%c\n",
             xA, xB, d, m[xB], m[xB-1], dc?dc:' ', cA, cB,
             insCost, delCost, subCost, totalCost, ncx?ncx:' ');
#endif

            /* Update the matrix */
            /* The value about to be overwritten becomes the north-west
            ** value for the next column */
            d = m[xB];
            dc = cx[xB];
            m[xB] = totalCost;
            cx[xB] = (char)ncx;
            cBprev = cB;
        }
        cAprev = cA;
    }

    /* Free the wagner matrix and return the result */
    if (cA == '*') {
        /* Prefix match: take the cheapest cell of the final row.  *pnMatch
        ** is only updated when a cell strictly cheaper than m[1] is found. */
        res = m[1];
        for (xB = 1; xB <= nB; xB++) {
            if (m[xB] < res) {
                res = m[xB];
                if (pnMatch)
                    *pnMatch = xB + nMatch;
            }
        }
    } else {
        res = m[nB];
        /* In the current implementation, pnMatch is always NULL if zA does
        ** not end in "*" */
        assert(pnMatch == 0);
    }
    free(toFree);
    return res;
}


--------------------------------------------------------------------------------
/src/sqlite3-math.c:
--------------------------------------------------------------------------------
  1 | // Originally from SQLite 3.35.4 source code (func.c), Public Domain
  2 | // Modified by Anton Zhiyanov, https://github.com/nalgeon/sqlean/, MIT License
  3 | 
  4 | /*
  5 |  * SQLite math functions.
  6 |  */
  7 | #include <assert.h>
  8 | #include <math.h>
  9 | 
 10 | #include "sqlite3ext.h"
 11 | SQLITE_EXTENSION_INIT1
 12 | 
/*
** Macros for converting between an integer and a pointer.  The variant is
** selected by what the compiler makes available.
*/
#if defined(HAVE_STDINT_H) /* Use this case if we have ANSI headers */
#define SQLITE_INT_TO_PTR(X) ((void*)(intptr_t)(X))
#define SQLITE_PTR_TO_INT(X) ((int)(intptr_t)(X))
#elif defined(__PTRDIFF_TYPE__) /* This case should work for GCC */
#define SQLITE_INT_TO_PTR(X) ((void*)(__PTRDIFF_TYPE__)(X))
#define SQLITE_PTR_TO_INT(X) ((int)(__PTRDIFF_TYPE__)(X))
#elif !defined(__GNUC__) /* Works for compilers other than LLVM */
#define SQLITE_INT_TO_PTR(X) ((void*)&((char*)0)[X])
#define SQLITE_PTR_TO_INT(X) ((int)(((char*)X) - (char*)0))
#else /* Generates a warning - but it always works */
#define SQLITE_INT_TO_PTR(X) ((void*)(X))
#define SQLITE_PTR_TO_INT(X) ((int)(X))
#endif

/* Mathematical Constants */
/* Fallback definitions, used only when the macro is not already defined. */
#ifndef M_PI
#define M_PI 3.141592653589793238462643383279502884
#endif
#ifndef M_LN10
#define M_LN10 2.302585092994045684017991454684364208
#endif
#ifndef M_LN2
#define M_LN2 0.693147180559945309417232121458176568
#endif
 37 | 
 38 | /*
 39 | ** Implementation SQL functions:
 40 | **
 41 | **   ceil(X)
 42 | **   ceiling(X)
 43 | **   floor(X)
 44 | **
 45 | ** The sqlite3_user_data() pointer is a pointer to the libm implementation
 46 | ** of the underlying C function.
 47 | */
 48 | static void ceilingFunc(sqlite3_context* context, int argc, sqlite3_value** argv) {
 49 |     assert(argc == 1);
 50 |     switch (sqlite3_value_numeric_type(argv[0])) {
 51 |         case SQLITE_INTEGER: {
 52 |             sqlite3_result_int64(context, sqlite3_value_int64(argv[0]));
 53 |             break;
 54 |         }
 55 |         case SQLITE_FLOAT: {
 56 |             double (*x)(double) = (double (*)(double))sqlite3_user_data(context);
 57 |             sqlite3_result_double(context, x(sqlite3_value_double(argv[0])));
 58 |             break;
 59 |         }
 60 |         default: {
 61 |             break;
 62 |         }
 63 |     }
 64 | }
 65 | 
/*
** On some systems, ceil() and floor() are intrinsic function.  You are
** unable to take a pointer to these functions.  Hence, we here wrap them
** in our own actual functions.
*/
/* Addressable wrapper around ceil(). */
static double xCeil(double x) {
    return ceil(x);
}
/* Addressable wrapper around floor(). */
static double xFloor(double x) {
    return floor(x);
}
 77 | 
 78 | /*
 79 | ** Implementation of SQL functions:
 80 | **
 81 | **   ln(X)       - natural logarithm
 82 | **   log(X)      - log X base 10
 83 | **   log10(X)    - log X base 10
 84 | */
 85 | static void log1Func(sqlite3_context* context, int argc, sqlite3_value** argv) {
 86 |     double x, b, ans;
 87 |     assert(argc == 1);
 88 |     switch (sqlite3_value_numeric_type(argv[0])) {
 89 |         case SQLITE_INTEGER:
 90 |         case SQLITE_FLOAT:
 91 |             x = sqlite3_value_double(argv[0]);
 92 |             if (x <= 0.0)
 93 |                 return;
 94 |             break;
 95 |         default:
 96 |             return;
 97 |     }
 98 |     ans = log(x);
 99 |     switch (SQLITE_PTR_TO_INT(sqlite3_user_data(context))) {
100 |         case 1:
101 |             /* Convert from natural logarithm to log base 10 */
102 |             ans *= 1.0 / M_LN10;
103 |             break;
104 |         case 2:
105 |             /* Convert from natural logarithm to log base 2 */
106 |             ans *= 1.0 / M_LN2;
107 |             break;
108 |         default:
109 |             break;
110 |     }
111 |     sqlite3_result_double(context, ans);
112 | }
113 | 
114 | /*
115 | ** Implementation of SQL functions:
116 | **
117 | **   log(B,X)    - log X base B
118 | */
119 | static void log2Func(sqlite3_context* context, int argc, sqlite3_value** argv) {
120 |     double x, b, ans;
121 |     assert(argc == 2);
122 |     switch (sqlite3_value_numeric_type(argv[0])) {
123 |         case SQLITE_INTEGER:
124 |         case SQLITE_FLOAT:
125 |             x = sqlite3_value_double(argv[0]);
126 |             if (x <= 0.0)
127 |                 return;
128 |             break;
129 |         default:
130 |             return;
131 |     }
132 |     switch (sqlite3_value_numeric_type(argv[0])) {
133 |         case SQLITE_INTEGER:
134 |         case SQLITE_FLOAT:
135 |             b = log(x);
136 |             if (b <= 0.0)
137 |                 return;
138 |             x = sqlite3_value_double(argv[1]);
139 |             if (x <= 0.0)
140 |                 return;
141 |             break;
142 |         default:
143 |             return;
144 |     }
145 |     ans = log(x) / b;
146 |     sqlite3_result_double(context, ans);
147 | }
148 | 
/*
** Functions to converts degrees to radians and radians to degrees.
*/
/* Degrees to radians: multiplies x by pi/180. */
static double degToRad(double x) {
    return x * (M_PI / 180.0);
}
/* Radians to degrees: multiplies x by 180/pi. */
static double radToDeg(double x) {
    return x * (180.0 / M_PI);
}
158 | 
159 | /*
160 | ** Implementation of 1-argument SQL math functions:
161 | **
162 | **   exp(X)  - Compute e to the X-th power
163 | */
164 | static void math1Func(sqlite3_context* context, int argc, sqlite3_value** argv) {
165 |     int type0;
166 |     double v0, ans;
167 |     double (*x)(double);
168 |     assert(argc == 1);
169 |     type0 = sqlite3_value_numeric_type(argv[0]);
170 |     if (type0 != SQLITE_INTEGER && type0 != SQLITE_FLOAT)
171 |         return;
172 |     v0 = sqlite3_value_double(argv[0]);
173 |     x = (double (*)(double))sqlite3_user_data(context);
174 |     ans = x(v0);
175 |     sqlite3_result_double(context, ans);
176 | }
177 | 
178 | /*
179 | ** Implementation of 2-argument SQL math functions:
180 | **
181 | **   power(X,Y)  - Compute X to the Y-th power
182 | */
183 | static void math2Func(sqlite3_context* context, int argc, sqlite3_value** argv) {
184 |     int type0, type1;
185 |     double v0, v1, ans;
186 |     double (*x)(double, double);
187 |     assert(argc == 2);
188 |     type0 = sqlite3_value_numeric_type(argv[0]);
189 |     if (type0 != SQLITE_INTEGER && type0 != SQLITE_FLOAT)
190 |         return;
191 |     type1 = sqlite3_value_numeric_type(argv[1]);
192 |     if (type1 != SQLITE_INTEGER && type1 != SQLITE_FLOAT)
193 |         return;
194 |     v0 = sqlite3_value_double(argv[0]);
195 |     v1 = sqlite3_value_double(argv[1]);
196 |     x = (double (*)(double, double))sqlite3_user_data(context);
197 |     ans = x(v0, v1);
198 |     sqlite3_result_double(context, ans);
199 | }
200 | 
201 | /*
202 | ** Implementation of pi() SQL math function
203 | */
204 | static void piFunc(sqlite3_context* context, int argc, sqlite3_value** argv) {
205 |     assert(argc == 0);
206 |     sqlite3_result_double(context, M_PI);
207 | }
208 | 
209 | /*
210 |  * Registers the extension.
211 |  */
212 | #ifdef _WIN32
213 | __declspec(dllexport)
214 | #endif
215 |     int sqlite3_math_init(sqlite3* db, char** pzErrMsg, const sqlite3_api_routines* pApi) {
216 |     static const int flags = SQLITE_UTF8 | SQLITE_INNOCUOUS | SQLITE_DETERMINISTIC;
217 |     SQLITE_EXTENSION_INIT2(pApi);
218 |     sqlite3_create_function(db, "ceil", 1, flags, xCeil, ceilingFunc, 0, 0);
219 |     sqlite3_create_function(db, "ceiling", 1, flags, xCeil, ceilingFunc, 0, 0);
220 |     sqlite3_create_function(db, "floor", 1, flags, xFloor, ceilingFunc, 0, 0);
221 |     sqlite3_create_function(db, "trunc", 1, flags, trunc, ceilingFunc, 0, 0);
222 |     sqlite3_create_function(db, "ln", 1, flags, 0, log1Func, 0, 0);
223 |     sqlite3_create_function(db, "log", 1, flags, (void*)(1), log1Func, 0, 0);
224 |     sqlite3_create_function(db, "log10", 1, flags, (void*)(1), log1Func, 0, 0);
225 |     sqlite3_create_function(db, "log2", 1, flags, (void*)(2), log1Func, 0, 0);
226 |     sqlite3_create_function(db, "log", 2, flags, 0, log2Func, 0, 0);
227 |     sqlite3_create_function(db, "exp", 1, flags, exp, math1Func, 0, 0);
228 |     sqlite3_create_function(db, "pow", 2, flags, pow, math2Func, 0, 0);
229 |     sqlite3_create_function(db, "power", 2, flags, pow, math2Func, 0, 0);
230 |     sqlite3_create_function(db, "mod", 2, flags, fmod, math2Func, 0, 0);
231 |     sqlite3_create_function(db, "acos", 1, flags, acos, math1Func, 0, 0);
232 |     sqlite3_create_function(db, "asin", 1, flags, asin, math1Func, 0, 0);
233 |     sqlite3_create_function(db, "atan", 1, flags, atan, math1Func, 0, 0);
234 |     sqlite3_create_function(db, "atan2", 2, flags, atan2, math2Func, 0, 0);
235 |     sqlite3_create_function(db, "cos", 1, flags, cos, math1Func, 0, 0);
236 |     sqlite3_create_function(db, "sin", 1, flags, sin, math1Func, 0, 0);
237 |     sqlite3_create_function(db, "tan", 1, flags, tan, math1Func, 0, 0);
238 |     sqlite3_create_function(db, "cosh", 1, flags, cosh, math1Func, 0, 0);
239 |     sqlite3_create_function(db, "sinh", 1, flags, sinh, math1Func, 0, 0);
240 |     sqlite3_create_function(db, "tanh", 1, flags, tanh, math1Func, 0, 0);
241 |     sqlite3_create_function(db, "acosh", 1, flags, acosh, math1Func, 0, 0);
242 |     sqlite3_create_function(db, "asinh", 1, flags, asinh, math1Func, 0, 0);
243 |     sqlite3_create_function(db, "atanh", 1, flags, atanh, math1Func, 0, 0);
244 |     sqlite3_create_function(db, "sqrt", 1, flags, sqrt, math1Func, 0, 0);
245 |     sqlite3_create_function(db, "radians", 1, flags, degToRad, math1Func, 0, 0);
246 |     sqlite3_create_function(db, "degrees", 1, flags, radToDeg, math1Func, 0, 0);
247 |     sqlite3_create_function(db, "pi", 0, flags, 0, piFunc, 0, 0);
248 |     return SQLITE_OK;
249 | }


--------------------------------------------------------------------------------
/src/sqlite3-re.c:
--------------------------------------------------------------------------------
  1 | // Copyright (c) 2021 Anton Zhiyanov, MIT License
  2 | // https://github.com/nalgeon/sqlean
  3 | 
  4 | /*
  5 |  * SQLite extension for working with regular expressions.
  6 |  *
  7 |  * regexp_like(source, pattern)
  8 |  *   - checks if source string matches pattern
  9 |  * regexp_substr(source, pattern)
 10 |  *   - returns source substring matching pattern
 11 |  * regexp_replace(source, pattern, replacement)
 12 |  *   - replaces matching substring with replacement string
 13 |  *
 14 |  *  The following regular expression syntax is supported:
 15 |  *     X*      zero or more occurrences of X
 16 |  *     X+      one or more occurrences of X
 17 |  *     X?      zero or one occurrences of X
 18 |  *     (X)     match X
 19 |  *     X|Y     X or Y
 20 |  *     ^X      X occurring at the beginning of the string
 21 |  *     X$      X occurring at the end of the string
 22 |  *     .       Match any single character
 23 |  *     \c      Character c where c is one of \{}()[]|*+?.
 24 |  *     \c      C-language escapes for c in afnrtv.  ex: \t or \n
 25 |  *     [abc]   Any single character from the set abc
 26 |  *     [^abc]  Any single character not in the set abc
 27 |  *     [a-z]   Any single character in the range a-z
 28 |  *     [^a-z]  Any single character not in the range a-z
 29 |  **
 30 |  */
 31 | #include <assert.h>
 32 | #include <stdio.h>
 33 | #include <stdlib.h>
 34 | #include <string.h>
 35 | 
 36 | #include "re.h"
 37 | #include "sqlite3ext.h"
 38 | SQLITE_EXTENSION_INIT1
 39 | 
 40 | /*
 41 |  * Replaces `rep` substring of the `orig` string with `with` substring.
 42 |  */
 43 | static char* str_replace(char* orig, char* rep, char* with) {
 44 |     char* result;   // the return string
 45 |     char* ins;      // the next insert point
 46 |     char* tmp;      // varies
 47 |     int len_rep;    // length of rep (the string to remove)
 48 |     int len_with;   // length of with (the string to replace rep with)
 49 |     int len_front;  // distance between rep and end of last rep
 50 |     int count;      // number of replacements
 51 | 
 52 |     // sanity checks and initialization
 53 |     if (!orig || !rep)
 54 |         return NULL;
 55 |     len_rep = strlen(rep);
 56 |     if (len_rep == 0)
 57 |         return NULL;  // empty rep causes infinite loop during count
 58 |     if (!with)
 59 |         with = "";
 60 |     len_with = strlen(with);
 61 | 
 62 |     // count the number of replacements needed
 63 |     ins = orig;
 64 |     for (count = 0; (tmp = strstr(ins, rep)); ++count) {
 65 |         ins = tmp + len_rep;
 66 |     }
 67 | 
 68 |     tmp = result = sqlite3_malloc(strlen(orig) + (len_with - len_rep) * count + 1);
 69 | 
 70 |     if (!result)
 71 |         return NULL;
 72 | 
 73 |     // first time through the loop, all the variable are set correctly
 74 |     // from here on,
 75 |     //    tmp points to the end of the result string
 76 |     //    ins points to the next occurrence of rep in orig
 77 |     //    orig points to the remainder of orig after "end of rep"
 78 |     while (count--) {
 79 |         ins = strstr(orig, rep);
 80 |         len_front = ins - orig;
 81 |         tmp = strncpy(tmp, orig, len_front) + len_front;
 82 |         tmp = strcpy(tmp, with) + len_with;
 83 |         orig += len_front + len_rep;  // move to next "end of rep"
 84 |     }
 85 |     strcpy(tmp, orig);
 86 |     return result;
 87 | }
 88 | 
 89 | /*
 90 |  * Checks if source string matches pattern.
 91 |  * regexp_statement(pattern, source)
 92 |  * E.g.:
 93 |  * select true where 'abc' regexp 'a.c';
 94 |  */
 95 | static void regexp_statement(sqlite3_context* context, int argc, sqlite3_value** argv) {
 96 |     regexp* r;
 97 |     const char* source;
 98 |     const char* pattern;
 99 |     int is_match = 0;
100 | 
101 |     assert(argc == 2);
102 | 
103 |     source = (const char*)sqlite3_value_text(argv[1]);
104 | #ifdef DEBUG
105 |     fprintf(stderr, "source = %s\n", source);
106 | #endif
107 |     if (!source) {
108 |         sqlite3_result_int(context, is_match);
109 |         return;
110 |     }
111 | 
112 |     pattern = (const char*)sqlite3_value_text(argv[0]);
113 | #ifdef DEBUG
114 |     fprintf(stderr, "pattern = %s\n", pattern);
115 | #endif
116 |     if (!pattern) {
117 |         sqlite3_result_error(context, "missing regexp pattern", -1);
118 |         return;
119 |     }
120 | 
121 |     r = re_compile(pattern);
122 |     if (r == NULL) {
123 |         sqlite3_result_error(context, "invalid regexp pattern", -1);
124 |         return;
125 |     }
126 | 
127 |     is_match = re_execute(r, source);
128 |     sqlite3_result_int(context, is_match);
129 |     free((char*)r);
130 | }
131 | 
132 | /*
133 |  * Checks if source string matches pattern.
134 |  * regexp_like(source, pattern)
135 |  * E.g.:
136 |  * select regexp_like('abc', 'a.c');
137 |  */
138 | static void regexp_like(sqlite3_context* context, int argc, sqlite3_value** argv) {
139 |     regexp* r;
140 |     const char* source;
141 |     const char* pattern;
142 |     int is_match = 0;
143 | 
144 |     assert(argc == 2);
145 | 
146 |     source = (const char*)sqlite3_value_text(argv[0]);
147 | #ifdef DEBUG
148 |     fprintf(stderr, "source = %s\n", source);
149 | #endif
150 |     if (!source) {
151 |         sqlite3_result_int(context, is_match);
152 |         return;
153 |     }
154 | 
155 |     pattern = (const char*)sqlite3_value_text(argv[1]);
156 | #ifdef DEBUG
157 |     fprintf(stderr, "pattern = %s\n", pattern);
158 | #endif
159 |     if (!pattern) {
160 |         sqlite3_result_error(context, "missing regexp pattern", -1);
161 |         return;
162 |     }
163 | 
164 |     r = re_compile(pattern);
165 |     if (r == NULL) {
166 |         sqlite3_result_error(context, "invalid regexp pattern", -1);
167 |         return;
168 |     }
169 | 
170 |     is_match = re_execute(r, source);
171 |     sqlite3_result_int(context, is_match);
172 |     free((char*)r);
173 | }
174 | 
175 | /*
176 |  * Returns source substring matching pattern.
177 |  * regexp_substr(source, pattern)
178 |  * E.g.: select regexp_substr('abcdef', 'b.d') = 'bcd';
179 |  */
180 | static void regexp_substr(sqlite3_context* context, int argc, sqlite3_value** argv) {
181 |     regexp* r;
182 |     const char* source;
183 |     const char* pattern;
184 |     int is_match = 0;
185 | 
186 |     assert(argc == 2);
187 | 
188 |     source = (const char*)sqlite3_value_text(argv[0]);
189 |     if (!source) {
190 |         return;
191 |     }
192 | 
193 |     pattern = (const char*)sqlite3_value_text(argv[1]);
194 |     if (!pattern) {
195 |         sqlite3_result_error(context, "missing regexp pattern", -1);
196 |         return;
197 |     }
198 | 
199 |     r = re_compile(pattern);
200 |     if (r == NULL) {
201 |         sqlite3_result_error(context, "invalid regexp pattern", -1);
202 |         return;
203 |     }
204 | 
205 |     is_match = re_execute(r, source);
206 |     if (!is_match) {
207 |         return;
208 |     }
209 | 
210 |     int len = r->endp[0] - r->startp[0];
211 |     char* matched_str = sqlite3_malloc(len + 1);
212 |     (void)strncpy(matched_str, r->startp[0], len);
213 |     matched_str[len] = '\0';
214 | #ifdef DEBUG
215 |     fprintf(stderr, "matched_str = '%s'\n", matched_str);
216 | #endif
217 | 
218 |     sqlite3_result_text(context, (char*)matched_str, -1, sqlite3_free);
219 |     free((char*)r);
220 | }
221 | 
222 | /*
223 |  * Replaces the source substring matching pattern with replacement
224 |  * (a single match is replaced).
225 |  * regexp_replace(source, pattern, replacement)
226 |  * E.g.: select regexp_replace('abcdef', 'b.d', '...') = 'a...ef';
227 |  * A NULL source leaves the result unset; a NULL replacement or a source
228 |  * without a match returns the source unchanged.
229 |  */
230 | static void regexp_replace(sqlite3_context* context, int argc, sqlite3_value** argv) {
231 |     regexp* r;
232 |     char* source;
233 |     char* pattern;
234 |     char* replacement;
235 |     char* result;
236 | 
237 |     int is_match = 0;
238 | 
239 |     assert(argc == 3);
240 | 
241 |     source = (char*)sqlite3_value_text(argv[0]);
242 |     if (!source) {
243 |         return;
244 |     }
245 | 
246 |     pattern = (char*)sqlite3_value_text(argv[1]);
247 |     if (!pattern) {
248 |         sqlite3_result_error(context, "missing regexp pattern", -1);
249 |         return;
250 |     }
251 | 
252 |     r = re_compile(pattern);
253 |     if (r == NULL) {
254 |         sqlite3_result_error(context, "invalid regexp pattern", -1);
255 |         return;
256 |     }
257 | 
258 |     // from here on every exit path has to release the compiled pattern
259 |     replacement = (char*)sqlite3_value_text(argv[2]);
260 |     if (!replacement) {
261 |         free((char*)r);
262 |         sqlite3_result_value(context, argv[0]);
263 |         return;
264 |     }
265 | 
266 |     is_match = re_execute(r, source);
267 |     if (!is_match) {
268 |         free((char*)r);
269 |         sqlite3_result_value(context, argv[0]);
270 |         return;
271 |     }
272 | 
273 |     // NOTE(review): re_substitute receives no buffer size here, so an expansion longer
274 |     // than BUFSIZ presumably overflows replacement_str - confirm against re.h
275 |     char replacement_str[BUFSIZ];
276 |     int err = re_substitute(r, replacement, replacement_str);
277 |     if (err) {
278 |         free((char*)r);
279 |         sqlite3_result_error(context, "invalid replacement pattern", -1);
280 |         return;
281 |     }
282 | 
283 |     // result = text before the match + expanded replacement + text after the match
284 |     int head_len = r->startp[0] - source;
285 |     int tail_len = source + strlen(source) - r->endp[0];
286 |     int replacement_len = strlen(replacement_str);
287 |     int result_len = head_len + replacement_len + tail_len;
288 | 
289 |     result = sqlite3_malloc(result_len + 1);
290 |     if (result == NULL) {
291 |         free((char*)r);
292 |         sqlite3_result_error_nomem(context);
293 |         return;
294 |     }
295 |     // each piece is copied to an explicit offset, so the freshly allocated
296 |     // (uninitialised) buffer is never read before it is written
297 |     memcpy(result, source, head_len);
298 |     memcpy(result + head_len, replacement_str, replacement_len);
299 |     memcpy(result + head_len + replacement_len, r->endp[0], tail_len);
300 |     result[result_len] = '\0';
301 | 
302 | #ifdef DEBUG
303 |     int matched_len = r->endp[0] - r->startp[0];
304 |     fprintf(stderr, "head string (%d) = '%.*s'\n", head_len, head_len, source);
305 |     fprintf(stderr, "matched string (%d) = '%.*s'\n", matched_len, matched_len, r->startp[0]);
306 |     fprintf(stderr, "repl string (%d) = '%s'\n", replacement_len, replacement_str);
307 |     fprintf(stderr, "tail string (%d) = '%s'\n", tail_len, r->endp[0]);
308 |     fprintf(stderr, "result string (%d) = '%s'\n", result_len, result);
309 |     fprintf(stderr, "replace('%s', '%.*s', '%s') = '%s'\n", source, matched_len, r->startp[0],
310 |             replacement_str, result);
311 | #endif
312 | 
313 |     // result is handed to SQLite together with sqlite3_free as its destructor
314 |     sqlite3_result_text(context, result, result_len, sqlite3_free);
315 |     free((char*)r);
316 | }
314 | 
315 | /*
316 |  * Extension entry point: registers the regexp SQL functions on the connection.
317 |  * Always returns SQLITE_OK; the results of the individual registrations are not checked.
318 |  */
319 | #ifdef _WIN32
320 | __declspec(dllexport)
321 | #endif
322 |     int sqlite3_re_init(sqlite3* db, char** pzErrMsg, const sqlite3_api_routines* pApi) {
323 |     SQLITE_EXTENSION_INIT2(pApi);
324 |     // SQL name, number of arguments and implementation of every exported function
325 |     static const struct {
326 |         const char* name;
327 |         int n_args;
328 |         void (*impl)(sqlite3_context*, int, sqlite3_value**);
329 |     } functions[] = {
330 |         {"regexp", 2, regexp_statement},
331 |         {"regexp_like", 2, regexp_like},
332 |         {"regexp_substr", 2, regexp_substr},
333 |         {"regexp_replace", 3, regexp_replace},
334 |     };
335 |     for (size_t idx = 0; idx < sizeof(functions) / sizeof(functions[0]); idx++) {
336 |         sqlite3_create_function(db, functions[idx].name, functions[idx].n_args, SQLITE_UTF8, 0,
337 |                                 functions[idx].impl, 0, 0);
338 |     }
339 |     return SQLITE_OK;
340 | }


--------------------------------------------------------------------------------
/src/sqlite3-fuzzy.c:
--------------------------------------------------------------------------------
  1 | // Copyright (c) 2021 Anton Zhiyanov, MIT License
  2 | // https://github.com/nalgeon/sqlean
  3 | 
  4 | /*
  5 |  * Fuzzy string matching and phonetics.
  6 |  */
  7 | #include <assert.h>
  8 | #include <stdbool.h>
  9 | #include <stdlib.h>
 10 | 
 11 | #include "sqlite3ext.h"
 12 | SQLITE_EXTENSION_INIT1
 13 | 
 14 | #include "fuzzy/fuzzy.h"
 15 | 
 16 | // is_ascii reports whether every byte of the NUL-terminated string
 17 | // has its high bit clear, i.e. whether the string is pure ASCII
 18 | static bool is_ascii(const unsigned char* str) {
 19 |     const unsigned char* cursor = str;
 20 |     while (*cursor != '\0') {
 21 |         if ((*cursor & 0x80) != 0) {
 22 |             return false;
 23 |         }
 24 |         cursor++;
 25 |     }
 26 |     return true;
 27 | }
 25 | 
 26 | // Below are functions extracted from the
 27 | // https://github.com/Rostepher/libstrcmp/
 28 | 
 29 | // sqlite3_dlevenshtein is the SQL wrapper around damerau_levenshtein():
 30 | // NULL or non-ASCII arguments raise an error, otherwise the distance is returned as an integer
 31 | static void sqlite3_dlevenshtein(sqlite3_context* context, int argc, sqlite3_value** argv) {
 32 |     assert(argc == 2);
 33 |     const unsigned char* lhs = sqlite3_value_text(argv[0]);
 34 |     const unsigned char* rhs = sqlite3_value_text(argv[1]);
 35 |     if (lhs == NULL || rhs == NULL) {
 36 |         sqlite3_result_error(context, "arguments should not be NULL", -1);
 37 |     } else if (!(is_ascii(lhs) && is_ascii(rhs))) {
 38 |         sqlite3_result_error(context, "arguments should be ASCII strings", -1);
 39 |     } else {
 40 |         const unsigned dist = damerau_levenshtein((const char*)lhs, (const char*)rhs);
 41 |         sqlite3_result_int(context, dist);
 42 |     }
 43 | }
 45 | 
 46 | // sqlite3_hamming is the SQL wrapper around hamming():
 47 | // NULL or non-ASCII arguments raise an error, otherwise the distance is returned as an integer
 48 | static void sqlite3_hamming(sqlite3_context* context, int argc, sqlite3_value** argv) {
 49 |     assert(argc == 2);
 50 |     const unsigned char* lhs = sqlite3_value_text(argv[0]);
 51 |     const unsigned char* rhs = sqlite3_value_text(argv[1]);
 52 |     if (lhs == NULL || rhs == NULL) {
 53 |         sqlite3_result_error(context, "arguments should not be NULL", -1);
 54 |     } else if (!(is_ascii(lhs) && is_ascii(rhs))) {
 55 |         sqlite3_result_error(context, "arguments should be ASCII strings", -1);
 56 |     } else {
 57 |         const int dist = hamming((const char*)lhs, (const char*)rhs);
 58 |         sqlite3_result_int(context, dist);
 59 |     }
 60 | }
 62 | 
 63 | // sqlite3_jaro_winkler is the SQL wrapper around jaro_winkler():
 64 | // NULL or non-ASCII arguments raise an error, otherwise the value is returned as a double
 65 | static void sqlite3_jaro_winkler(sqlite3_context* context, int argc, sqlite3_value** argv) {
 66 |     assert(argc == 2);
 67 |     const unsigned char* lhs = sqlite3_value_text(argv[0]);
 68 |     const unsigned char* rhs = sqlite3_value_text(argv[1]);
 69 |     if (lhs == NULL || rhs == NULL) {
 70 |         sqlite3_result_error(context, "arguments should not be NULL", -1);
 71 |     } else if (!(is_ascii(lhs) && is_ascii(rhs))) {
 72 |         sqlite3_result_error(context, "arguments should be ASCII strings", -1);
 73 |     } else {
 74 |         const double dist = jaro_winkler((const char*)lhs, (const char*)rhs);
 75 |         sqlite3_result_double(context, dist);
 76 |     }
 77 | }
 79 | 
 80 | // sqlite3_levenshtein is the SQL wrapper around levenshtein():
 81 | // NULL or non-ASCII arguments raise an error, otherwise the distance is returned as an integer
 82 | static void sqlite3_levenshtein(sqlite3_context* context, int argc, sqlite3_value** argv) {
 83 |     assert(argc == 2);
 84 |     const unsigned char* lhs = sqlite3_value_text(argv[0]);
 85 |     const unsigned char* rhs = sqlite3_value_text(argv[1]);
 86 |     if (lhs == NULL || rhs == NULL) {
 87 |         sqlite3_result_error(context, "arguments should not be NULL", -1);
 88 |     } else if (!(is_ascii(lhs) && is_ascii(rhs))) {
 89 |         sqlite3_result_error(context, "arguments should be ASCII strings", -1);
 90 |     } else {
 91 |         const unsigned dist = levenshtein((const char*)lhs, (const char*)rhs);
 92 |         sqlite3_result_int(context, dist);
 93 |     }
 94 | }
 96 | 
 97 | // sqlite3_osa_distance is the SQL wrapper around optimal_string_alignment():
 98 | // NULL or non-ASCII arguments raise an error, otherwise the distance is returned as an integer
 99 | static void sqlite3_osa_distance(sqlite3_context* context, int argc, sqlite3_value** argv) {
100 |     assert(argc == 2);
101 |     const unsigned char* lhs = sqlite3_value_text(argv[0]);
102 |     const unsigned char* rhs = sqlite3_value_text(argv[1]);
103 |     if (lhs == NULL || rhs == NULL) {
104 |         sqlite3_result_error(context, "arguments should not be NULL", -1);
105 |     } else if (!(is_ascii(lhs) && is_ascii(rhs))) {
106 |         sqlite3_result_error(context, "arguments should be ASCII strings", -1);
107 |     } else {
108 |         const unsigned dist = optimal_string_alignment((const char*)lhs, (const char*)rhs);
109 |         sqlite3_result_int(context, dist);
110 |     }
111 | }
113 | 
114 | // sqlite3_soundex is the SQL wrapper around soundex():
115 | // a NULL argument leaves the result unset, a non-ASCII argument raises an error
116 | static void sqlite3_soundex(sqlite3_context* context, int argc, sqlite3_value** argv) {
117 |     assert(argc == 1);
118 |     const unsigned char* text = sqlite3_value_text(argv[0]);
119 |     if (text == NULL) {
120 |         return;
121 |     }
122 |     if (is_ascii(text)) {
123 |         // the code buffer is handed to SQLite together with free() as its destructor
124 |         char* code = soundex((const char*)text);
125 |         sqlite3_result_text(context, code, -1, free);
126 |     } else {
127 |         sqlite3_result_error(context, "argument should be ASCII string", -1);
128 |     }
129 | }
128 | 
129 | // sqlite3_rsoundex is the SQL wrapper around refined_soundex():
130 | // a NULL argument leaves the result unset, a non-ASCII argument raises an error
131 | static void sqlite3_rsoundex(sqlite3_context* context, int argc, sqlite3_value** argv) {
132 |     assert(argc == 1);
133 |     const unsigned char* text = sqlite3_value_text(argv[0]);
134 |     if (text == NULL) {
135 |         return;
136 |     }
137 |     if (is_ascii(text)) {
138 |         // the code buffer is handed to SQLite together with free() as its destructor
139 |         char* code = refined_soundex((const char*)text);
140 |         sqlite3_result_text(context, code, -1, free);
141 |     } else {
142 |         sqlite3_result_error(context, "argument should be ASCII string", -1);
143 |     }
144 | }
143 | 
144 | // Below are functions extracted from the spellfix SQLite extension
145 | // https://www.sqlite.org/src/file/ext/misc/spellfix.c
146 | 
147 | /*
148 | ** phonetic_hash(X)
149 | **
150 | ** SQL wrapper that returns the "phonetic hash" computed by phonetic_hash()
151 | ** for the string of ASCII characters in X. According to the upstream spellfix
152 | ** description, the hash is built as follows:
153 | **
154 | **   * Map characters by character class.
155 | **   * Omit double-letters
156 | **   * Omit vowels beside R and L
157 | **   * Omit T when followed by CH
158 | **   * Omit W when followed by R
159 | **   * Omit D when followed by J or G
160 | **   * Omit K in KN or G in GN at the beginning of a word
161 | **
162 | ** A NULL argument leaves the result unset. A NULL hash is reported as an
163 | ** out-of-memory error; otherwise the hash buffer is released with free().
164 | ** NOTE(review): the upstream comment said the buffer comes from sqlite3_malloc();
165 | ** confirm which allocator phonetic_hash() really uses.
166 | */
167 | static void sqlite3_phonetic_hash(sqlite3_context* context, int argc, sqlite3_value** argv) {
168 |     const unsigned char* text = sqlite3_value_text(argv[0]);
169 |     if (text == NULL) {
170 |         return;
171 |     }
172 |     unsigned char* hash = phonetic_hash(text, sqlite3_value_bytes(argv[0]));
173 |     if (hash != NULL) {
174 |         sqlite3_result_text(context, (char*)hash, -1, free);
175 |         return;
176 |     }
177 |     sqlite3_result_error_nomem(context);
178 | }
178 | 
179 | /*
180 | ** edit_distance(A,B)
181 | **
182 | ** SQL wrapper around edit_distance(), described upstream as the cost of
183 | ** transforming string A into string B. Both strings must be pure ASCII text.
184 | ** If A ends with '*' then it is assumed to be a prefix of B and extra
185 | ** characters on the end of B have minimal additional cost.
186 | **
187 | ** A non-negative value is returned as the result. Negative values are errors:
188 | ** -3 is reported as out of memory, -2 as non-ASCII input,
189 | ** anything else as NULL input.
190 | */
191 | static void sqlite3_edit_distance(sqlite3_context* context, int argc, sqlite3_value** argv) {
192 |     const int cost = edit_distance((const char*)sqlite3_value_text(argv[0]),
193 |                                    (const char*)sqlite3_value_text(argv[1]), 0);
194 |     if (cost >= 0) {
195 |         sqlite3_result_int(context, cost);
196 |         return;
197 |     }
198 |     switch (cost) {
199 |         case -3:
200 |             sqlite3_result_error_nomem(context);
201 |             break;
202 |         case -2:
203 |             sqlite3_result_error(context, "non-ASCII input to editdist()", -1);
204 |             break;
205 |         default:
206 |             sqlite3_result_error(context, "NULL input to editdist()", -1);
207 |             break;
208 |     }
209 | }
202 | 
203 | /*
204 | ** translit(X)
205 | **
206 | ** SQL wrapper around transliterate(), described upstream as converting
207 | ** a string that contains non-ASCII Roman characters into pure ASCII.
208 | ** A NULL buffer from transliterate() is reported as an out-of-memory error;
209 | ** otherwise the buffer becomes the result and is released with free().
210 | */
211 | static void sqlite3_transliterate(sqlite3_context* context, int argc, sqlite3_value** argv) {
212 |     const unsigned char* text = sqlite3_value_text(argv[0]);
213 |     const int text_len = sqlite3_value_bytes(argv[0]);
214 |     unsigned char* ascii = transliterate(text, text_len);
215 |     if (ascii != NULL) {
216 |         sqlite3_result_text(context, (char*)ascii, -1, free);
217 |         return;
218 |     }
219 |     sqlite3_result_error_nomem(context);
220 | }
219 | 
220 | /*
221 | ** script_code(X)
222 | **
223 | ** SQL wrapper around script_code(), described upstream as determining the
224 | ** dominant script used by the word X and returning its ISO 15924 numeric code.
225 | **
226 | ** Per that description, only the following scripts are understood:
227 | **
228 | **    215  (Latin)
229 | **    220  (Cyrillic)
230 | **    200  (Greek)
231 | **
232 | ** and the routine returns 998 if X contains characters from two or more
233 | ** of the above scripts, or 999 if X contains no characters from any of them.
234 | */
235 | static void sqlite3_script_code(sqlite3_context* context, int argc, sqlite3_value** argv) {
236 |     const unsigned char* text = sqlite3_value_text(argv[0]);
237 |     const int text_len = sqlite3_value_bytes(argv[0]);
238 |     sqlite3_result_int(context, script_code(text, text_len));
239 | }
242 | 
243 | // Below are custom functions
244 | 
245 | // sqlite3_caverphone is the SQL wrapper around caverphone():
246 | // a NULL argument leaves the result unset, a non-ASCII argument raises an error
247 | static void sqlite3_caverphone(sqlite3_context* context, int argc, sqlite3_value** argv) {
248 |     assert(argc == 1);
249 |     const unsigned char* text = sqlite3_value_text(argv[0]);
250 |     if (text == NULL) {
251 |         return;
252 |     }
253 |     if (is_ascii(text)) {
254 |         // the code buffer is handed to SQLite together with free() as its destructor
255 |         char* code = caverphone((const char*)text);
256 |         sqlite3_result_text(context, code, -1, free);
257 |     } else {
258 |         sqlite3_result_error(context, "argument should be ASCII string", -1);
259 |     }
260 | }
259 | 
260 | /*
261 |  * Extension entry point: registers the fuzzy SQL functions on the connection.
262 |  * Always returns SQLITE_OK; the results of the individual registrations are not checked.
263 |  */
264 | #ifdef _WIN32
265 | __declspec(dllexport)
266 | #endif
267 |     int sqlite3_fuzzy_init(sqlite3* db, char** pzErrMsg, const sqlite3_api_routines* pApi) {
268 |     SQLITE_EXTENSION_INIT2(pApi);
269 |     static const int flags = SQLITE_UTF8 | SQLITE_INNOCUOUS | SQLITE_DETERMINISTIC;
270 |     // SQL name, number of arguments and implementation of every exported function
271 |     static const struct {
272 |         const char* name;
273 |         int n_args;
274 |         void (*impl)(sqlite3_context*, int, sqlite3_value**);
275 |     } functions[] = {
276 |         // libstrcmp
277 |         {"dlevenshtein", 2, sqlite3_dlevenshtein},
278 |         {"hamming", 2, sqlite3_hamming},
279 |         {"jaro_winkler", 2, sqlite3_jaro_winkler},
280 |         {"levenshtein", 2, sqlite3_levenshtein},
281 |         {"osa_distance", 2, sqlite3_osa_distance},
282 |         {"soundex", 1, sqlite3_soundex},
283 |         {"rsoundex", 1, sqlite3_rsoundex},
284 |         // spellfix
285 |         {"edit_distance", 2, sqlite3_edit_distance},
286 |         {"phonetic_hash", 1, sqlite3_phonetic_hash},
287 |         {"script_code", 1, sqlite3_script_code},
288 |         {"translit", 1, sqlite3_transliterate},
289 |         // custom
290 |         {"caverphone", 1, sqlite3_caverphone},
291 |     };
292 |     for (size_t idx = 0; idx < sizeof(functions) / sizeof(functions[0]); idx++) {
293 |         sqlite3_create_function(db, functions[idx].name, functions[idx].n_args, flags, 0,
294 |                                 functions[idx].impl, 0, 0);
295 |     }
296 |     return SQLITE_OK;
297 | }


--------------------------------------------------------------------------------
/src/fuzzy/caverphone.c:
--------------------------------------------------------------------------------
  1 | // Copyright (c) 2021 Anton Zhiyanov, MIT License
  2 | // https://github.com/nalgeon/sqlean
  3 | 
  4 | // Caverphone phonetic coding algorithm.
  5 | // https://en.wikipedia.org/wiki/Caverphone
  6 | 
  7 | #include <assert.h>
  8 | #include <stdlib.h>
  9 | #include <string.h>
 10 | 
 11 | // remove_non_letters returns a newly allocated copy of `src`
 12 | // that keeps only the lowercase letters a-z; the caller frees it
 13 | static char* remove_non_letters(const char* src) {
 14 |     char* res = malloc((strlen(src) + 1) * sizeof(char));
 15 |     char* out = res;
 16 |     for (const char* in = src; *in != '\0'; in++) {
 17 |         if (*in >= 'a' && *in <= 'z') {
 18 |             *out++ = *in;
 19 |         }
 20 |     }
 21 |     *out = '\0';
 22 |     return res;
 23 | }
 29 | 
 30 | // replace_start returns a newly allocated copy of `src` in which a leading
 31 | // `old` has been replaced by `new`; `new` must not be longer than `old`
 32 | static char* replace_start(const char* src, const char* old, const char* new) {
 33 |     const size_t src_len = strlen(src);
 34 |     const size_t old_len = strlen(old);
 35 |     const size_t new_len = strlen(new);
 36 |     assert(new_len <= old_len);
 37 | 
 38 |     char* res = malloc((src_len + 1) * sizeof(char));
 39 | 
 40 |     // a source shorter than `old` cannot start with it
 41 |     const int has_prefix = src_len >= old_len && strncmp(src, old, old_len) == 0;
 42 |     if (!has_prefix) {
 43 |         strcpy(res, src);
 44 |         return res;
 45 |     }
 46 | 
 47 |     memcpy(res, new, new_len);
 48 |     // the +1 carries the terminating NUL of the source along with the remainder
 49 |     memcpy(res + new_len, src + old_len, src_len - old_len + 1);
 50 |     return res;
 51 | }
 56 | 
 57 | // replace_end returns a newly allocated copy of `src` in which a trailing
 58 | // `old` has been replaced by `new`; `new` must not be longer than `old`
 59 | static char* replace_end(const char* src, const char* old, const char* new) {
 60 |     const size_t src_len = strlen(src);
 61 |     const size_t old_len = strlen(old);
 62 |     const size_t new_len = strlen(new);
 63 |     assert(new_len <= old_len);
 64 | 
 65 |     // start from a plain copy and overwrite the tail only when it matches
 66 |     char* res = malloc((src_len + 1) * sizeof(char));
 67 |     strcpy(res, src);
 68 | 
 69 |     // a source shorter than `old` cannot end with it
 70 |     if (src_len >= old_len) {
 71 |         const size_t tail_pos = src_len - old_len;
 72 |         if (memcmp(src + tail_pos, old, old_len) == 0) {
 73 |             // the +1 copies the terminating NUL of `new` as well
 74 |             memcpy(res + tail_pos, new, new_len + 1);
 75 |         }
 76 |     }
 77 |     return res;
 78 | }
 84 | 
 85 | // replace returns a newly allocated copy of `src` in which every occurrence
 86 | // of `old` has been replaced by `new`; `new` must not be longer than `old`
 87 | static char* replace(const char* src, const char* old, const char* new) {
 88 |     const size_t src_len = strlen(src);
 89 |     const size_t old_len = strlen(old);
 90 |     const size_t new_len = strlen(new);
 91 |     assert(new_len <= old_len);
 92 | 
 93 |     char* res = malloc((src_len + 1) * sizeof(char));
 94 | 
 95 |     // a source shorter than `old` cannot contain it
 96 |     if (src_len < old_len) {
 97 |         strcpy(res, src);
 98 |         return res;
 99 |     }
100 | 
101 |     const char* in = src;
102 |     const char* const in_end = src + src_len;
103 |     char* out = res;
104 |     while (in < in_end) {
105 |         if (strncmp(in, old, old_len) != 0) {
106 |             // no occurrence here: copy one character through
107 |             *out++ = *in++;
108 |             continue;
109 |         }
110 |         memcpy(out, new, new_len);
111 |         out += new_len;
112 |         in += old_len;
113 |     }
114 |     *out = '\0';
115 |     return res;
116 | }
119 | 
120 | // replace_seq returns a newly allocated copy of `src` in which every run
121 | // of consecutive `old` characters has been replaced by one `new` substring
122 | static char* replace_seq(const char* src, const char old, const char* new) {
123 |     const size_t new_len = strlen(new);
124 |     char* res = malloc((strlen(src) + 1) * sizeof(char));
125 |     char* out = res;
126 |     int in_run = 0;
127 |     for (const char* in = src; *in != '\0'; in++) {
128 |         if (*in == old) {
129 |             // still inside a run: nothing is written until the run ends
130 |             in_run = 1;
131 |             continue;
132 |         }
133 |         if (in_run) {
134 |             memcpy(out, new, new_len);
135 |             out += new_len;
136 |             in_run = 0;
137 |         }
138 |         *out++ = *in;
139 |     }
140 |     // a run that reaches the end of the string is still pending
141 |     if (in_run) {
142 |         memcpy(out, new, new_len);
143 |         out += new_len;
144 |     }
145 |     *out = '\0';
146 |     return res;
147 | }
152 | 
153 | // pad returns a newly allocated string of exactly 10 characters:
154 | // the first 10 characters of `src`, filled up with trailing 1s if `src` is shorter
155 | static char* pad(const char* src) {
156 |     enum { CODE_LEN = 10 };
157 |     char* res = malloc((CODE_LEN + 1) * sizeof(char));
158 |     size_t idx = 0;
159 |     while (idx < CODE_LEN && src[idx] != '\0') {
160 |         res[idx] = src[idx];
161 |         idx++;
162 |     }
163 |     memset(res + idx, '1', CODE_LEN - idx);
164 |     res[CODE_LEN] = '\0';
165 |     return res;
166 | }
169 | 
170 | // step releases the previous intermediate string `src`
171 | // and passes the freshly built `res` through unchanged
172 | static char* step(char* res, char* src) {
173 |     char* const next = res;
174 |     free(src);
175 |     return next;
176 | }
175 | 
176 | // caverphone implements the Caverphone phonetic hashing algorithm
177 | // as described in https://caversham.otago.ac.nz/files/working/ctp150804.pdf
178 | // Returns a newly allocated string owned by the caller: an empty string
179 | // for empty input, otherwise a code of exactly ten characters.
180 | char* caverphone(const char* src) {
181 |     // the rewriting rules are applied strictly in table order,
182 |     // each one to the output of the previous one
183 |     enum rule_kind { AT_START, AT_END, ANYWHERE, RUN_OF };
184 |     struct rule {
185 |         enum rule_kind kind;
186 |         const char* old;
187 |         const char* new;
188 |     };
189 |     static const struct rule rules[] = {
190 |         // Remove final e
191 |         {AT_END, "e", ""},
192 |         // If the name starts with *gh make it *2f
193 |         {AT_START, "cough", "cou2f"},
194 |         {AT_START, "rough", "rou2f"},
195 |         {AT_START, "tough", "tou2f"},
196 |         {AT_START, "enough", "enou2f"},
197 |         {AT_START, "trough", "trou2f"},
198 |         // If the name starts with gn make it 2n
199 |         {AT_START, "gn", "2n"},
200 |         // If the name ends with mb make it m2
201 |         {AT_END, "mb", "m2"},
202 |         // replace cq with 2q
203 |         {ANYWHERE, "cq", "2q"},
204 |         // replace c[iey] with s[iey]
205 |         {ANYWHERE, "ci", "si"},
206 |         {ANYWHERE, "ce", "se"},
207 |         {ANYWHERE, "cy", "sy"},
208 |         // replace tch with 2ch
209 |         {ANYWHERE, "tch", "2ch"},
210 |         // replace [cqx] with k
211 |         {ANYWHERE, "c", "k"},
212 |         {ANYWHERE, "q", "k"},
213 |         {ANYWHERE, "x", "k"},
214 |         // replace v with f
215 |         {ANYWHERE, "v", "f"},
216 |         // replace dg with 2g
217 |         {ANYWHERE, "dg", "2g"},
218 |         // replace ti[oa] with si[oa]
219 |         {ANYWHERE, "tio", "sio"},
220 |         {ANYWHERE, "tia", "sia"},
221 |         // replace d with t
222 |         {ANYWHERE, "d", "t"},
223 |         // replace ph with fh
224 |         {ANYWHERE, "ph", "fh"},
225 |         // replace b with p
226 |         {ANYWHERE, "b", "p"},
227 |         // replace sh with s2
228 |         {ANYWHERE, "sh", "s2"},
229 |         // replace z with s
230 |         {ANYWHERE, "z", "s"},
231 |         // replace an initial vowel [aeiou] with an A
232 |         {AT_START, "a", "A"},
233 |         {AT_START, "e", "A"},
234 |         {AT_START, "i", "A"},
235 |         {AT_START, "o", "A"},
236 |         {AT_START, "u", "A"},
237 |         // replace all other vowels with a 3
238 |         {ANYWHERE, "a", "3"},
239 |         {ANYWHERE, "e", "3"},
240 |         {ANYWHERE, "i", "3"},
241 |         {ANYWHERE, "o", "3"},
242 |         {ANYWHERE, "u", "3"},
243 |         // replace j with y
244 |         {ANYWHERE, "j", "y"},
245 |         // replace an initial y3 with Y3
246 |         {AT_START, "y3", "Y3"},
247 |         // replace an initial y with A
248 |         {AT_START, "y", "A"},
249 |         // replace y with 3
250 |         {ANYWHERE, "y", "3"},
251 |         // replace 3gh3 with 3kh3
252 |         {ANYWHERE, "3gh3", "3kh3"},
253 |         // replace gh with 22
254 |         {ANYWHERE, "gh", "22"},
255 |         // replace g with k
256 |         {ANYWHERE, "g", "k"},
257 |         // replace sequence of the letter [stpkfmn] with an uppercased letter
258 |         {RUN_OF, "s", "S"},
259 |         {RUN_OF, "t", "T"},
260 |         {RUN_OF, "p", "P"},
261 |         {RUN_OF, "k", "K"},
262 |         {RUN_OF, "f", "F"},
263 |         {RUN_OF, "m", "M"},
264 |         {RUN_OF, "n", "N"},
265 |         // replace w3 with W3
266 |         {ANYWHERE, "w3", "W3"},
267 |         // replace wh3 with Wh3
268 |         {ANYWHERE, "wh3", "Wh3"},
269 |         // replace the final w with 3
270 |         {AT_END, "w", "3"},
271 |         // replace w with 2
272 |         {ANYWHERE, "w", "2"},
273 |         // replace an initial h with an A
274 |         {AT_START, "h", "A"},
275 |         // replace all other occurrences of h with a 2
276 |         {ANYWHERE, "h", "2"},
277 |         // replace r3 with R3
278 |         {ANYWHERE, "r3", "R3"},
279 |         // replace the final r with 3
280 |         {AT_END, "r", "3"},
281 |         // replace r with 2
282 |         {ANYWHERE, "r", "2"},
283 |         // replace l3 with L3
284 |         {ANYWHERE, "l3", "L3"},
285 |         // replace the final l with 3
286 |         {AT_END, "l", "3"},
287 |         // replace l with 2
288 |         {ANYWHERE, "l", "2"},
289 |         // remove all 2s
290 |         {ANYWHERE, "2", ""},
291 |         // replace the final 3 with A
292 |         {AT_END, "3", "A"},
293 |         // remove all 3s
294 |         {ANYWHERE, "3", ""},
295 |     };
296 | 
297 |     assert(src != NULL);
298 | 
299 |     char* res = malloc((strlen(src) + 1) * sizeof(char));
300 | 
301 |     // empty input yields an empty (unpadded) code
302 |     if (*src == '\0') {
303 |         res[0] = '\0';
304 |         return res;
305 |     }
306 | 
307 |     strcpy(res, src);
308 | 
309 |     // Remove anything not in the standard alphabet
310 |     res = step(remove_non_letters((const char*)res), res);
311 | 
312 |     for (size_t idx = 0; idx < sizeof(rules) / sizeof(rules[0]); idx++) {
313 |         const struct rule* rule = &rules[idx];
314 |         char* next;
315 |         if (rule->kind == AT_START) {
316 |             next = replace_start((const char*)res, rule->old, rule->new);
317 |         } else if (rule->kind == AT_END) {
318 |             next = replace_end((const char*)res, rule->old, rule->new);
319 |         } else if (rule->kind == ANYWHERE) {
320 |             next = replace((const char*)res, rule->old, rule->new);
321 |         } else {
322 |             // RUN_OF rules name a single character in `old`
323 |             next = replace_seq((const char*)res, rule->old[0], rule->new);
324 |         }
325 |         // the previous intermediate string is released once its successor exists
326 |         res = step(next, res);
327 |     }
328 | 
329 |     // put ten 1s on the end
330 |     // take the first ten characters as the code
331 |     res = step(pad((const char*)res), res);
332 | 
333 |     return res;
334 | }
324 | 


--------------------------------------------------------------------------------
/src/sqlite3-stats.c:
--------------------------------------------------------------------------------
  1 | // Standard deviation and variance by Liam Healy, Public Domain
  2 | // extension-functions.c at https://sqlite.org/contrib/
  3 | 
  4 | // Percentile and generate series by D. Richard Hipp, Public Domain
  5 | // https://sqlite.org/src/file/ext/misc/percentile.c
  6 | // https://sqlite.org/src/file/ext/misc/series.c
  7 | 
  8 | // Refactored by Anton Zhiyanov, MIT License
  9 | // https://github.com/nalgeon/sqlean
 10 | 
 11 | // Statistical functions for SQLite.
 12 | 
 13 | #include <assert.h>
 14 | #include <ctype.h>
 15 | #include <errno.h>
 16 | #include <math.h>
 17 | #include <stdint.h>
 18 | #include <stdio.h>
 19 | #include <stdlib.h>
 20 | #include <string.h>
 21 | 
 22 | #include "sqlite3ext.h"
 23 | SQLITE_EXTENSION_INIT1
 24 | 
 25 | #pragma region Standard deviation and variance
 26 | 
 27 | /*
 28 | ** Running state of a stddev() or variance() aggregate computation.
 29 | ** Implementation of
 30 | ** http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Algorithm_II
 31 | ** which is less prone to rounding errors.
 32 | */
 33 | typedef struct StddevCtx {
 34 |     double rM;   /* running mean of the values seen so far */
 35 |     double rS;   /* running sum of delta * (x - mean), see varianceStep */
 36 |     int64_t cnt; /* number of elements */
 37 | } StddevCtx;
 39 | 
 40 | /*
 41 | ** Called for each value received during a calculation of stddev or variance.
 42 | ** Folds one non-NULL value into the running mean (rM) and the running
 43 | ** sum rS; NULL values are ignored.
 44 | */
 45 | static void varianceStep(sqlite3_context* context, int argc, sqlite3_value** argv) {
 46 |     StddevCtx* p;
 47 | 
 48 |     double delta;
 49 |     double x;
 50 | 
 51 |     assert(argc == 1);
 52 |     p = sqlite3_aggregate_context(context, sizeof(*p));
 53 |     if (p == 0) {
 54 |         /* no aggregate context available: skip the row instead of dereferencing NULL */
 55 |         return;
 56 |     }
 57 |     /* only consider non-null values */
 58 |     if (SQLITE_NULL != sqlite3_value_numeric_type(argv[0])) {
 59 |         p->cnt++;
 60 |         x = sqlite3_value_double(argv[0]);
 61 |         /* delta uses the old mean, the rS update uses the new one */
 62 |         delta = (x - p->rM);
 63 |         p->rM += delta / p->cnt;
 64 |         p->rS += delta * (x - p->rM);
 65 |     }
 66 | }
 60 | 
 61 | /*
 62 | ** Returns the sample standard deviation: sqrt(rS / (cnt - 1)).
 63 | ** The result is 0.0 when fewer than two values were seen.
 64 | */
 65 | static void stddevFinalize(sqlite3_context* context) {
 66 |     const StddevCtx* state = sqlite3_aggregate_context(context, 0);
 67 |     double result = 0.0;
 68 |     if (state != NULL && state->cnt > 1) {
 69 |         result = sqrt(state->rS / (state->cnt - 1));
 70 |     }
 71 |     sqlite3_result_double(context, result);
 72 | }
 73 | 
 74 | /*
 75 | ** Returns the population standard deviation: sqrt(rS / cnt).
 76 | ** The result is 0.0 when fewer than two values were seen.
 77 | */
 78 | static void stddevpopFinalize(sqlite3_context* context) {
 79 |     const StddevCtx* state = sqlite3_aggregate_context(context, 0);
 80 |     double result = 0.0;
 81 |     if (state != NULL && state->cnt > 1) {
 82 |         result = sqrt(state->rS / state->cnt);
 83 |     }
 84 |     sqlite3_result_double(context, result);
 85 | }
 86 | 
 87 | /*
 88 | ** Returns the sample variance: rS / (cnt - 1).
 89 | ** The result is 0.0 when fewer than two values were seen.
 90 | */
 91 | static void varianceFinalize(sqlite3_context* context) {
 92 |     const StddevCtx* state = sqlite3_aggregate_context(context, 0);
 93 |     double result = 0.0;
 94 |     if (state != NULL && state->cnt > 1) {
 95 |         result = state->rS / (state->cnt - 1);
 96 |     }
 97 |     sqlite3_result_double(context, result);
 98 | }
 99 | 
100 | /*
101 | ** Returns the population variance: rS / cnt.
102 | ** The result is 0.0 when fewer than two values were seen.
103 | */
104 | static void variancepopFinalize(sqlite3_context* context) {
105 |     const StddevCtx* state = sqlite3_aggregate_context(context, 0);
106 |     double result = 0.0;
107 |     if (state != NULL && state->cnt > 1) {
108 |         result = state->rS / state->cnt;
109 |     }
110 |     sqlite3_result_double(context, result);
111 | }
112 | 
113 | #pragma endregion
114 | 
115 | #pragma region Percentile
116 | 
/* Per-aggregate state for one percentile() style function.  Every Y input
** has to be kept until the finalizer runs, so the values are collected in
** the growable array a[].
*/
typedef struct Percentile {
    unsigned nAlloc; /* Capacity of a[], in elements */
    unsigned nUsed;  /* Count of elements of a[] currently holding a Y value */
    double rPct;     /* The P argument plus 1.0; 0.0 means "not recorded yet" */
    double* a;       /* Collected Y values */
} Percentile;
128 | 
129 | /*
130 | ** Return TRUE if the input floating-point number is an infinity.
131 | */
132 | static int isInfinity(double r) {
133 |     sqlite3_uint64 u;
134 |     assert(sizeof(u) == sizeof(r));
135 |     memcpy(&u, &r, sizeof(u));
136 |     return ((u >> 52) & 0x7ff) == 0x7ff;
137 | }
138 | 
/*
** Return TRUE when a and b are within 0.001 of each other (inclusive),
** FALSE otherwise.  A NaN difference compares FALSE.
*/
static int sameValue(double a, double b) {
    const double diff = a - b;

    if (diff >= -0.001) {
        return diff <= 0.001;
    }
    return 0;
}
146 | 
147 | /*
148 | ** The "step" function for percentile(Y,P) is called once for each
149 | ** input row.
150 | */
151 | static void percentStep(sqlite3_context* pCtx, double rPct, int argc, sqlite3_value** argv) {
152 |     Percentile* p;
153 |     int eType;
154 |     double y;
155 | 
156 |     /* Allocate the session context. */
157 |     p = (Percentile*)sqlite3_aggregate_context(pCtx, sizeof(*p));
158 |     if (p == 0)
159 |         return;
160 | 
161 |     /* Remember the P value.  Throw an error if the P value is different
162 |     ** from any prior row, per Requirement (2). */
163 |     if (p->rPct == 0.0) {
164 |         p->rPct = rPct + 1.0;
165 |     } else if (!sameValue(p->rPct, rPct + 1.0)) {
166 |         sqlite3_result_error(pCtx,
167 |                              "2nd argument to percentile() is not the "
168 |                              "same for all input rows",
169 |                              -1);
170 |         return;
171 |     }
172 | 
173 |     /* Ignore rows for which Y is NULL */
174 |     eType = sqlite3_value_type(argv[0]);
175 |     if (eType == SQLITE_NULL)
176 |         return;
177 | 
178 |     /* If not NULL, then Y must be numeric.  Otherwise throw an error.
179 |     ** Requirement 4 */
180 |     if (eType != SQLITE_INTEGER && eType != SQLITE_FLOAT) {
181 |         sqlite3_result_error(pCtx,
182 |                              "1st argument to percentile() is not "
183 |                              "numeric",
184 |                              -1);
185 |         return;
186 |     }
187 | 
188 |     /* Throw an error if the Y value is infinity or NaN */
189 |     y = sqlite3_value_double(argv[0]);
190 |     if (isInfinity(y)) {
191 |         sqlite3_result_error(pCtx, "Inf input to percentile()", -1);
192 |         return;
193 |     }
194 | 
195 |     /* Allocate and store the Y */
196 |     if (p->nUsed >= p->nAlloc) {
197 |         unsigned n = p->nAlloc * 2 + 250;
198 |         double* a = sqlite3_realloc64(p->a, sizeof(double) * n);
199 |         if (a == 0) {
200 |             sqlite3_free(p->a);
201 |             memset(p, 0, sizeof(*p));
202 |             sqlite3_result_error_nomem(pCtx);
203 |             return;
204 |         }
205 |         p->nAlloc = n;
206 |         p->a = a;
207 |     }
208 |     p->a[p->nUsed++] = y;
209 | }
210 | 
211 | static void percentStepCustom(sqlite3_context* pCtx, int argc, sqlite3_value** argv) {
212 |     assert(argc == 2);
213 |     /* Requirement 3:  P must be a number between 0 and 100 */
214 |     int eType = sqlite3_value_numeric_type(argv[1]);
215 |     double rPct = sqlite3_value_double(argv[1]);
216 |     if ((eType != SQLITE_INTEGER && eType != SQLITE_FLOAT) || rPct < 0.0 || rPct > 100.0) {
217 |         sqlite3_result_error(pCtx,
218 |                              "2nd argument to percentile() should be "
219 |                              "a number between 0.0 and 100.0",
220 |                              -1);
221 |         return;
222 |     }
223 |     percentStep(pCtx, rPct, argc, argv);
224 | }
225 | 
226 | static void percentStep25(sqlite3_context* pCtx, int argc, sqlite3_value** argv) {
227 |     assert(argc == 1);
228 |     percentStep(pCtx, 25, argc, argv);
229 | }
230 | 
231 | static void percentStep50(sqlite3_context* pCtx, int argc, sqlite3_value** argv) {
232 |     assert(argc == 1);
233 |     percentStep(pCtx, 50, argc, argv);
234 | }
235 | 
236 | static void percentStep75(sqlite3_context* pCtx, int argc, sqlite3_value** argv) {
237 |     assert(argc == 1);
238 |     percentStep(pCtx, 75, argc, argv);
239 | }
240 | 
241 | static void percentStep90(sqlite3_context* pCtx, int argc, sqlite3_value** argv) {
242 |     assert(argc == 1);
243 |     percentStep(pCtx, 90, argc, argv);
244 | }
245 | 
246 | static void percentStep95(sqlite3_context* pCtx, int argc, sqlite3_value** argv) {
247 |     assert(argc == 1);
248 |     percentStep(pCtx, 95, argc, argv);
249 | }
250 | 
251 | static void percentStep99(sqlite3_context* pCtx, int argc, sqlite3_value** argv) {
252 |     assert(argc == 1);
253 |     percentStep(pCtx, 99, argc, argv);
254 | }
255 | 
256 | /*
257 | ** Compare to doubles for sorting using qsort()
258 | */
259 | static int SQLITE_CDECL doubleCmp(const void* pA, const void* pB) {
260 |     double a = *(double*)pA;
261 |     double b = *(double*)pB;
262 |     if (a == b)
263 |         return 0;
264 |     if (a < b)
265 |         return -1;
266 |     return +1;
267 | }
268 | 
269 | /*
270 | ** Called to compute the final output of percentile() and to clean
271 | ** up all allocated memory.
272 | */
273 | static void percentFinal(sqlite3_context* pCtx) {
274 |     Percentile* p;
275 |     unsigned i1, i2;
276 |     double v1, v2;
277 |     double ix, vx;
278 |     p = (Percentile*)sqlite3_aggregate_context(pCtx, 0);
279 |     if (p == 0)
280 |         return;
281 |     if (p->a == 0)
282 |         return;
283 |     if (p->nUsed) {
284 |         qsort(p->a, p->nUsed, sizeof(double), doubleCmp);
285 |         ix = (p->rPct - 1.0) * (p->nUsed - 1) * 0.01;
286 |         i1 = (unsigned)ix;
287 |         i2 = ix == (double)i1 || i1 == p->nUsed - 1 ? i1 : i1 + 1;
288 |         v1 = p->a[i1];
289 |         v2 = p->a[i2];
290 |         vx = v1 + (v2 - v1) * (ix - i1);
291 |         sqlite3_result_double(pCtx, vx);
292 |     }
293 |     sqlite3_free(p->a);
294 |     memset(p, 0, sizeof(*p));
295 | }
296 | 
297 | #pragma endregion
298 | 
299 | #pragma region Generate series
300 | 
301 | #ifndef SQLITE_OMIT_VIRTUALTABLE
302 | 
303 | /* series_cursor is a subclass of sqlite3_vtab_cursor which will
304 | ** serve as the underlying representation of a cursor that scans
305 | ** over rows of the result
306 | */
307 | typedef struct series_cursor series_cursor;
308 | struct series_cursor {
309 |     sqlite3_vtab_cursor base; /* Base class - must be first */
310 |     int isDesc;               /* True to count down rather than up */
311 |     sqlite3_int64 iRowid;     /* The rowid */
312 |     sqlite3_int64 iValue;     /* Current value ("value") */
313 |     sqlite3_int64 mnValue;    /* Mimimum value ("start") */
314 |     sqlite3_int64 mxValue;    /* Maximum value ("stop") */
315 |     sqlite3_int64 iStep;      /* Increment ("step") */
316 | };
317 | 
318 | /*
319 | ** The seriesConnect() method is invoked to create a new
320 | ** series_vtab that describes the generate_series virtual table.
321 | **
322 | ** Think of this routine as the constructor for series_vtab objects.
323 | **
324 | ** All this routine needs to do is:
325 | **
326 | **    (1) Allocate the series_vtab object and initialize all fields.
327 | **
328 | **    (2) Tell SQLite (via the sqlite3_declare_vtab() interface) what the
329 | **        result set of queries against generate_series will look like.
330 | */
331 | static int seriesConnect(sqlite3* db,
332 |                          void* pUnused,
333 |                          int argcUnused,
334 |                          const char* const* argvUnused,
335 |                          sqlite3_vtab** ppVtab,
336 |                          char** pzErrUnused) {
337 |     sqlite3_vtab* pNew;
338 |     int rc;
339 | 
340 | /* Column numbers */
341 | #define SERIES_COLUMN_VALUE 0
342 | #define SERIES_COLUMN_START 1
343 | #define SERIES_COLUMN_STOP 2
344 | #define SERIES_COLUMN_STEP 3
345 | 
346 |     (void)pUnused;
347 |     (void)argcUnused;
348 |     (void)argvUnused;
349 |     (void)pzErrUnused;
350 |     rc = sqlite3_declare_vtab(db, "CREATE TABLE x(value,start hidden,stop hidden,step hidden)");
351 |     if (rc == SQLITE_OK) {
352 |         pNew = *ppVtab = sqlite3_malloc(sizeof(*pNew));
353 |         if (pNew == 0)
354 |             return SQLITE_NOMEM;
355 |         memset(pNew, 0, sizeof(*pNew));
356 |         sqlite3_vtab_config(db, SQLITE_VTAB_INNOCUOUS);
357 |     }
358 |     return rc;
359 | }
360 | 
361 | /*
362 | ** This method is the destructor for series_cursor objects.
363 | */
364 | static int seriesDisconnect(sqlite3_vtab* pVtab) {
365 |     sqlite3_free(pVtab);
366 |     return SQLITE_OK;
367 | }
368 | 
369 | /*
370 | ** Constructor for a new series_cursor object.
371 | */
372 | static int seriesOpen(sqlite3_vtab* pUnused, sqlite3_vtab_cursor** ppCursor) {
373 |     series_cursor* pCur;
374 |     (void)pUnused;
375 |     pCur = sqlite3_malloc(sizeof(*pCur));
376 |     if (pCur == 0)
377 |         return SQLITE_NOMEM;
378 |     memset(pCur, 0, sizeof(*pCur));
379 |     *ppCursor = &pCur->base;
380 |     return SQLITE_OK;
381 | }
382 | 
383 | /*
384 | ** Destructor for a series_cursor.
385 | */
386 | static int seriesClose(sqlite3_vtab_cursor* cur) {
387 |     sqlite3_free(cur);
388 |     return SQLITE_OK;
389 | }
390 | 
391 | /*
392 | ** Advance a series_cursor to its next row of output.
393 | */
394 | static int seriesNext(sqlite3_vtab_cursor* cur) {
395 |     series_cursor* pCur = (series_cursor*)cur;
396 |     if (pCur->isDesc) {
397 |         pCur->iValue -= pCur->iStep;
398 |     } else {
399 |         pCur->iValue += pCur->iStep;
400 |     }
401 |     pCur->iRowid++;
402 |     return SQLITE_OK;
403 | }
404 | 
405 | /*
406 | ** Return values of columns for the row at which the series_cursor
407 | ** is currently pointing.
408 | */
409 | static int seriesColumn(sqlite3_vtab_cursor* cur, /* The cursor */
410 |                         sqlite3_context* ctx,     /* First argument to sqlite3_result_...() */
411 |                         int i                     /* Which column to return */
412 | ) {
413 |     series_cursor* pCur = (series_cursor*)cur;
414 |     sqlite3_int64 x = 0;
415 |     switch (i) {
416 |         case SERIES_COLUMN_START:
417 |             x = pCur->mnValue;
418 |             break;
419 |         case SERIES_COLUMN_STOP:
420 |             x = pCur->mxValue;
421 |             break;
422 |         case SERIES_COLUMN_STEP:
423 |             x = pCur->iStep;
424 |             break;
425 |         default:
426 |             x = pCur->iValue;
427 |             break;
428 |     }
429 |     sqlite3_result_int64(ctx, x);
430 |     return SQLITE_OK;
431 | }
432 | 
433 | /*
434 | ** Return the rowid for the current row. In this implementation, the
435 | ** first row returned is assigned rowid value 1, and each subsequent
436 | ** row a value 1 more than that of the previous.
437 | */
438 | static int seriesRowid(sqlite3_vtab_cursor* cur, sqlite_int64* pRowid) {
439 |     series_cursor* pCur = (series_cursor*)cur;
440 |     *pRowid = pCur->iRowid;
441 |     return SQLITE_OK;
442 | }
443 | 
444 | /*
445 | ** Return TRUE if the cursor has been moved off of the last
446 | ** row of output.
447 | */
448 | static int seriesEof(sqlite3_vtab_cursor* cur) {
449 |     series_cursor* pCur = (series_cursor*)cur;
450 |     if (pCur->isDesc) {
451 |         return pCur->iValue < pCur->mnValue;
452 |     } else {
453 |         return pCur->iValue > pCur->mxValue;
454 |     }
455 | }
456 | 
/* Set to a true value to make SQLite re-check the start=, stop= and step=
** constraints at run time instead of trusting the virtual table.  This is
** only useful for exercising the constraint-checking logic for virtual
** tables in the SQLite core, so it defaults to 0.
*/
#if !defined(SQLITE_SERIES_CONSTRAINT_VERIFY)
#define SQLITE_SERIES_CONSTRAINT_VERIFY 0
#endif
464 | 
465 | /*
466 | ** This method is called to "rewind" the series_cursor object back
467 | ** to the first row of output.  This method is always called at least
468 | ** once prior to any call to seriesColumn() or seriesRowid() or
469 | ** seriesEof().
470 | **
471 | ** The query plan selected by seriesBestIndex is passed in the idxNum
472 | ** parameter.  (idxStr is not used in this implementation.)  idxNum
473 | ** is a bitmask showing which constraints are available:
474 | **
475 | **    1:    start=VALUE
476 | **    2:    stop=VALUE
477 | **    4:    step=VALUE
478 | **
479 | ** Also, if bit 8 is set, that means that the series should be output
480 | ** in descending order rather than in ascending order.  If bit 16 is
481 | ** set, then output must appear in ascending order.
482 | **
483 | ** This routine should initialize the cursor and position it so that it
484 | ** is pointing at the first row, or pointing off the end of the table
485 | ** (so that seriesEof() will return true) if the table is empty.
486 | */
487 | static int seriesFilter(sqlite3_vtab_cursor* pVtabCursor,
488 |                         int idxNum,
489 |                         const char* idxStrUnused,
490 |                         int argc,
491 |                         sqlite3_value** argv) {
492 |     series_cursor* pCur = (series_cursor*)pVtabCursor;
493 |     int i = 0;
494 |     (void)idxStrUnused;
495 |     if (idxNum & 1) {
496 |         pCur->mnValue = sqlite3_value_int64(argv[i++]);
497 |     } else {
498 |         pCur->mnValue = 0;
499 |     }
500 |     if (idxNum & 2) {
501 |         pCur->mxValue = sqlite3_value_int64(argv[i++]);
502 |     } else {
503 |         pCur->mxValue = 0xffffffff;
504 |     }
505 |     if (idxNum & 4) {
506 |         pCur->iStep = sqlite3_value_int64(argv[i++]);
507 |         if (pCur->iStep == 0) {
508 |             pCur->iStep = 1;
509 |         } else if (pCur->iStep < 0) {
510 |             pCur->iStep = -pCur->iStep;
511 |             if ((idxNum & 16) == 0)
512 |                 idxNum |= 8;
513 |         }
514 |     } else {
515 |         pCur->iStep = 1;
516 |     }
517 |     for (i = 0; i < argc; i++) {
518 |         if (sqlite3_value_type(argv[i]) == SQLITE_NULL) {
519 |             /* If any of the constraints have a NULL value, then return no rows.
520 |             ** See ticket https://www.sqlite.org/src/info/fac496b61722daf2 */
521 |             pCur->mnValue = 1;
522 |             pCur->mxValue = 0;
523 |             break;
524 |         }
525 |     }
526 |     if (idxNum & 8) {
527 |         pCur->isDesc = 1;
528 |         pCur->iValue = pCur->mxValue;
529 |         if (pCur->iStep > 0) {
530 |             pCur->iValue -= (pCur->mxValue - pCur->mnValue) % pCur->iStep;
531 |         }
532 |     } else {
533 |         pCur->isDesc = 0;
534 |         pCur->iValue = pCur->mnValue;
535 |     }
536 |     pCur->iRowid = 1;
537 |     return SQLITE_OK;
538 | }
539 | 
540 | /*
541 | ** SQLite will invoke this method one or more times while planning a query
542 | ** that uses the generate_series virtual table.  This routine needs to create
543 | ** a query plan for each invocation and compute an estimated cost for that
544 | ** plan.
545 | **
546 | ** In this implementation idxNum is used to represent the
547 | ** query plan.  idxStr is unused.
548 | **
549 | ** The query plan is represented by bits in idxNum:
550 | **
551 | **  (1)  start = $value  -- constraint exists
552 | **  (2)  stop = $value   -- constraint exists
553 | **  (4)  step = $value   -- constraint exists
554 | **  (8)  output in descending order
555 | */
556 | static int seriesBestIndex(sqlite3_vtab* pVTab, sqlite3_index_info* pIdxInfo) {
557 |     int i, j;             /* Loop over constraints */
558 |     int idxNum = 0;       /* The query plan bitmask */
559 |     int bStartSeen = 0;   /* EQ constraint seen on the START column */
560 |     int unusableMask = 0; /* Mask of unusable constraints */
561 |     int nArg = 0;         /* Number of arguments that seriesFilter() expects */
562 |     int aIdx[3];          /* Constraints on start, stop, and step */
563 |     const struct sqlite3_index_constraint* pConstraint;
564 | 
565 |     /* This implementation assumes that the start, stop, and step columns
566 |     ** are the last three columns in the virtual table. */
567 |     assert(SERIES_COLUMN_STOP == SERIES_COLUMN_START + 1);
568 |     assert(SERIES_COLUMN_STEP == SERIES_COLUMN_START + 2);
569 | 
570 |     aIdx[0] = aIdx[1] = aIdx[2] = -1;
571 |     pConstraint = pIdxInfo->aConstraint;
572 |     for (i = 0; i < pIdxInfo->nConstraint; i++, pConstraint++) {
573 |         int iCol;  /* 0 for start, 1 for stop, 2 for step */
574 |         int iMask; /* bitmask for those column */
575 |         if (pConstraint->iColumn < SERIES_COLUMN_START)
576 |             continue;
577 |         iCol = pConstraint->iColumn - SERIES_COLUMN_START;
578 |         assert(iCol >= 0 && iCol <= 2);
579 |         iMask = 1 << iCol;
580 |         if (iCol == 0)
581 |             bStartSeen = 1;
582 |         if (pConstraint->usable == 0) {
583 |             unusableMask |= iMask;
584 |             continue;
585 |         } else if (pConstraint->op == SQLITE_INDEX_CONSTRAINT_EQ) {
586 |             idxNum |= iMask;
587 |             aIdx[iCol] = i;
588 |         }
589 |     }
590 |     for (i = 0; i < 3; i++) {
591 |         if ((j = aIdx[i]) >= 0) {
592 |             pIdxInfo->aConstraintUsage[j].argvIndex = ++nArg;
593 |             pIdxInfo->aConstraintUsage[j].omit = !SQLITE_SERIES_CONSTRAINT_VERIFY;
594 |         }
595 |     }
596 |     /* The current generate_column() implementation requires at least one
597 |     ** argument (the START value).  Legacy versions assumed START=0 if the
598 |     ** first argument was omitted.  Compile with -DZERO_ARGUMENT_GENERATE_SERIES
599 |     ** to obtain the legacy behavior */
600 | #ifndef ZERO_ARGUMENT_GENERATE_SERIES
601 |     if (!bStartSeen) {
602 |         sqlite3_free(pVTab->zErrMsg);
603 |         pVTab->zErrMsg =
604 |             sqlite3_mprintf("first argument to \"generate_series()\" missing or unusable");
605 |         return SQLITE_ERROR;
606 |     }
607 | #endif
608 |     if ((unusableMask & ~idxNum) != 0) {
609 |         /* The start, stop, and step columns are inputs.  Therefore if there
610 |         ** are unusable constraints on any of start, stop, or step then
611 |         ** this plan is unusable */
612 |         return SQLITE_CONSTRAINT;
613 |     }
614 |     if ((idxNum & 3) == 3) {
615 |         /* Both start= and stop= boundaries are available.  This is the
616 |         ** the preferred case */
617 |         pIdxInfo->estimatedCost = (double)(2 - ((idxNum & 4) != 0));
618 |         pIdxInfo->estimatedRows = 1000;
619 |         if (pIdxInfo->nOrderBy == 1) {
620 |             if (pIdxInfo->aOrderBy[0].desc) {
621 |                 idxNum |= 8;
622 |             } else {
623 |                 idxNum |= 16;
624 |             }
625 |             pIdxInfo->orderByConsumed = 1;
626 |         }
627 |     } else {
628 |         /* If either boundary is missing, we have to generate a huge span
629 |         ** of numbers.  Make this case very expensive so that the query
630 |         ** planner will work hard to avoid it. */
631 |         pIdxInfo->estimatedRows = 2147483647;
632 |     }
633 |     pIdxInfo->idxNum = idxNum;
634 |     return SQLITE_OK;
635 | }
636 | 
637 | /*
638 | ** This following structure defines all the methods for the
639 | ** generate_series virtual table.
640 | */
641 | static sqlite3_module seriesModule = {
642 |     0,                /* iVersion */
643 |     0,                /* xCreate */
644 |     seriesConnect,    /* xConnect */
645 |     seriesBestIndex,  /* xBestIndex */
646 |     seriesDisconnect, /* xDisconnect */
647 |     0,                /* xDestroy */
648 |     seriesOpen,       /* xOpen - open a cursor */
649 |     seriesClose,      /* xClose - close a cursor */
650 |     seriesFilter,     /* xFilter - configure scan constraints */
651 |     seriesNext,       /* xNext - advance a cursor */
652 |     seriesEof,        /* xEof - check for end of scan */
653 |     seriesColumn,     /* xColumn - read data */
654 |     seriesRowid,      /* xRowid - read data */
655 |     0,                /* xUpdate */
656 |     0,                /* xBegin */
657 |     0,                /* xSync */
658 |     0,                /* xCommit */
659 |     0,                /* xRollback */
660 |     0,                /* xFindMethod */
661 |     0,                /* xRename */
662 |     0,                /* xSavepoint */
663 |     0,                /* xRelease */
664 |     0,                /* xRollbackTo */
665 |     0                 /* xShadowName */
666 | };
667 | 
668 | #endif /* SQLITE_OMIT_VIRTUALTABLE */
669 | 
670 | #pragma endregion
671 | 
672 | /*
673 |  * Registers the extension.
674 |  */
675 | #ifdef _WIN32
676 | __declspec(dllexport)
677 | #endif
678 |     int sqlite3_stats_init(sqlite3* db, char** pzErrMsg, const sqlite3_api_routines* pApi) {
679 |     SQLITE_EXTENSION_INIT2(pApi);
680 |     static const int flags = SQLITE_UTF8 | SQLITE_INNOCUOUS;
681 |     sqlite3_create_function(db, "stddev", 1, flags, 0, 0, varianceStep, stddevFinalize);
682 |     sqlite3_create_function(db, "stddev_samp", 1, flags, 0, 0, varianceStep, stddevFinalize);
683 |     sqlite3_create_function(db, "stddev_pop", 1, flags, 0, 0, varianceStep, stddevpopFinalize);
684 |     sqlite3_create_function(db, "variance", 1, flags, 0, 0, varianceStep, varianceFinalize);
685 |     sqlite3_create_function(db, "var_samp", 1, flags, 0, 0, varianceStep, varianceFinalize);
686 |     sqlite3_create_function(db, "var_pop", 1, flags, 0, 0, varianceStep, variancepopFinalize);
687 |     sqlite3_create_function(db, "median", 1, flags, 0, 0, percentStep50, percentFinal);
688 |     sqlite3_create_function(db, "percentile", 2, flags, 0, 0, percentStepCustom, percentFinal);
689 |     sqlite3_create_function(db, "percentile_25", 1, flags, 0, 0, percentStep25, percentFinal);
690 |     sqlite3_create_function(db, "percentile_75", 1, flags, 0, 0, percentStep75, percentFinal);
691 |     sqlite3_create_function(db, "percentile_90", 1, flags, 0, 0, percentStep90, percentFinal);
692 |     sqlite3_create_function(db, "percentile_95", 1, flags, 0, 0, percentStep95, percentFinal);
693 |     sqlite3_create_function(db, "percentile_99", 1, flags, 0, 0, percentStep99, percentFinal);
694 | #ifndef SQLITE_OMIT_VIRTUALTABLE
695 |     if (sqlite3_libversion_number() >= 3008012) {
696 |         sqlite3_create_module(db, "generate_series", &seriesModule, 0);
697 |     }
698 | #endif
699 |     return SQLITE_OK;
700 | }


--------------------------------------------------------------------------------